Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include <wctype.h>
15 : #include "sql_mem.h"
16 : #include "sql_scan.h"
17 : #include "sql_types.h"
18 : #include "sql_symbol.h"
19 : #include "sql_mvc.h"
20 : #include "sql_parser.tab.h"
21 : #include "sql_semantic.h"
22 : #include "sql_parser.h" /* for sql_error() */
23 :
24 : #include "stream.h"
25 : #include "mapi_prompt.h"
26 : #include <unistd.h>
27 : #include <string.h>
28 : #include <ctype.h>
29 : #include "sql_keyword.h"
30 :
31 : /**
32 : * Removes all comments before the query. In query comments are kept.
33 : */
34 : char *
35 389734 : query_cleaned(allocator *sa, const char *query)
36 : {
37 389734 : char *q, *r, *c = NULL;
38 389734 : int lines = 0;
39 389734 : int quote = 0; /* inside quotes ('..', "..", {..}) */
40 389734 : bool bs = false; /* seen a backslash in a quoted string */
41 389734 : bool incomment1 = false; /* inside traditional C style comment */
42 389734 : bool incomment2 = false; /* inside comment starting with -- */
43 389734 : bool inline_comment = false;
44 :
45 389734 : r = SA_NEW_ARRAY(sa, char, strlen(query) + 1);
46 389971 : if(!r)
47 : return NULL;
48 :
49 64749137 : for (q = r; *query; query++) {
50 64359166 : if (incomment1) {
51 15836 : if (*query == '/' && query[-1] == '*') {
52 229 : incomment1 = false;
53 229 : if (c == r && lines > 0) {
54 221 : q = r; // reset to beginning
55 221 : lines = 0;
56 221 : continue;
57 : }
58 : }
59 15615 : if (*query == '\n') lines++;
60 15615 : *q++ = *query;
61 64343330 : } else if (incomment2) {
62 831781 : if (*query == '\n') {
63 2889 : incomment2 = false;
64 2889 : inline_comment = false;
65 : /* add newline only if comment doesn't
66 : * occupy whole line */
67 2889 : if (q > r && q[-1] != '\n'){
68 999 : *q++ = '\n';
69 999 : lines++;
70 : }
71 828892 : } else if (inline_comment){
72 24710 : *q++ = *query; // preserve in line query comments
73 : }
74 63511549 : } else if (quote) {
75 21347323 : if (bs) {
76 : bs = false;
77 21344162 : } else if (*query == '\\') {
78 : bs = true;
79 21341001 : } else if (*query == quote) {
80 606061 : quote = 0;
81 : }
82 21347323 : *q++ = *query;
83 42164226 : } else if (*query == '"' || *query == '\'') {
84 605604 : quote = *query;
85 605604 : *q++ = *query;
86 41558622 : } else if (*query == '{') {
87 502 : quote = '}';
88 502 : *q++ = *query;
89 41558120 : } else if (*query == '-' && query[1] == '-') {
90 2889 : if (q > r && q[-1] != '\n') {
91 999 : inline_comment = true;
92 999 : *q++ = *query; // preserve in line query comments
93 : }
94 : incomment2 = true;
95 41555231 : } else if (*query == '/' && query[1] == '*') {
96 229 : incomment1 = true;
97 229 : c = q;
98 229 : *q++ = *query;
99 41555002 : } else if (*query == '\n') {
100 : /* collapse newlines */
101 847007 : if (q > r && q[-1] != '\n') {
102 805084 : *q++ = '\n';
103 805084 : lines++;
104 : }
105 40707995 : } else if (*query == ' ' || *query == '\t') {
106 : /* collapse white space */
107 6872776 : if (q > r && q[-1] != ' ')
108 5401920 : *q++ = ' ';
109 : } else {
110 33835219 : *q++ = *query;
111 : }
112 : }
113 389971 : *q = 0;
114 389971 : return r;
115 : }
116 :
117 : int
118 341 : scanner_init_keywords(void)
119 : {
120 341 : int failed = 0;
121 :
122 341 : failed += keywords_insert("false", BOOL_FALSE);
123 341 : failed += keywords_insert("true", BOOL_TRUE);
124 341 : failed += keywords_insert("bool", sqlBOOL);
125 :
126 341 : failed += keywords_insert("ALTER", ALTER);
127 341 : failed += keywords_insert("ADD", ADD);
128 341 : failed += keywords_insert("AND", AND);
129 :
130 341 : failed += keywords_insert("RANK", RANK);
131 341 : failed += keywords_insert("DENSE_RANK", RANK);
132 341 : failed += keywords_insert("PERCENT_RANK", RANK);
133 341 : failed += keywords_insert("CUME_DIST", RANK);
134 341 : failed += keywords_insert("ROW_NUMBER", RANK);
135 341 : failed += keywords_insert("NTILE", RANK);
136 341 : failed += keywords_insert("LAG", RANK);
137 341 : failed += keywords_insert("LEAD", RANK);
138 341 : failed += keywords_insert("FETCH", FETCH);
139 341 : failed += keywords_insert("FIRST_VALUE", RANK);
140 341 : failed += keywords_insert("LAST_VALUE", RANK);
141 341 : failed += keywords_insert("NTH_VALUE", RANK);
142 :
143 341 : failed += keywords_insert("BEST", BEST);
144 341 : failed += keywords_insert("EFFORT", EFFORT);
145 :
146 341 : failed += keywords_insert("AS", AS);
147 341 : failed += keywords_insert("ASC", ASC);
148 341 : failed += keywords_insert("AUTHORIZATION", AUTHORIZATION);
149 341 : failed += keywords_insert("BETWEEN", BETWEEN);
150 341 : failed += keywords_insert("SYMMETRIC", SYMMETRIC);
151 341 : failed += keywords_insert("ASYMMETRIC", ASYMMETRIC);
152 341 : failed += keywords_insert("BY", BY);
153 341 : failed += keywords_insert("CAST", CAST);
154 341 : failed += keywords_insert("CONVERT", CONVERT);
155 341 : failed += keywords_insert("CHARACTER", CHARACTER);
156 341 : failed += keywords_insert("CHAR", CHARACTER);
157 341 : failed += keywords_insert("VARYING", VARYING);
158 341 : failed += keywords_insert("VARCHAR", VARCHAR);
159 341 : failed += keywords_insert("BINARY", BINARY);
160 341 : failed += keywords_insert("LARGE", LARGE);
161 341 : failed += keywords_insert("OBJECT", OBJECT);
162 341 : failed += keywords_insert("CLOB", CLOB);
163 341 : failed += keywords_insert("BLOB", sqlBLOB);
164 341 : failed += keywords_insert("TEXT", sqlTEXT);
165 341 : failed += keywords_insert("TINYTEXT", sqlTEXT);
166 341 : failed += keywords_insert("STRING", CLOB); /* ? */
167 341 : failed += keywords_insert("CHECK", CHECK);
168 341 : failed += keywords_insert("CLIENT", CLIENT);
169 341 : failed += keywords_insert("SERVER", SERVER);
170 341 : failed += keywords_insert("COMMENT", COMMENT);
171 341 : failed += keywords_insert("CONSTRAINT", CONSTRAINT);
172 341 : failed += keywords_insert("CREATE", CREATE);
173 341 : failed += keywords_insert("CROSS", CROSS);
174 341 : failed += keywords_insert("COPY", COPY);
175 341 : failed += keywords_insert("RECORDS", RECORDS);
176 341 : failed += keywords_insert("DELIMITERS", DELIMITERS);
177 341 : failed += keywords_insert("STDIN", STDIN);
178 341 : failed += keywords_insert("STDOUT", STDOUT);
179 :
180 341 : failed += keywords_insert("TINYINT", TINYINT);
181 341 : failed += keywords_insert("SMALLINT", SMALLINT);
182 341 : failed += keywords_insert("INTEGER", sqlINTEGER);
183 341 : failed += keywords_insert("INT", sqlINTEGER);
184 341 : failed += keywords_insert("MEDIUMINT", sqlINTEGER);
185 341 : failed += keywords_insert("BIGINT", BIGINT);
186 : #ifdef HAVE_HGE
187 341 : failed += keywords_insert("HUGEINT", HUGEINT);
188 : #endif
189 341 : failed += keywords_insert("DEC", sqlDECIMAL);
190 341 : failed += keywords_insert("DECIMAL", sqlDECIMAL);
191 341 : failed += keywords_insert("NUMERIC", sqlDECIMAL);
192 341 : failed += keywords_insert("DECLARE", DECLARE);
193 341 : failed += keywords_insert("DEFAULT", DEFAULT);
194 341 : failed += keywords_insert("DESC", DESC);
195 341 : failed += keywords_insert("DISTINCT", DISTINCT);
196 341 : failed += keywords_insert("DOUBLE", sqlDOUBLE);
197 341 : failed += keywords_insert("REAL", sqlREAL);
198 341 : failed += keywords_insert("DROP", DROP);
199 341 : failed += keywords_insert("ESCAPE", ESCAPE);
200 341 : failed += keywords_insert("EXISTS", EXISTS);
201 341 : failed += keywords_insert("UESCAPE", UESCAPE);
202 341 : failed += keywords_insert("EXTRACT", EXTRACT);
203 341 : failed += keywords_insert("FLOAT", sqlFLOAT);
204 341 : failed += keywords_insert("FOR", FOR);
205 341 : failed += keywords_insert("FOREIGN", FOREIGN);
206 341 : failed += keywords_insert("FROM", FROM);
207 341 : failed += keywords_insert("FWF", FWF);
208 :
209 341 : failed += keywords_insert("BIG", BIG);
210 341 : failed += keywords_insert("LITTLE", LITTLE);
211 341 : failed += keywords_insert("NATIVE", NATIVE);
212 341 : failed += keywords_insert("ENDIAN", ENDIAN);
213 :
214 341 : failed += keywords_insert("REFERENCES", REFERENCES);
215 :
216 341 : failed += keywords_insert("MATCH", MATCH);
217 341 : failed += keywords_insert("FULL", FULL);
218 341 : failed += keywords_insert("PARTIAL", PARTIAL);
219 341 : failed += keywords_insert("SIMPLE", SIMPLE);
220 :
221 341 : failed += keywords_insert("INSERT", INSERT);
222 341 : failed += keywords_insert("UPDATE", UPDATE);
223 341 : failed += keywords_insert("DELETE", sqlDELETE);
224 341 : failed += keywords_insert("TRUNCATE", TRUNCATE);
225 341 : failed += keywords_insert("MATCHED", MATCHED);
226 :
227 341 : failed += keywords_insert("ACTION", ACTION);
228 341 : failed += keywords_insert("CASCADE", CASCADE);
229 341 : failed += keywords_insert("RESTRICT", RESTRICT);
230 341 : failed += keywords_insert("FIRST", FIRST);
231 341 : failed += keywords_insert("GLOBAL", GLOBAL);
232 341 : failed += keywords_insert("GROUP", sqlGROUP);
233 341 : failed += keywords_insert("GROUPING", GROUPING);
234 341 : failed += keywords_insert("ROLLUP", ROLLUP);
235 341 : failed += keywords_insert("CUBE", CUBE);
236 341 : failed += keywords_insert("HAVING", HAVING);
237 341 : failed += keywords_insert("ILIKE", ILIKE);
238 341 : failed += keywords_insert("IMPRINTS", IMPRINTS);
239 341 : failed += keywords_insert("IN", sqlIN);
240 341 : failed += keywords_insert("INNER", INNER);
241 341 : failed += keywords_insert("INTO", INTO);
242 341 : failed += keywords_insert("IS", IS);
243 341 : failed += keywords_insert("JOIN", JOIN);
244 341 : failed += keywords_insert("KEY", KEY);
245 341 : failed += keywords_insert("LATERAL", LATERAL);
246 341 : failed += keywords_insert("LEFT", LEFT);
247 341 : failed += keywords_insert("LIKE", LIKE);
248 341 : failed += keywords_insert("LIMIT", LIMIT);
249 341 : failed += keywords_insert("SAMPLE", SAMPLE);
250 341 : failed += keywords_insert("SEED", SEED);
251 341 : failed += keywords_insert("LAST", LAST);
252 341 : failed += keywords_insert("LOCAL", LOCAL);
253 341 : failed += keywords_insert("NATURAL", NATURAL);
254 341 : failed += keywords_insert("NOT", NOT);
255 341 : failed += keywords_insert("NULL", sqlNULL);
256 341 : failed += keywords_insert("NULLS", NULLS);
257 341 : failed += keywords_insert("OFFSET", OFFSET);
258 341 : failed += keywords_insert("ON", ON);
259 341 : failed += keywords_insert("OPTIONS", OPTIONS);
260 341 : failed += keywords_insert("OPTION", OPTION);
261 341 : failed += keywords_insert("OR", OR);
262 341 : failed += keywords_insert("ORDER", ORDER);
263 341 : failed += keywords_insert("ORDERED", ORDERED);
264 341 : failed += keywords_insert("OUTER", OUTER);
265 341 : failed += keywords_insert("OVER", OVER);
266 341 : failed += keywords_insert("PARTITION", PARTITION);
267 341 : failed += keywords_insert("PATH", PATH);
268 341 : failed += keywords_insert("PRECISION", PRECISION);
269 341 : failed += keywords_insert("PRIMARY", PRIMARY);
270 :
271 341 : failed += keywords_insert("USER", USER);
272 341 : failed += keywords_insert("RENAME", RENAME);
273 341 : failed += keywords_insert("UNENCRYPTED", UNENCRYPTED);
274 341 : failed += keywords_insert("ENCRYPTED", ENCRYPTED);
275 341 : failed += keywords_insert("PASSWORD", PASSWORD);
276 341 : failed += keywords_insert("GRANT", GRANT);
277 341 : failed += keywords_insert("REVOKE", REVOKE);
278 341 : failed += keywords_insert("ROLE", ROLE);
279 341 : failed += keywords_insert("ADMIN", ADMIN);
280 341 : failed += keywords_insert("PRIVILEGES", PRIVILEGES);
281 341 : failed += keywords_insert("PUBLIC", PUBLIC);
282 341 : failed += keywords_insert("CURRENT_USER", CURRENT_USER);
283 341 : failed += keywords_insert("CURRENT_ROLE", CURRENT_ROLE);
284 341 : failed += keywords_insert("SESSION_USER", SESSION_USER);
285 341 : failed += keywords_insert("CURRENT_SCHEMA", CURRENT_SCHEMA);
286 341 : failed += keywords_insert("SESSION", sqlSESSION);
287 341 : failed += keywords_insert("MAX_MEMORY", MAX_MEMORY);
288 341 : failed += keywords_insert("MAX_WORKERS", MAX_WORKERS);
289 341 : failed += keywords_insert("OPTIMIZER", OPTIMIZER);
290 :
291 341 : failed += keywords_insert("RIGHT", RIGHT);
292 341 : failed += keywords_insert("SCHEMA", SCHEMA);
293 341 : failed += keywords_insert("SELECT", SELECT);
294 341 : failed += keywords_insert("SET", SET);
295 341 : failed += keywords_insert("SETS", SETS);
296 341 : failed += keywords_insert("AUTO_COMMIT", AUTO_COMMIT);
297 :
298 341 : failed += keywords_insert("ALL", ALL);
299 341 : failed += keywords_insert("ANY", ANY);
300 341 : failed += keywords_insert("SOME", SOME);
301 341 : failed += keywords_insert("EVERY", ANY);
302 : /*
303 : failed += keywords_insert("SQLCODE", SQLCODE );
304 : */
305 341 : failed += keywords_insert("COLUMN", COLUMN);
306 341 : failed += keywords_insert("TABLE", TABLE);
307 341 : failed += keywords_insert("TEMPORARY", TEMPORARY);
308 341 : failed += keywords_insert("TEMP", TEMP);
309 341 : failed += keywords_insert("REMOTE", REMOTE);
310 341 : failed += keywords_insert("MERGE", MERGE);
311 341 : failed += keywords_insert("REPLICA", REPLICA);
312 341 : failed += keywords_insert("UNLOGGED", UNLOGGED);
313 341 : failed += keywords_insert("TO", TO);
314 341 : failed += keywords_insert("UNION", UNION);
315 341 : failed += keywords_insert("EXCEPT", EXCEPT);
316 341 : failed += keywords_insert("INTERSECT", INTERSECT);
317 341 : failed += keywords_insert("CORRESPONDING", CORRESPONDING);
318 341 : failed += keywords_insert("UNIQUE", UNIQUE);
319 341 : failed += keywords_insert("USING", USING);
320 341 : failed += keywords_insert("VALUES", VALUES);
321 341 : failed += keywords_insert("VIEW", VIEW);
322 341 : failed += keywords_insert("WHERE", WHERE);
323 341 : failed += keywords_insert("WITH", WITH);
324 341 : failed += keywords_insert("WITHOUT", WITHOUT);
325 341 : failed += keywords_insert("DATA", DATA);
326 :
327 341 : failed += keywords_insert("DATE", sqlDATE);
328 341 : failed += keywords_insert("TIME", TIME);
329 341 : failed += keywords_insert("TIMESTAMP", TIMESTAMP);
330 341 : failed += keywords_insert("INTERVAL", INTERVAL);
331 341 : failed += keywords_insert("CURRENT_DATE", CURRENT_DATE);
332 341 : failed += keywords_insert("CURRENT_TIME", CURRENT_TIME);
333 341 : failed += keywords_insert("CURRENT_TIMESTAMP", CURRENT_TIMESTAMP);
334 341 : failed += keywords_insert("CURRENT_TIMEZONE", CURRENT_TIMEZONE);
335 341 : failed += keywords_insert("NOW", CURRENT_TIMESTAMP);
336 341 : failed += keywords_insert("LOCALTIME", LOCALTIME);
337 341 : failed += keywords_insert("LOCALTIMESTAMP", LOCALTIMESTAMP);
338 341 : failed += keywords_insert("ZONE", ZONE);
339 :
340 341 : failed += keywords_insert("CENTURY", CENTURY);
341 341 : failed += keywords_insert("DECADE", DECADE);
342 341 : failed += keywords_insert("YEAR", YEAR);
343 341 : failed += keywords_insert("QUARTER", QUARTER);
344 341 : failed += keywords_insert("MONTH", MONTH);
345 341 : failed += keywords_insert("WEEK", WEEK);
346 341 : failed += keywords_insert("DOW", DOW);
347 341 : failed += keywords_insert("DOY", DOY);
348 341 : failed += keywords_insert("DAY", DAY);
349 341 : failed += keywords_insert("HOUR", HOUR);
350 341 : failed += keywords_insert("MINUTE", MINUTE);
351 341 : failed += keywords_insert("SECOND", SECOND);
352 341 : failed += keywords_insert("EPOCH", EPOCH);
353 :
354 341 : failed += keywords_insert("POSITION", POSITION);
355 341 : failed += keywords_insert("SUBSTRING", SUBSTRING);
356 341 : failed += keywords_insert("SPLIT_PART", SPLIT_PART);
357 341 : failed += keywords_insert("TRIM", TRIM);
358 341 : failed += keywords_insert("LEADING", LEADING);
359 341 : failed += keywords_insert("TRAILING", TRAILING);
360 341 : failed += keywords_insert("BOTH", BOTH);
361 :
362 341 : failed += keywords_insert("CASE", CASE);
363 341 : failed += keywords_insert("WHEN", WHEN);
364 341 : failed += keywords_insert("THEN", THEN);
365 341 : failed += keywords_insert("ELSE", ELSE);
366 341 : failed += keywords_insert("END", END);
367 341 : failed += keywords_insert("NULLIF", NULLIF);
368 341 : failed += keywords_insert("COALESCE", COALESCE);
369 341 : failed += keywords_insert("ELSEIF", ELSEIF);
370 341 : failed += keywords_insert("IF", IF);
371 341 : failed += keywords_insert("WHILE", WHILE);
372 341 : failed += keywords_insert("DO", DO);
373 :
374 341 : failed += keywords_insert("COMMIT", COMMIT);
375 341 : failed += keywords_insert("ROLLBACK", ROLLBACK);
376 341 : failed += keywords_insert("SAVEPOINT", SAVEPOINT);
377 341 : failed += keywords_insert("RELEASE", RELEASE);
378 341 : failed += keywords_insert("WORK", WORK);
379 341 : failed += keywords_insert("CHAIN", CHAIN);
380 341 : failed += keywords_insert("PRESERVE", PRESERVE);
381 341 : failed += keywords_insert("ROWS", ROWS);
382 341 : failed += keywords_insert("NO", NO);
383 341 : failed += keywords_insert("START", START);
384 341 : failed += keywords_insert("TRANSACTION", TRANSACTION);
385 341 : failed += keywords_insert("READ", READ);
386 341 : failed += keywords_insert("WRITE", WRITE);
387 341 : failed += keywords_insert("ONLY", ONLY);
388 341 : failed += keywords_insert("ISOLATION", ISOLATION);
389 341 : failed += keywords_insert("LEVEL", LEVEL);
390 341 : failed += keywords_insert("UNCOMMITTED", UNCOMMITTED);
391 341 : failed += keywords_insert("COMMITTED", COMMITTED);
392 341 : failed += keywords_insert("REPEATABLE", sqlREPEATABLE);
393 341 : failed += keywords_insert("SNAPSHOT", SNAPSHOT);
394 341 : failed += keywords_insert("SERIALIZABLE", SERIALIZABLE);
395 341 : failed += keywords_insert("DIAGNOSTICS", DIAGNOSTICS);
396 341 : failed += keywords_insert("SIZE", sqlSIZE);
397 341 : failed += keywords_insert("STORAGE", STORAGE);
398 :
399 341 : failed += keywords_insert("TYPE", TYPE);
400 341 : failed += keywords_insert("PROCEDURE", PROCEDURE);
401 341 : failed += keywords_insert("FUNCTION", FUNCTION);
402 341 : failed += keywords_insert("LOADER", sqlLOADER);
403 341 : failed += keywords_insert("REPLACE", REPLACE);
404 :
405 341 : failed += keywords_insert("FIELD", FIELD);
406 341 : failed += keywords_insert("FILTER", FILTER);
407 341 : failed += keywords_insert("AGGREGATE", AGGREGATE);
408 341 : failed += keywords_insert("RETURNS", RETURNS);
409 341 : failed += keywords_insert("EXTERNAL", EXTERNAL);
410 341 : failed += keywords_insert("NAME", sqlNAME);
411 341 : failed += keywords_insert("RETURN", RETURN);
412 341 : failed += keywords_insert("CALL", CALL);
413 341 : failed += keywords_insert("LANGUAGE", LANGUAGE);
414 :
415 341 : failed += keywords_insert("ANALYZE", ANALYZE);
416 341 : failed += keywords_insert("MINMAX", MINMAX);
417 341 : failed += keywords_insert("EXPLAIN", SQL_EXPLAIN);
418 341 : failed += keywords_insert("PLAN", SQL_PLAN);
419 341 : failed += keywords_insert("TRACE", SQL_TRACE);
420 341 : failed += keywords_insert("PREPARE", PREPARE);
421 341 : failed += keywords_insert("PREP", PREP);
422 341 : failed += keywords_insert("EXECUTE", EXECUTE);
423 341 : failed += keywords_insert("EXEC", EXEC);
424 341 : failed += keywords_insert("DEALLOCATE", DEALLOCATE);
425 :
426 341 : failed += keywords_insert("INDEX", INDEX);
427 :
428 341 : failed += keywords_insert("SEQUENCE", SEQUENCE);
429 341 : failed += keywords_insert("RESTART", RESTART);
430 341 : failed += keywords_insert("INCREMENT", INCREMENT);
431 341 : failed += keywords_insert("MAXVALUE", MAXVALUE);
432 341 : failed += keywords_insert("MINVALUE", MINVALUE);
433 341 : failed += keywords_insert("CYCLE", CYCLE);
434 341 : failed += keywords_insert("CACHE", CACHE);
435 341 : failed += keywords_insert("NEXT", NEXT);
436 341 : failed += keywords_insert("VALUE", VALUE);
437 341 : failed += keywords_insert("GENERATED", GENERATED);
438 341 : failed += keywords_insert("ALWAYS", ALWAYS);
439 341 : failed += keywords_insert("IDENTITY", IDENTITY);
440 341 : failed += keywords_insert("SERIAL", SERIAL);
441 341 : failed += keywords_insert("BIGSERIAL", BIGSERIAL);
442 341 : failed += keywords_insert("AUTO_INCREMENT", AUTO_INCREMENT);
443 341 : failed += keywords_insert("CONTINUE", CONTINUE);
444 :
445 341 : failed += keywords_insert("TRIGGER", TRIGGER);
446 341 : failed += keywords_insert("ATOMIC", ATOMIC);
447 341 : failed += keywords_insert("BEGIN", BEGIN);
448 341 : failed += keywords_insert("OF", OF);
449 341 : failed += keywords_insert("BEFORE", BEFORE);
450 341 : failed += keywords_insert("AFTER", AFTER);
451 341 : failed += keywords_insert("ROW", ROW);
452 341 : failed += keywords_insert("STATEMENT", STATEMENT);
453 341 : failed += keywords_insert("NEW", sqlNEW);
454 341 : failed += keywords_insert("OLD", OLD);
455 341 : failed += keywords_insert("EACH", EACH);
456 341 : failed += keywords_insert("REFERENCING", REFERENCING);
457 :
458 341 : failed += keywords_insert("RANGE", RANGE);
459 341 : failed += keywords_insert("UNBOUNDED", UNBOUNDED);
460 341 : failed += keywords_insert("PRECEDING", PRECEDING);
461 341 : failed += keywords_insert("FOLLOWING", FOLLOWING);
462 341 : failed += keywords_insert("CURRENT", CURRENT);
463 341 : failed += keywords_insert("EXCLUDE", EXCLUDE);
464 341 : failed += keywords_insert("OTHERS", OTHERS);
465 341 : failed += keywords_insert("TIES", TIES);
466 341 : failed += keywords_insert("GROUPS", GROUPS);
467 341 : failed += keywords_insert("WINDOW", WINDOW);
468 :
469 : /* special SQL/XML keywords */
470 341 : failed += keywords_insert("XMLCOMMENT", XMLCOMMENT);
471 341 : failed += keywords_insert("XMLCONCAT", XMLCONCAT);
472 341 : failed += keywords_insert("XMLDOCUMENT", XMLDOCUMENT);
473 341 : failed += keywords_insert("XMLELEMENT", XMLELEMENT);
474 341 : failed += keywords_insert("XMLATTRIBUTES", XMLATTRIBUTES);
475 341 : failed += keywords_insert("XMLFOREST", XMLFOREST);
476 341 : failed += keywords_insert("XMLPARSE", XMLPARSE);
477 341 : failed += keywords_insert("STRIP", STRIP);
478 341 : failed += keywords_insert("WHITESPACE", WHITESPACE);
479 341 : failed += keywords_insert("XMLPI", XMLPI);
480 341 : failed += keywords_insert("XMLQUERY", XMLQUERY);
481 341 : failed += keywords_insert("PASSING", PASSING);
482 341 : failed += keywords_insert("XMLTEXT", XMLTEXT);
483 341 : failed += keywords_insert("NIL", NIL);
484 341 : failed += keywords_insert("REF", REF);
485 341 : failed += keywords_insert("ABSENT", ABSENT);
486 341 : failed += keywords_insert("DOCUMENT", DOCUMENT);
487 341 : failed += keywords_insert("ELEMENT", ELEMENT);
488 341 : failed += keywords_insert("CONTENT", CONTENT);
489 341 : failed += keywords_insert("XMLNAMESPACES", XMLNAMESPACES);
490 341 : failed += keywords_insert("NAMESPACE", NAMESPACE);
491 341 : failed += keywords_insert("XMLVALIDATE", XMLVALIDATE);
492 341 : failed += keywords_insert("RETURNING", RETURNING);
493 341 : failed += keywords_insert("LOCATION", LOCATION);
494 341 : failed += keywords_insert("ID", ID);
495 341 : failed += keywords_insert("ACCORDING", ACCORDING);
496 341 : failed += keywords_insert("XMLSCHEMA", XMLSCHEMA);
497 341 : failed += keywords_insert("URI", URI);
498 341 : failed += keywords_insert("XMLAGG", XMLAGG);
499 :
500 : /* keywords for opengis */
501 341 : failed += keywords_insert("GEOMETRY", GEOMETRY);
502 :
503 341 : failed += keywords_insert("POINT", GEOMETRYSUBTYPE);
504 341 : failed += keywords_insert("LINESTRING", GEOMETRYSUBTYPE);
505 341 : failed += keywords_insert("POLYGON", GEOMETRYSUBTYPE);
506 341 : failed += keywords_insert("MULTIPOINT", GEOMETRYSUBTYPE);
507 341 : failed += keywords_insert("MULTILINESTRING", GEOMETRYSUBTYPE);
508 341 : failed += keywords_insert("MULTIPOLYGON", GEOMETRYSUBTYPE);
509 341 : failed += keywords_insert("GEOMETRYCOLLECTION", GEOMETRYSUBTYPE);
510 :
511 341 : failed += keywords_insert("POINTZ", GEOMETRYSUBTYPE);
512 341 : failed += keywords_insert("LINESTRINGZ", GEOMETRYSUBTYPE);
513 341 : failed += keywords_insert("POLYGONZ", GEOMETRYSUBTYPE);
514 341 : failed += keywords_insert("MULTIPOINTZ", GEOMETRYSUBTYPE);
515 341 : failed += keywords_insert("MULTILINESTRINGZ", GEOMETRYSUBTYPE);
516 341 : failed += keywords_insert("MULTIPOLYGONZ", GEOMETRYSUBTYPE);
517 341 : failed += keywords_insert("GEOMETRYCOLLECTIONZ", GEOMETRYSUBTYPE);
518 :
519 341 : failed += keywords_insert("POINTM", GEOMETRYSUBTYPE);
520 341 : failed += keywords_insert("LINESTRINGM", GEOMETRYSUBTYPE);
521 341 : failed += keywords_insert("POLYGONM", GEOMETRYSUBTYPE);
522 341 : failed += keywords_insert("MULTIPOINTM", GEOMETRYSUBTYPE);
523 341 : failed += keywords_insert("MULTILINESTRINGM", GEOMETRYSUBTYPE);
524 341 : failed += keywords_insert("MULTIPOLYGONM", GEOMETRYSUBTYPE);
525 341 : failed += keywords_insert("GEOMETRYCOLLECTIONM", GEOMETRYSUBTYPE);
526 :
527 341 : failed += keywords_insert("POINTZM", GEOMETRYSUBTYPE);
528 341 : failed += keywords_insert("LINESTRINGZM", GEOMETRYSUBTYPE);
529 341 : failed += keywords_insert("POLYGONZM", GEOMETRYSUBTYPE);
530 341 : failed += keywords_insert("MULTIPOINTZM", GEOMETRYSUBTYPE);
531 341 : failed += keywords_insert("MULTILINESTRINGZM", GEOMETRYSUBTYPE);
532 341 : failed += keywords_insert("MULTIPOLYGONZM", GEOMETRYSUBTYPE);
533 341 : failed += keywords_insert("GEOMETRYCOLLECTIONZM", GEOMETRYSUBTYPE);
534 341 : failed += keywords_insert("LOGIN", LOGIN);
535 : // odbc keywords
536 341 : failed += keywords_insert("d", ODBC_DATE_ESCAPE_PREFIX);
537 341 : failed += keywords_insert("t", ODBC_TIME_ESCAPE_PREFIX);
538 341 : failed += keywords_insert("ts", ODBC_TIMESTAMP_ESCAPE_PREFIX);
539 341 : failed += keywords_insert("guid", ODBC_GUID_ESCAPE_PREFIX);
540 341 : failed += keywords_insert("fn", ODBC_FUNC_ESCAPE_PREFIX);
541 341 : failed += keywords_insert("oj", ODBC_OJ_ESCAPE_PREFIX);
542 341 : failed += keywords_insert("DAYNAME", DAYNAME);
543 341 : failed += keywords_insert("IFNULL", IFNULL);
544 341 : failed += keywords_insert("MONTHNAME", MONTHNAME);
545 341 : failed += keywords_insert("TIMESTAMPADD", TIMESTAMPADD);
546 341 : failed += keywords_insert("TIMESTAMPDIFF", TIMESTAMPDIFF);
547 341 : failed += keywords_insert("SQL_BIGINT", SQL_BIGINT);
548 341 : failed += keywords_insert("SQL_BINARY", SQL_BINARY);
549 341 : failed += keywords_insert("SQL_BIT", SQL_BIT);
550 341 : failed += keywords_insert("SQL_CHAR", SQL_CHAR);
551 341 : failed += keywords_insert("SQL_DATE", SQL_DATE);
552 341 : failed += keywords_insert("SQL_DECIMAL", SQL_DECIMAL);
553 341 : failed += keywords_insert("SQL_DOUBLE", SQL_DOUBLE);
554 341 : failed += keywords_insert("SQL_FLOAT", SQL_FLOAT);
555 341 : failed += keywords_insert("SQL_GUID", SQL_GUID);
556 341 : failed += keywords_insert("SQL_HUGEINT", SQL_HUGEINT);
557 341 : failed += keywords_insert("SQL_INTEGER", SQL_INTEGER);
558 341 : failed += keywords_insert("SQL_INTERVAL_DAY", SQL_INTERVAL_DAY);
559 341 : failed += keywords_insert("SQL_INTERVAL_DAY_TO_HOUR", SQL_INTERVAL_DAY_TO_HOUR);
560 341 : failed += keywords_insert("SQL_INTERVAL_DAY_TO_MINUTE", SQL_INTERVAL_DAY_TO_MINUTE);
561 341 : failed += keywords_insert("SQL_INTERVAL_DAY_TO_SECOND", SQL_INTERVAL_DAY_TO_SECOND);
562 341 : failed += keywords_insert("SQL_INTERVAL_HOUR", SQL_INTERVAL_HOUR);
563 341 : failed += keywords_insert("SQL_INTERVAL_HOUR_TO_MINUTE", SQL_INTERVAL_HOUR_TO_MINUTE);
564 341 : failed += keywords_insert("SQL_INTERVAL_HOUR_TO_SECOND", SQL_INTERVAL_HOUR_TO_SECOND);
565 341 : failed += keywords_insert("SQL_INTERVAL_MINUTE", SQL_INTERVAL_MINUTE);
566 341 : failed += keywords_insert("SQL_INTERVAL_MINUTE_TO_SECOND", SQL_INTERVAL_MINUTE_TO_SECOND);
567 341 : failed += keywords_insert("SQL_INTERVAL_MONTH", SQL_INTERVAL_MONTH);
568 341 : failed += keywords_insert("SQL_INTERVAL_SECOND", SQL_INTERVAL_SECOND);
569 341 : failed += keywords_insert("SQL_INTERVAL_YEAR", SQL_INTERVAL_YEAR);
570 341 : failed += keywords_insert("SQL_INTERVAL_YEAR_TO_MONTH", SQL_INTERVAL_YEAR_TO_MONTH);
571 341 : failed += keywords_insert("SQL_LONGVARBINARY", SQL_LONGVARBINARY);
572 341 : failed += keywords_insert("SQL_LONGVARCHAR", SQL_LONGVARCHAR);
573 341 : failed += keywords_insert("SQL_NUMERIC", SQL_NUMERIC);
574 341 : failed += keywords_insert("SQL_REAL", SQL_REAL);
575 341 : failed += keywords_insert("SQL_SMALLINT", SQL_SMALLINT);
576 341 : failed += keywords_insert("SQL_TIME", SQL_TIME);
577 341 : failed += keywords_insert("SQL_TIMESTAMP", SQL_TIMESTAMP);
578 341 : failed += keywords_insert("SQL_TINYINT", SQL_TINYINT);
579 341 : failed += keywords_insert("SQL_VARBINARY", SQL_VARBINARY);
580 341 : failed += keywords_insert("SQL_VARCHAR", SQL_VARCHAR);
581 341 : failed += keywords_insert("SQL_WCHAR", SQL_WCHAR);
582 341 : failed += keywords_insert("SQL_WLONGVARCHAR", SQL_WLONGVARCHAR);
583 341 : failed += keywords_insert("SQL_WVARCHAR", SQL_WVARCHAR);
584 341 : failed += keywords_insert("SQL_TSI_FRAC_SECOND", SQL_TSI_FRAC_SECOND);
585 341 : failed += keywords_insert("SQL_TSI_SECOND", SQL_TSI_SECOND);
586 341 : failed += keywords_insert("SQL_TSI_MINUTE", SQL_TSI_MINUTE);
587 341 : failed += keywords_insert("SQL_TSI_HOUR", SQL_TSI_HOUR);
588 341 : failed += keywords_insert("SQL_TSI_DAY", SQL_TSI_DAY);
589 341 : failed += keywords_insert("SQL_TSI_WEEK", SQL_TSI_WEEK);
590 341 : failed += keywords_insert("SQL_TSI_MONTH", SQL_TSI_MONTH);
591 341 : failed += keywords_insert("SQL_TSI_QUARTER", SQL_TSI_QUARTER);
592 341 : failed += keywords_insert("SQL_TSI_YEAR", SQL_TSI_YEAR);
593 :
594 341 : failed += keywords_insert("LEAST", MARGFUNC);
595 341 : failed += keywords_insert("GREATEST", MARGFUNC);
596 341 : return failed;
597 : }
598 :
/* Look up the keyword that starts S bytes past the current read
 * position of the scanner's input buffer.  Both macro parameters are
 * parenthesized so that any expression can be passed safely. */
#define find_keyword_bs(lc, s) find_keyword((lc)->rs->buf + (lc)->rs->pos + (s))
600 :
601 : void
602 241055 : scanner_init(struct scanner *s, bstream *rs, stream *ws)
603 : {
604 482110 : *s = (struct scanner) {
605 : .rs = rs,
606 : .ws = ws,
607 : .mode = LINE_N,
608 241055 : .raw_string_mode = GDKgetenv_istrue("raw_strings"),
609 : .aborted = false,
610 : };
611 241055 : }
612 :
613 : void
614 1279490 : scanner_query_processed(struct scanner *s)
615 : {
616 1279490 : int cur;
617 :
618 1279490 : if (s->yybak) {
619 501178 : s->rs->buf[s->rs->pos + s->yycur] = s->yybak;
620 501178 : s->yybak = 0;
621 : }
622 1279490 : if (s->rs) {
623 1279490 : s->rs->pos += s->yycur;
624 : /* completely eat the query including white space after the ; */
625 2425888 : while (s->rs->pos < s->rs->len &&
626 2105983 : (cur = s->rs->buf[s->rs->pos], iswspace(cur))) {
627 1146398 : s->rs->pos++;
628 : }
629 : }
630 : /*assert(s->rs->pos <= s->rs->len);*/
631 1279490 : s->yycur = 0;
632 1279490 : s->started = 0;
633 1279490 : s->as = 0;
634 1279490 : s->schema = NULL;
635 1279490 : }
636 :
637 : static int
638 33 : scanner_error(mvc *lc, int cur)
639 : {
640 33 : switch (cur) {
641 0 : case EOF:
642 0 : (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected end of input");
643 0 : return EOF;
644 33 : default:
645 : /* on Windows at least, iswcntrl returns TRUE for
646 : * U+FEFF, but we just want consistent error
647 : * messages */
648 33 : (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected%s character (U+%04X)", iswcntrl(cur) && cur != 0xFEFF ? " control" : "", (unsigned) cur);
649 : }
650 33 : return LEX_ERROR;
651 : }
652 :
653 :
654 : /*
655 : UTF-8 encoding is as follows:
656 : U-00000000 - U-0000007F: 0xxxxxxx
657 : U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
658 : U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
659 : U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
660 : U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
661 : U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
662 : */
/* Correctly coded UTF-8 always uses the shortest possible encoding of
   a value.  Hence for an encoding of length n+1 (1 <= n <= 5), the
   decoded value must have at least one of the bits in utf8chkmsk[n]
   set; if none is set, a shorter encoding would have sufficed.
 */
static const int utf8chkmsk[] = {
	[0] = 0x0000007F,
	[1] = 0x00000780,
	[2] = 0x0000F800,
	[3] = 0x001F0000,
	[4] = 0x03E00000,
	[5] = 0x7C000000,
};
677 :
678 : static void
679 30491329 : utf8_putchar(struct scanner *lc, int ch)
680 : {
681 30491329 : if ((ch) < 0x80) {
682 30491324 : lc->yycur--;
683 5 : } else if ((ch) < 0x800) {
684 0 : lc->yycur -= 2;
685 5 : } else if ((ch) < 0x10000) {
686 5 : lc->yycur -= 3;
687 : } else {
688 0 : lc->yycur -= 4;
689 : }
690 30491329 : }
691 :
692 : static inline int
693 132508524 : scanner_read_more(struct scanner *lc, size_t n)
694 : {
695 132508524 : bstream *b = lc->rs;
696 132508524 : bool more = false;
697 :
698 :
699 132508524 : if (lc->aborted)
700 : return EOF;
701 132512759 : while (b->len < b->pos + lc->yycur + n) {
702 :
703 128566 : if (lc->mode == LINE_1 || !lc->started)
704 : return EOF;
705 :
706 : /* query is not finished ask for more */
707 0 : if (b->eof || !isa_block_stream(b->s)) {
708 0 : if (bstream_getoob(b)) {
709 0 : lc->aborted = true;
710 0 : return EOF;
711 : }
712 2121 : if (mnstr_write(lc->ws, PROMPT2, sizeof(PROMPT2) - 1, 1) == 1)
713 2121 : mnstr_flush(lc->ws, MNSTR_FLUSH_DATA);
714 2121 : b->eof = false;
715 2121 : more = true;
716 : }
717 : /* we need more query text */
718 4242 : if (bstream_next(b) < 0 ||
719 : /* we asked for more data but didn't get any */
720 2121 : (more && b->eof && b->len < b->pos + lc->yycur + n))
721 : return EOF;
722 4235 : if (more && b->pos + lc->yycur + 2 == b->len && b->buf[b->pos + lc->yycur] == '\200' && b->buf[b->pos + lc->yycur + 1] == '\n') {
723 0 : lc->errstr = "Query aborted";
724 0 : b->len -= 2;
725 0 : b->buf[b->len] = 0;
726 0 : return EOF;
727 : }
728 : }
729 : return 1;
730 : }
731 :
732 : static inline int
733 131286290 : scanner_getc(struct scanner *lc)
734 : {
735 131286290 : bstream *b = lc->rs;
736 131286290 : unsigned char *s = NULL;
737 131286290 : int c, m, n, mask;
738 :
739 131286290 : if (scanner_read_more(lc, 1) == EOF) {
740 : //lc->errstr = SQLSTATE(42000) "end of input stream";
741 : return EOF;
742 : }
743 131173489 : lc->errstr = NULL;
744 :
745 131173489 : s = (unsigned char *) b->buf + b->pos + lc->yycur++;
746 131173489 : if (((c = *s) & 0x80) == 0) {
747 : /* 7-bit char */
748 : return c;
749 : }
750 88175 : for (n = 0, m = 0x40; c & m; n++, m >>= 1)
751 : ;
752 : /* n now is number of 10xxxxxx bytes that should follow */
753 29410 : if (n == 0 || n >= 6 || (b->pos + n) > b->len) {
754 : /* incorrect UTF-8 sequence */
755 : /* n==0: c == 10xxxxxx */
756 : /* n>=6: c == 1111111x */
757 0 : lc->errstr = SQLSTATE(42000) "invalid start of UTF-8 sequence";
758 0 : goto error;
759 : }
760 :
761 29410 : if (scanner_read_more(lc, (size_t) n) == EOF)
762 : return EOF;
763 29410 : s = (unsigned char *) b->buf + b->pos + lc->yycur;
764 :
765 29410 : mask = utf8chkmsk[n];
766 29410 : c &= ~(0xFFC0 >> n); /* remove non-x bits */
767 88174 : while (--n >= 0) {
768 58765 : c <<= 6;
769 58765 : lc->yycur++;
770 58765 : if (((m = *s++) & 0xC0) != 0x80) {
771 : /* incorrect UTF-8 sequence: byte is not 10xxxxxx */
772 : /* this includes end-of-string (m == 0) */
773 1 : lc->errstr = SQLSTATE(42000) "invalid continuation in UTF-8 sequence";
774 1 : goto error;
775 : }
776 58764 : c |= m & 0x3F;
777 : }
778 29409 : if ((c & mask) == 0) {
779 : /* incorrect UTF-8 sequence: not shortest possible */
780 0 : lc->errstr = SQLSTATE(42000) "not shortest possible UTF-8 sequence";
781 0 : goto error;
782 : }
783 :
784 : return c;
785 :
786 1 : error:
787 1 : if (b->pos + lc->yycur < b->len) /* skip bogus char */
788 0 : lc->yycur++;
789 : return EOF;
790 : }
791 :
792 : static int
793 27046966 : scanner_token(struct scanner *lc, int token)
794 : {
795 27046966 : lc->yybak = lc->rs->buf[lc->rs->pos + lc->yycur];
796 27046966 : lc->rs->buf[lc->rs->pos + lc->yycur] = 0;
797 27046966 : lc->yyval = token;
798 27046966 : return lc->yyval;
799 : }
800 :
801 : static int
802 1998656 : scanner_string(mvc *c, int quote, bool escapes)
803 : {
804 1998656 : struct scanner *lc = &c->scanner;
805 1998656 : bstream *rs = lc->rs;
806 1998656 : int cur = quote;
807 1998656 : bool escape = false;
808 1998656 : const size_t limit = quote == '"' ? 1 << 11 : 1 << 30;
809 :
810 1998656 : lc->started = 1;
811 2036348 : while (cur != EOF) {
812 2036333 : size_t pos = 0;
813 2036333 : const size_t yycur = rs->pos + lc->yycur;
814 :
815 33942221 : while (cur != EOF && (quote != '"' || cur != 0xFEFF) && pos < limit &&
816 31905888 : (((cur = rs->buf[yycur + pos++]) & 0x80) == 0) &&
817 63782355 : cur && (cur != quote || escape)) {
818 29869556 : if (escapes && cur == '\\')
819 6392 : escape = !escape;
820 : else
821 : escape = false;
822 : }
823 2036333 : if (pos == limit) {
824 0 : (void) sql_error(c, 2, SQLSTATE(42000) "string too long");
825 0 : return LEX_ERROR;
826 : }
827 : /* BOM character not allowed as an identifier */
828 2036333 : if (cur == EOF || (quote == '"' && cur == 0xFEFF))
829 1 : return scanner_error(c, cur);
830 2036332 : lc->yycur += pos;
831 : /* check for quote escaped quote: Obscure SQL Rule */
832 2036332 : if (cur == quote && rs->buf[yycur + pos] == quote) {
833 8284 : lc->yycur++;
834 8284 : continue;
835 : }
836 2028048 : assert(yycur + pos <= rs->len + 1);
837 2028048 : if (cur == quote && !escape) {
838 1998626 : return scanner_token(lc, STRING);
839 : }
840 29422 : lc->yycur--; /* go back to current (possibly invalid) char */
841 : /* long utf8, if correct isn't the quote */
842 29422 : if (!cur) {
843 30 : if (lc->rs->len >= lc->rs->pos + lc->yycur + 1) {
844 14 : (void) sql_error(c, 2, SQLSTATE(42000) "NULL byte in string");
845 14 : return LEX_ERROR;
846 : }
847 16 : cur = scanner_read_more(lc, 1);
848 : } else {
849 29392 : cur = scanner_getc(lc);
850 : }
851 : }
852 15 : (void) sql_error(c, 2, "%s", lc->errstr ? lc->errstr : SQLSTATE(42000) "Unexpected end of input");
853 15 : return EOF;
854 : }
855 :
856 : /* scan a structure {blah} into a string. We only count the matching {}
857 : * unless escaped. We do not consider embeddings in string literals yet
858 : */
859 :
860 : static int
861 224 : scanner_body(mvc *c)
862 : {
863 224 : struct scanner *lc = &c->scanner;
864 224 : bstream *rs = lc->rs;
865 224 : int cur = (int) 'x';
866 224 : int blk = 1;
867 224 : bool escape = false;
868 :
869 224 : lc->started = 1;
870 224 : assert(rs->buf[rs->pos + lc->yycur-1] == '{');
871 280 : while (cur != EOF) {
872 280 : size_t pos = rs->pos + lc->yycur;
873 :
874 30642 : while ((((cur = rs->buf[pos++]) & 0x80) == 0) && cur && (blk || escape)) {
875 30362 : if (cur != '\\')
876 : escape = false;
877 : else
878 12 : escape = !escape;
879 30362 : blk += cur =='{';
880 30362 : blk -= cur =='}';
881 : }
882 280 : lc->yycur = pos - rs->pos;
883 280 : assert(pos <= rs->len + 1);
884 280 : if (blk == 0 && !escape){
885 224 : lc->yycur--; /* go back to current (possibly invalid) char */
886 224 : return scanner_token(lc, X_BODY);
887 : }
888 56 : lc->yycur--; /* go back to current (possibly invalid) char */
889 56 : if (!cur) {
890 56 : if (lc->rs->len >= lc->rs->pos + lc->yycur + 1) {
891 0 : (void) sql_error(c, 2, SQLSTATE(42000) "NULL byte in string");
892 0 : return LEX_ERROR;
893 : }
894 56 : cur = scanner_read_more(lc, 1);
895 : } else {
896 0 : cur = scanner_getc(lc);
897 : }
898 : }
899 0 : (void) sql_error(c, 2, SQLSTATE(42000) "Unexpected end of input");
900 0 : return EOF;
901 : }
902 :
903 : static int
904 13150564 : keyword_or_ident(mvc * c, int cur)
905 : {
906 13150564 : struct scanner *lc = &c->scanner;
907 13150564 : keyword *k = NULL;
908 13150564 : size_t s;
909 :
910 13150564 : lc->started = 1;
911 13150564 : utf8_putchar(lc, cur);
912 13150550 : s = lc->yycur;
913 13150550 : lc->yyval = IDENT;
914 78578155 : while ((cur = scanner_getc(lc)) != EOF) {
915 78578002 : if (!iswalnum(cur) && cur != '_') {
916 13150397 : utf8_putchar(lc, cur);
917 13150398 : (void)scanner_token(lc, IDENT);
918 13150398 : if ((k = find_keyword_bs(lc,s)))
919 8045040 : lc->yyval = k->token;
920 13150648 : return lc->yyval;
921 : }
922 : }
923 : if (cur < 0)
924 : return cur;
925 : (void)scanner_token(lc, IDENT);
926 : if ((k = find_keyword_bs(lc,s)))
927 : lc->yyval = k->token;
928 : return lc->yyval;
929 : }
930 :
931 : static int
932 13814392 : skip_white_space(struct scanner * lc)
933 : {
934 17394627 : int cur;
935 :
936 17394627 : do {
937 17394627 : lc->yysval = lc->yycur;
938 17394627 : } while ((cur = scanner_getc(lc)) != EOF && iswspace(cur));
939 13813295 : return cur;
940 : }
941 :
942 : static int
943 67038 : skip_c_comment(struct scanner * lc)
944 : {
945 67038 : int cur;
946 67038 : int prev = 0;
947 67038 : int started = lc->started;
948 67038 : int depth = 1;
949 :
950 67038 : lc->started = 1;
951 1352148 : while (depth > 0 && (cur = scanner_getc(lc)) != EOF) {
952 1285110 : if (prev == '*' && cur == '/')
953 67038 : depth--;
954 1218072 : else if (prev == '/' && cur == '*') {
955 : /* block comments can nest */
956 0 : cur = 0; /* prevent slash-star-slash from matching */
957 0 : depth++;
958 : }
959 : prev = cur;
960 : }
961 67038 : lc->yysval = lc->yycur;
962 67038 : lc->started = started;
963 : /* a comment is equivalent to a newline */
964 67038 : return cur == EOF ? cur : '\n';
965 : }
966 :
967 : static int
968 3169 : skip_sql_comment(struct scanner * lc)
969 : {
970 3169 : int cur;
971 3169 : int started = lc->started;
972 :
973 3169 : lc->started = 1;
974 834518 : while ((cur = scanner_getc(lc)) != EOF && (cur != '\n'))
975 : ;
976 3169 : lc->yysval = lc->yycur;
977 3169 : lc->started = started;
978 : /* a comment is equivalent to a newline */
979 3169 : return cur;
980 : }
981 :
982 : static int tokenize(mvc * lc, int cur);
983 :
/* Digit predicates for the number bases the scanner understands; each
 * one tells whether cur may appear in a literal of that base. */

/* base 10 */
static inline bool
is_valid_decimal_digit(int cur)
{
	return iswdigit(cur) != 0;
}

/* base 2: the digits 0 and 1 */
static inline bool
is_valid_binary_digit(int cur)
{
	return cur < '2' && iswdigit(cur) != 0;
}

/* base 8: the digits 0 up to and including 7 */
static inline bool
is_valid_octal_digit(int cur)
{
	return cur < '8' && iswdigit(cur) != 0;
}

/* base 16 */
static inline bool
is_valid_hexadecimal_digit(int cur)
{
	return iswxdigit(cur) != 0;
}
988 :
989 1758648 : static inline int check_validity_number(mvc* c, int pcur, bool initial_underscore_allowed, int *token, int type) {
990 1758648 : struct scanner *lc = &c->scanner;
991 1758648 : bool (*is_valid_n_ary_digit)(int);
992 :
993 1758648 : if (pcur == '_' && !initial_underscore_allowed) /* ERROR: initial underscore not allowed */ {
994 0 : *token = 0;
995 0 : return '_';
996 : }
997 :
998 1758648 : switch (type) {
999 : case BINARYNUM:
1000 : is_valid_n_ary_digit = &is_valid_binary_digit;
1001 : break;
1002 3 : case OCTALNUM:
1003 3 : is_valid_n_ary_digit = &is_valid_octal_digit;
1004 3 : break;
1005 280 : case HEXADECIMALNUM:
1006 280 : is_valid_n_ary_digit = &is_valid_hexadecimal_digit;
1007 280 : break;
1008 1758363 : default:
1009 1758363 : is_valid_n_ary_digit = &is_valid_decimal_digit;
1010 1758363 : break;
1011 : }
1012 :
1013 1758648 : if ( !(pcur == '_' || is_valid_n_ary_digit(pcur)) ) /* ERROR: first digit is not valid */ {
1014 17 : *token = 0;
1015 17 : return pcur;
1016 : }
1017 :
1018 1758792 : int cur = scanner_getc(lc);
1019 1759050 : *token = type;
1020 3395780 : while (cur != EOF) {
1021 3395545 : if (cur == '_') {
1022 25 : if (pcur == '_') /* ERROR: multiple consecutive underscores */ {
1023 2 : *token = 0;
1024 2 : return '_';
1025 : }
1026 : }
1027 3395520 : else if (!is_valid_n_ary_digit(cur))
1028 : break;
1029 1636906 : pcur = cur;
1030 1636906 : cur = scanner_getc(lc);
1031 : }
1032 :
1033 1758612 : if (pcur == '_') {
1034 3 : *token = 0;
1035 3 : if (iswalnum(cur)) /* ERROR: not a valid digit */
1036 : return cur;
1037 : else /* ERROR: number ends with underscore */
1038 : return '_';
1039 : }
1040 :
1041 : return cur;
1042 : }
1043 :
1044 : static int
1045 1745167 : number(mvc * c, int cur)
1046 : {
1047 1745167 : struct scanner *lc = &c->scanner;
1048 1745167 : int token = sqlINT;
1049 :
1050 : /* a number has one of these forms (expressed in regular expressions):
1051 : * 0x[0-9A-Fa-f]+ -- (hexadecimal) INTEGER
1052 : * \.[0-9]+ -- DECIMAL
1053 : * [0-9]+\.[0-9]* -- DECIMAL
1054 : * [0-9]+@0 -- OID
1055 : * [0-9]*\.[0-9]+[eE][-+]?[0-9]+ -- REAL
1056 : * [0-9]+(\.[0-9]*)?[eE][-+]?[0-9]+ -- REAL
1057 : * [0-9]+ -- (decimal) INTEGER
1058 : */
1059 1745167 : lc->started = 1;
1060 1745167 : if (cur == '0') {
1061 264052 : switch ((cur = scanner_getc(lc))) {
1062 2 : case 'b':
1063 2 : cur = scanner_getc(lc);
1064 2 : if ((cur = check_validity_number(c, cur, true, &token, BINARYNUM)) == EOF) return cur;
1065 : break;
1066 3 : case 'o':
1067 3 : cur = scanner_getc(lc);
1068 3 : if ((cur = check_validity_number(c, cur, true, &token, OCTALNUM)) == EOF) return cur;
1069 : break;
1070 280 : case 'x':
1071 280 : cur = scanner_getc(lc);
1072 280 : if ((cur = check_validity_number(c, cur, true, &token, HEXADECIMALNUM)) == EOF) return cur;
1073 : break;
1074 263763 : default:
1075 263763 : utf8_putchar(lc, cur);
1076 263763 : cur = '0';
1077 : }
1078 : }
1079 1745163 : if (token == sqlINT) {
1080 1745149 : if ((cur = check_validity_number(c, cur, false, &token, sqlINT)) == EOF) return cur;
1081 1745396 : if (cur == '@') {
1082 0 : if (token == sqlINT) {
1083 0 : cur = scanner_getc(lc);
1084 0 : if (cur == EOF)
1085 : return cur;
1086 0 : if (cur == '0') {
1087 0 : cur = scanner_getc(lc);
1088 0 : if (cur == EOF)
1089 : return cur;
1090 0 : token = OIDNUM;
1091 : } else {
1092 : /* number + '@' not followed by 0: show '@' as erroneous */
1093 0 : utf8_putchar(lc, cur);
1094 0 : cur = '@';
1095 0 : token = 0;
1096 : }
1097 : }
1098 : } else {
1099 1745396 : if (cur == '.') {
1100 10735 : cur = scanner_getc(lc);
1101 10735 : if (iswalnum(cur)) /* early exit for numerical forms with final . e.g. 10. */
1102 10729 : if ((cur = check_validity_number(c, cur, false, &token, INTNUM)) == EOF) return cur;
1103 : }
1104 1745396 : if (token != 0)
1105 1745036 : if (cur == 'e' || cur == 'E') {
1106 2229 : cur = scanner_getc(lc);
1107 2229 : if (cur == '+' || cur == '-')
1108 2111 : cur = scanner_getc(lc);
1109 2229 : if ((cur = check_validity_number(c, cur, false, &token, APPROXNUM)) == EOF) return cur;
1110 : }
1111 : }
1112 : }
1113 :
1114 1743181 : assert(cur != EOF);
1115 :
1116 1745410 : if (iswalnum(cur)) /* ERROR: not a valid digit */
1117 6 : token = 0;
1118 :
1119 1745410 : utf8_putchar(lc, cur);
1120 :
1121 1745490 : if (token) {
1122 1745480 : return scanner_token(lc, token);
1123 : } else {
1124 10 : (void)sql_error( c, 2, SQLSTATE(42000) "Unexpected symbol %lc", (wint_t) cur);
1125 10 : return LEX_ERROR;
1126 : }
1127 : }
1128 :
1129 : static
1130 12214420 : int scanner_symbol(mvc * c, int cur)
1131 : {
1132 12214420 : struct scanner *lc = &c->scanner;
1133 12214420 : int next = 0;
1134 12214420 : int started = lc->started;
1135 :
1136 12214420 : switch (cur) {
1137 69633 : case '/':
1138 69633 : lc->started = 1;
1139 69633 : next = scanner_getc(lc);
1140 69633 : if (next < 0)
1141 : return EOF;
1142 69633 : if (next == '*') {
1143 67038 : lc->started = started;
1144 67038 : cur = skip_c_comment(lc);
1145 67038 : if (cur < 0)
1146 : return EOF;
1147 67038 : return tokenize(c, cur);
1148 : } else {
1149 2595 : utf8_putchar(lc, next);
1150 2595 : return scanner_token(lc, cur);
1151 : }
1152 0 : case '0':
1153 : case '1':
1154 : case '2':
1155 : case '3':
1156 : case '4':
1157 : case '5':
1158 : case '6':
1159 : case '7':
1160 : case '8':
1161 : case '9':
1162 0 : return number(c, cur);
1163 5 : case '#':
1164 5 : if ((cur = skip_sql_comment(lc)) == EOF)
1165 : return cur;
1166 5 : return tokenize(c, cur);
1167 743520 : case '\'':
1168 743520 : if (lc->raw_string_mode || lc->next_string_is_raw)
1169 59 : return scanner_string(c, cur, false);
1170 743461 : return scanner_string(c, cur, true);
1171 1248182 : case '"':
1172 1248182 : return scanner_string(c, cur, false);
1173 490 : case '{':
1174 : // if previous tokens like LANGUAGE IDENT
1175 : // TODO checking on IDENT only may not be enough
1176 490 : if (lc->yylast == IDENT)
1177 224 : return scanner_body(c);
1178 266 : lc->started = 1;
1179 266 : return scanner_token(lc, cur);
1180 266 : case '}':
1181 266 : lc->started = 1;
1182 266 : return scanner_token(lc, cur);
1183 29773 : case '-':
1184 29773 : lc->started = 1;
1185 29773 : next = scanner_getc(lc);
1186 29773 : if (next < 0)
1187 : return EOF;
1188 29772 : if (next == '-') {
1189 3164 : lc->started = started;
1190 3164 : if ((cur = skip_sql_comment(lc)) == EOF)
1191 : return cur;
1192 3164 : return tokenize(c, cur);
1193 : }
1194 26608 : lc->started = 1;
1195 26608 : utf8_putchar(lc, next);
1196 26608 : return scanner_token(lc, cur);
1197 11 : case '~': /* binary not */
1198 11 : lc->started = 1;
1199 11 : next = scanner_getc(lc);
1200 11 : if (next < 0)
1201 : return EOF;
1202 11 : if (next == '=')
1203 5 : return scanner_token(lc, GEOM_MBR_EQUAL);
1204 6 : utf8_putchar(lc, next);
1205 6 : return scanner_token(lc, cur);
1206 6772997 : case '^': /* binary xor */
1207 : case '*':
1208 : case '?':
1209 : case ':':
1210 : case '%':
1211 : case '+':
1212 : case '(':
1213 : case ')':
1214 : case ',':
1215 : case '=':
1216 : case '[':
1217 : case ']':
1218 6772997 : lc->started = 1;
1219 6772997 : return scanner_token(lc, cur);
1220 5993 : case '&':
1221 5993 : lc->started = 1;
1222 5993 : cur = scanner_getc(lc);
1223 5993 : if (cur < 0)
1224 : return EOF;
1225 5993 : if (cur < 0)
1226 : return EOF;
1227 5993 : if(cur == '<') {
1228 3 : next = scanner_getc(lc);
1229 3 : if (next < 0)
1230 : return EOF;
1231 3 : if(next == '|') {
1232 0 : return scanner_token(lc, GEOM_OVERLAP_OR_BELOW);
1233 : } else {
1234 3 : utf8_putchar(lc, next); //put the char back
1235 3 : return scanner_token(lc, GEOM_OVERLAP_OR_LEFT);
1236 : }
1237 5990 : } else if(cur == '>')
1238 3 : return scanner_token(lc, GEOM_OVERLAP_OR_RIGHT);
1239 5987 : else if(cur == '&')
1240 3 : return scanner_token(lc, GEOM_OVERLAP);
1241 : else {/* binary and */
1242 5984 : utf8_putchar(lc, cur); //put the char back
1243 5984 : return scanner_token(lc, '&');
1244 : }
1245 19 : case '@':
1246 19 : lc->started = 1;
1247 19 : return scanner_token(lc, AT);
1248 970193 : case ';':
1249 970193 : lc->started = 0;
1250 970193 : return scanner_token(lc, SCOLON);
1251 50780 : case '<':
1252 50780 : lc->started = 1;
1253 50780 : cur = scanner_getc(lc);
1254 50780 : if (cur < 0)
1255 : return EOF;
1256 50780 : if (cur == '=') {
1257 3052 : return scanner_token( lc, COMPARISON);
1258 47728 : } else if (cur == '>') {
1259 34361 : return scanner_token( lc, COMPARISON);
1260 13367 : } else if (cur == '<') {
1261 40 : next = scanner_getc(lc);
1262 40 : if (next < 0)
1263 : return EOF;
1264 40 : if (next == '=') {
1265 2 : return scanner_token( lc, LEFT_SHIFT_ASSIGN);
1266 38 : } else if (next == '|') {
1267 1 : return scanner_token(lc, GEOM_BELOW);
1268 : } else {
1269 37 : utf8_putchar(lc, next); //put the char back
1270 37 : return scanner_token( lc, LEFT_SHIFT);
1271 : }
1272 13327 : } else if(cur == '-') {
1273 19 : next = scanner_getc(lc);
1274 19 : if (next < 0)
1275 : return EOF;
1276 19 : if(next == '>') {
1277 7 : return scanner_token(lc, GEOM_DIST);
1278 : } else {
1279 : //put the characters back and fall in the next possible case
1280 12 : utf8_putchar(lc, next);
1281 12 : utf8_putchar(lc, cur);
1282 12 : return scanner_token( lc, COMPARISON);
1283 : }
1284 : } else {
1285 13308 : utf8_putchar(lc, cur);
1286 13308 : return scanner_token( lc, COMPARISON);
1287 : }
1288 46709 : case '>':
1289 46709 : lc->started = 1;
1290 46709 : cur = scanner_getc(lc);
1291 46709 : if (cur < 0)
1292 : return EOF;
1293 46709 : if (cur == '>') {
1294 2525 : cur = scanner_getc(lc);
1295 2525 : if (cur < 0)
1296 : return EOF;
1297 2525 : if (cur == '=')
1298 1 : return scanner_token( lc, RIGHT_SHIFT_ASSIGN);
1299 2524 : utf8_putchar(lc, cur);
1300 2524 : return scanner_token( lc, RIGHT_SHIFT);
1301 44184 : } else if (cur != '=') {
1302 42038 : utf8_putchar(lc, cur);
1303 42038 : return scanner_token( lc, COMPARISON);
1304 : } else {
1305 2146 : return scanner_token( lc, COMPARISON);
1306 : }
1307 2092731 : case '.':
1308 2092731 : lc->started = 1;
1309 2092731 : cur = scanner_getc(lc);
1310 2092731 : if (cur < 0)
1311 : return EOF;
1312 2092730 : if (!iswdigit(cur)) {
1313 2092717 : utf8_putchar(lc, cur);
1314 2092717 : return scanner_token( lc, '.');
1315 : } else {
1316 13 : utf8_putchar(lc, cur);
1317 13 : cur = '.';
1318 13 : return number(c, cur);
1319 : }
1320 183084 : case '|': /* binary or or string concat */
1321 183084 : lc->started = 1;
1322 183084 : cur = scanner_getc(lc);
1323 183084 : if (cur < 0)
1324 : return EOF;
1325 183084 : if (cur == '|') {
1326 183055 : return scanner_token(lc, CONCATSTRING);
1327 29 : } else if (cur == '&') {
1328 0 : next = scanner_getc(lc);
1329 0 : if (next < 0)
1330 : return EOF;
1331 0 : if(next == '>') {
1332 0 : return scanner_token(lc, GEOM_OVERLAP_OR_ABOVE);
1333 : } else {
1334 0 : utf8_putchar(lc, next); //put the char back
1335 0 : utf8_putchar(lc, cur); //put the char back
1336 0 : return scanner_token(lc, '|');
1337 : }
1338 29 : } else if (cur == '>') {
1339 1 : next = scanner_getc(lc);
1340 1 : if (next < 0)
1341 : return EOF;
1342 1 : if(next == '>') {
1343 1 : return scanner_token(lc, GEOM_ABOVE);
1344 : } else {
1345 0 : utf8_putchar(lc, next); //put the char back
1346 0 : utf8_putchar(lc, cur); //put the char back
1347 0 : return scanner_token(lc, '|');
1348 : }
1349 : } else {
1350 28 : utf8_putchar(lc, cur);
1351 28 : return scanner_token(lc, '|');
1352 : }
1353 : }
1354 34 : (void)sql_error( c, 3, SQLSTATE(42000) "Unexpected symbol (%lc)", (wint_t) cur);
1355 34 : return LEX_ERROR;
1356 : }
1357 :
1358 : static int
1359 27133787 : tokenize(mvc * c, int cur)
1360 : {
1361 27133787 : struct scanner *lc = &c->scanner;
1362 54717929 : while (1) {
1363 40925858 : if (cur == 0xFEFF) {
1364 : /* on Linux at least, iswpunct returns TRUE
1365 : * for U+FEFF, but we don't want that, we just
1366 : * want to go to the scanner_error case
1367 : * below */
1368 : ;
1369 40926179 : } else if (iswspace(cur)) {
1370 13808871 : if ((cur = skip_white_space(lc)) == EOF)
1371 : return cur;
1372 13792071 : continue; /* try again */
1373 27117308 : } else if (iswdigit(cur)) {
1374 1745637 : return number(c, cur);
1375 25371671 : } else if (iswalpha(cur) || cur == '_') {
1376 13123156 : switch (cur) {
1377 635422 : case 'e': /* string with escapes */
1378 : case 'E':
1379 635422 : if (scanner_read_more(lc, 1) != EOF &&
1380 635422 : lc->rs->buf[lc->rs->pos + lc->yycur] == '\'') {
1381 3679 : return scanner_string(c, scanner_getc(lc), true);
1382 : }
1383 : break;
1384 407661 : case 'x': /* blob */
1385 : case 'X':
1386 : case 'r': /* raw string */
1387 : case 'R':
1388 407661 : if (scanner_read_more(lc, 1) != EOF &&
1389 407661 : lc->rs->buf[lc->rs->pos + lc->yycur] == '\'') {
1390 3258 : return scanner_string(c, scanner_getc(lc), false);
1391 : }
1392 : break;
1393 149018 : case 'u': /* unicode string */
1394 : case 'U':
1395 149018 : if (scanner_read_more(lc, 1) != EOF &&
1396 149035 : lc->rs->buf[lc->rs->pos + lc->yycur] == '&' &&
1397 17 : scanner_read_more(lc, 2) != EOF &&
1398 17 : (lc->rs->buf[lc->rs->pos + lc->yycur + 1] == '\'' ||
1399 : lc->rs->buf[lc->rs->pos + lc->yycur + 1] == '"')) {
1400 17 : cur = scanner_getc(lc); /* '&' */
1401 17 : return scanner_string(c, scanner_getc(lc), false);
1402 : }
1403 : break;
1404 : default:
1405 : break;
1406 : }
1407 13150581 : return keyword_or_ident(c, cur);
1408 12214136 : } else if (iswpunct(cur)) {
1409 12213783 : return scanner_symbol(c, cur);
1410 : }
1411 32 : if (cur == EOF) {
1412 0 : if (lc->mode == LINE_1 || !lc->started )
1413 : return cur;
1414 0 : return scanner_error(c, cur);
1415 : }
1416 : /* none of the above: error */
1417 32 : return scanner_error(c, cur);
1418 : }
1419 : }
1420 :
1421 : /* SQL 'quoted' idents consist of a set of any character of
1422 : * the source language character set other than a 'quote'
1423 : *
1424 : * MonetDB has 3 restrictions:
1425 : * 1 we disallow '%' as the first character.
1426 : * 2 the length is limited to 1024 characters
1427 : * 3 the identifier 'TID%' is not allowed
1428 : */
1429 : static bool
1430 1248171 : valid_ident(const char *restrict s, char *restrict dst)
1431 : {
1432 1248171 : int p = 0;
1433 :
1434 1248171 : if (*s == '%')
1435 : return false;
1436 :
1437 9253412 : while (*s) {
1438 8005241 : if ((dst[p++] = *s++) == '"' && *s == '"')
1439 68 : s++;
1440 8005241 : if (p >= 1024)
1441 : return false;
1442 : }
1443 1248171 : dst[p] = '\0';
1444 1248171 : if (strcmp(dst, TID + 1) == 0) /* an index named 'TID%' could interfere with '%TID%' */
1445 : return false;
1446 : return true;
1447 : }
1448 :
1449 : static inline int
1450 27225496 : sql_get_next_token(YYSTYPE *yylval, void *parm)
1451 : {
1452 27225496 : mvc *c = (mvc*)parm;
1453 27225496 : struct scanner *lc = &c->scanner;
1454 27225496 : int token = 0, cur = 0;
1455 :
1456 27225496 : if (lc->rs->buf == NULL) /* malloc failure */
1457 : return EOF;
1458 :
1459 27225496 : if (lc->yynext) {
1460 59566 : int next = lc->yynext;
1461 :
1462 59566 : lc->yynext = 0;
1463 59566 : return(next);
1464 : }
1465 :
1466 27165930 : if (lc->yybak) {
1467 26164926 : lc->rs->buf[lc->rs->pos + lc->yycur] = lc->yybak;
1468 26164926 : lc->yybak = 0;
1469 : }
1470 :
1471 27165930 : lc->yysval = lc->yycur;
1472 27165930 : lc->yylast = lc->yyval;
1473 27165930 : cur = scanner_getc(lc);
1474 27174312 : if (cur < 0)
1475 : return EOF;
1476 27063357 : token = tokenize(c, cur);
1477 :
1478 27055124 : yylval->sval = (lc->rs->buf + lc->rs->pos + lc->yysval);
1479 :
1480 27055124 : if (token == KW_TYPE)
1481 48241 : token = aTYPE;
1482 :
1483 27055124 : if (token == IDENT || token == COMPARISON ||
1484 21854586 : token == RANK || token == aTYPE || token == MARGFUNC) {
1485 5259085 : yylval->sval = sa_strndup(c->sa, yylval->sval, lc->yycur-lc->yysval);
1486 5259004 : lc->next_string_is_raw = false;
1487 21796039 : } else if (token == STRING) {
1488 1998626 : char quote = *yylval->sval;
1489 1998626 : char *str = sa_alloc( c->sa, (lc->yycur-lc->yysval-2)*2 + 1 );
1490 1998626 : char *dst;
1491 :
1492 1998626 : assert(quote == '"' || quote == '\'' || quote == 'E' || quote == 'e' || quote == 'U' || quote == 'u' || quote == 'X' || quote == 'x' || quote == 'R' || quote == 'r');
1493 :
1494 1998626 : lc->rs->buf[lc->rs->pos + lc->yycur - 1] = 0;
1495 1998626 : switch (quote) {
1496 1248171 : case '"':
1497 1248171 : if (valid_ident(yylval->sval+1,str)) {
1498 : token = IDENT;
1499 : } else {
1500 0 : sql_error(c, 1, SQLSTATE(42000) "Invalid identifier '%s'", yylval->sval+1);
1501 0 : return LEX_ERROR;
1502 : }
1503 : break;
1504 3678 : case 'e':
1505 : case 'E':
1506 3678 : assert(yylval->sval[1] == '\'');
1507 3678 : if (GDKstrFromStr((unsigned char *) str,
1508 : (unsigned char *) yylval->sval + 2,
1509 3678 : lc->yycur-lc->yysval - 2, '\'') < 0) {
1510 1 : char *err = GDKerrbuf;
1511 1 : if (strncmp(err, GDKERROR, strlen(GDKERROR)) == 0)
1512 1 : err += strlen(GDKERROR);
1513 0 : else if (*err == '!')
1514 0 : err++;
1515 1 : sql_error(c, 1, SQLSTATE(42000) "%s", err);
1516 1 : return LEX_ERROR;
1517 : }
1518 : quote = '\'';
1519 : break;
1520 17 : case 'u':
1521 : case 'U':
1522 17 : assert(yylval->sval[1] == '&');
1523 17 : assert(yylval->sval[2] == '\'' || yylval->sval[2] == '"');
1524 17 : strcpy(str, yylval->sval + 3);
1525 17 : token = yylval->sval[2] == '\'' ? USTRING : UIDENT;
1526 17 : quote = yylval->sval[2];
1527 17 : lc->next_string_is_raw = true;
1528 17 : break;
1529 1 : case 'x':
1530 : case 'X':
1531 1 : assert(yylval->sval[1] == '\'');
1532 1 : dst = str;
1533 5 : for (char *src = yylval->sval + 2; *src; dst++)
1534 4 : if ((*dst = *src++) == '\'' && *src == '\'')
1535 0 : src++;
1536 1 : *dst = 0;
1537 1 : quote = '\'';
1538 1 : token = XSTRING;
1539 1 : lc->next_string_is_raw = true;
1540 1 : break;
1541 3250 : case 'r':
1542 : case 'R':
1543 3250 : assert(yylval->sval[1] == '\'');
1544 3250 : dst = str;
1545 448755 : for (char *src = yylval->sval + 2; *src; dst++)
1546 445505 : if ((*dst = *src++) == '\'' && *src == '\'')
1547 2684 : src++;
1548 3250 : quote = '\'';
1549 3250 : *dst = 0;
1550 3250 : break;
1551 743509 : default:
1552 743509 : if (lc->raw_string_mode || lc->next_string_is_raw) {
1553 59 : dst = str;
1554 553 : for (char *src = yylval->sval + 1; *src; dst++)
1555 494 : if ((*dst = *src++) == '\'' && *src == '\'')
1556 3 : src++;
1557 59 : *dst = 0;
1558 : } else {
1559 743450 : if (GDKstrFromStr((unsigned char *)str,
1560 743450 : (unsigned char *)yylval->sval + 1,
1561 743450 : lc->yycur - lc->yysval - 1,
1562 : '\'') < 0) {
1563 0 : sql_error(c, 1, SQLSTATE(42000) "%s", GDKerrbuf);
1564 0 : return LEX_ERROR;
1565 : }
1566 : }
1567 : break;
1568 : }
1569 1998625 : yylval->sval = str;
1570 :
1571 : /* reset original */
1572 1998625 : lc->rs->buf[lc->rs->pos+lc->yycur- 1] = quote;
1573 : } else {
1574 19797413 : lc->next_string_is_raw = false;
1575 : }
1576 :
1577 : return(token);
1578 : }
1579 :
1580 : static int scanner( YYSTYPE *yylval, void *m, bool log);
1581 :
1582 : static int
1583 27096329 : scanner(YYSTYPE * yylval, void *parm, bool log)
1584 : {
1585 27096329 : int token;
1586 27096329 : mvc *c = (mvc *) parm;
1587 27096329 : struct scanner *lc = &c->scanner;
1588 27096329 : size_t pos;
1589 :
1590 : /* store position for when view's query ends */
1591 27096329 : pos = lc->rs->pos + lc->yycur;
1592 :
1593 27096329 : token = sql_get_next_token(yylval, parm);
1594 :
1595 27090671 : if (token == NOT) {
1596 71706 : int next = scanner(yylval, parm, false);
1597 :
1598 71706 : if (next == NOT) {
1599 2 : return scanner(yylval, parm, false);
1600 : } else if (next == EXISTS) {
1601 : token = NOT_EXISTS;
1602 : } else if (next == BETWEEN) {
1603 : token = NOT_BETWEEN;
1604 : } else if (next == sqlIN) {
1605 : token = NOT_IN;
1606 : } else if (next == LIKE) {
1607 : token = NOT_LIKE;
1608 : } else if (next == ILIKE) {
1609 : token = NOT_ILIKE;
1610 : } else {
1611 59566 : lc->yynext = next;
1612 : }
1613 27018965 : } else if (token == SCOLON) {
1614 : /* ignore semi-colon(s) following a semi-colon */
1615 970215 : if (lc->yylast == SCOLON) {
1616 131623 : size_t prev = lc->yycur;
1617 131624 : while ((token = sql_get_next_token(yylval, parm)) == SCOLON)
1618 1 : prev = lc->yycur;
1619 :
1620 : /* skip the skipped stuff also in the buffer */
1621 131702 : lc->rs->pos += prev;
1622 131702 : lc->yycur -= prev;
1623 : }
1624 : }
1625 :
1626 27090748 : if (lc->log && log)
1627 0 : mnstr_write(lc->log, lc->rs->buf+pos, lc->rs->pos + lc->yycur - pos, 1);
1628 :
1629 27090748 : lc->started += (token != EOF);
1630 27090748 : return token;
1631 : }
1632 :
1633 : /* also see sql_parser.y */
1634 : extern int sqllex(YYSTYPE * yylval, void *parm);
1635 :
1636 : int
1637 27026094 : sqllex(YYSTYPE * yylval, void *parm)
1638 : {
1639 27026094 : return scanner(yylval, parm, true);
1640 : }
|