LCOV - code coverage report
Current view: top level - sql/server - sql_scan.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 1035 1087 95.2 %
Date: 2024-04-25 20:03:45 Functions: 26 26 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include <wctype.h>
      15             : #include "sql_mem.h"
      16             : #include "sql_scan.h"
      17             : #include "sql_types.h"
      18             : #include "sql_symbol.h"
      19             : #include "sql_mvc.h"
      20             : #include "sql_parser.tab.h"
      21             : #include "sql_semantic.h"
      22             : #include "sql_parser.h"               /* for sql_error() */
      23             : 
      24             : #include "stream.h"
      25             : #include "mapi_prompt.h"
      26             : #include <unistd.h>
      27             : #include <string.h>
      28             : #include <ctype.h>
      29             : #include "sql_keyword.h"
      30             : 
      31             : /**
      32             :  * Removes all comments before the query. In query comments are kept.
      33             :  */
      34             : char *
      35      389593 : query_cleaned(sql_allocator *sa, const char *query)
      36             : {
      37      389593 :         char *q, *r, *c = NULL;
      38      389593 :         int lines = 0;
      39      389593 :         int quote = 0;          /* inside quotes ('..', "..", {..}) */
      40      389593 :         bool bs = false;                /* seen a backslash in a quoted string */
      41      389593 :         bool incomment1 = false;        /* inside traditional C style comment */
      42      389593 :         bool incomment2 = false;        /* inside comment starting with --  */
      43      389593 :         bool inline_comment = false;
      44             : 
      45      389593 :         r = SA_NEW_ARRAY(sa, char, strlen(query) + 1);
      46      389598 :         if(!r)
      47             :                 return NULL;
      48             : 
      49    63212704 :         for (q = r; *query; query++) {
      50    62823106 :                 if (incomment1) {
      51       15976 :                         if (*query == '/' && query[-1] == '*') {
      52         231 :                                 incomment1 = false;
      53         231 :                                 if (c == r && lines > 0) {
      54         223 :                                         q = r; // reset to beginning
      55         223 :                                         lines = 0;
      56         223 :                                         continue;
      57             :                                 }
      58             :                         }
      59       15753 :                         if (*query == '\n') lines++;
      60       15753 :                         *q++ = *query;
      61    62807130 :                 } else if (incomment2) {
      62      825093 :                         if (*query == '\n') {
      63        2770 :                                 incomment2 = false;
      64        2770 :                                 inline_comment = false;
      65             :                                 /* add newline only if comment doesn't
      66             :                                  * occupy whole line */
      67        2770 :                                 if (q > r && q[-1] != '\n'){
      68         887 :                                         *q++ = '\n';
      69         887 :                                         lines++;
      70             :                                 }
      71      822323 :                         } else if (inline_comment){
      72       18166 :                                 *q++ = *query; // preserve in line query comments
      73             :                         }
      74    61982037 :                 } else if (quote) {
      75    18362623 :                         if (bs) {
      76             :                                 bs = false;
      77    18359427 :                         } else if (*query == '\\') {
      78             :                                 bs = true;
      79    18356231 :                         } else if (*query == quote) {
      80      677699 :                                 quote = 0;
      81             :                         }
      82    18362623 :                         *q++ = *query;
      83    43619414 :                 } else if (*query == '"' || *query == '\'') {
      84      677251 :                         quote = *query;
      85      677251 :                         *q++ = *query;
      86    42942163 :                 } else if (*query == '{') {
      87         493 :                         quote = '}';
      88         493 :                         *q++ = *query;
      89    42941670 :                 } else if (*query == '-' && query[1] == '-') {
      90        2770 :                         if (q > r && q[-1] != '\n') {
      91         887 :                                 inline_comment = true;
      92         887 :                                 *q++ = *query; // preserve in line query comments
      93             :                         }
      94             :                         incomment2 = true;
      95    42938900 :                 } else if (*query == '/' && query[1] == '*') {
      96         231 :                         incomment1 = true;
      97         231 :                         c = q;
      98         231 :                         *q++ = *query;
      99    42938669 :                 } else if (*query == '\n') {
     100             :                         /* collapse newlines */
     101      877280 :                         if (q > r && q[-1] != '\n') {
     102      835307 :                                 *q++ = '\n';
     103      835307 :                                 lines++;
     104             :                         }
     105    42061389 :                 } else if (*query == ' ' || *query == '\t') {
     106             :                         /* collapse white space */
     107     7135912 :                         if (q > r && q[-1] != ' ')
     108     5508521 :                                 *q++ = ' ';
     109             :                 } else {
     110    34925477 :                         *q++ = *query;
     111             :                 }
     112             :         }
     113      389598 :         *q = 0;
     114      389598 :         return r;
     115             : }
     116             : 
     117             : int
     118         336 : scanner_init_keywords(void)
     119             : {
     120         336 :         int failed = 0;
     121             : 
     122         336 :         failed += keywords_insert("false", BOOL_FALSE);
     123         336 :         failed += keywords_insert("true", BOOL_TRUE);
     124             : 
     125         336 :         failed += keywords_insert("ALTER", ALTER);
     126         336 :         failed += keywords_insert("ADD", ADD);
     127         336 :         failed += keywords_insert("AND", AND);
     128             : 
     129         336 :         failed += keywords_insert("RANK", RANK);
     130         336 :         failed += keywords_insert("DENSE_RANK", RANK);
     131         336 :         failed += keywords_insert("PERCENT_RANK", RANK);
     132         336 :         failed += keywords_insert("CUME_DIST", RANK);
     133         336 :         failed += keywords_insert("ROW_NUMBER", RANK);
     134         336 :         failed += keywords_insert("NTILE", RANK);
     135         336 :         failed += keywords_insert("LAG", RANK);
     136         336 :         failed += keywords_insert("LEAD", RANK);
     137         336 :         failed += keywords_insert("FETCH", FETCH);
     138         336 :         failed += keywords_insert("FIRST_VALUE", RANK);
     139         336 :         failed += keywords_insert("LAST_VALUE", RANK);
     140         336 :         failed += keywords_insert("NTH_VALUE", RANK);
     141             : 
     142         336 :         failed += keywords_insert("BEST", BEST);
     143         336 :         failed += keywords_insert("EFFORT", EFFORT);
     144             : 
     145         336 :         failed += keywords_insert("AS", AS);
     146         336 :         failed += keywords_insert("ASC", ASC);
     147         336 :         failed += keywords_insert("AUTHORIZATION", AUTHORIZATION);
     148         336 :         failed += keywords_insert("BETWEEN", BETWEEN);
     149         336 :         failed += keywords_insert("SYMMETRIC", SYMMETRIC);
     150         336 :         failed += keywords_insert("ASYMMETRIC", ASYMMETRIC);
     151         336 :         failed += keywords_insert("BY", BY);
     152         336 :         failed += keywords_insert("CAST", CAST);
     153         336 :         failed += keywords_insert("CONVERT", CONVERT);
     154         336 :         failed += keywords_insert("CHARACTER", CHARACTER);
     155         336 :         failed += keywords_insert("CHAR", CHARACTER);
     156         336 :         failed += keywords_insert("VARYING", VARYING);
     157         336 :         failed += keywords_insert("VARCHAR", VARCHAR);
     158         336 :         failed += keywords_insert("BINARY", BINARY);
     159         336 :         failed += keywords_insert("LARGE", LARGE);
     160         336 :         failed += keywords_insert("OBJECT", OBJECT);
     161         336 :         failed += keywords_insert("CLOB", CLOB);
     162         336 :         failed += keywords_insert("BLOB", sqlBLOB);
     163         336 :         failed += keywords_insert("TEXT", sqlTEXT);
     164         336 :         failed += keywords_insert("TINYTEXT", sqlTEXT);
     165         336 :         failed += keywords_insert("STRING", CLOB);    /* ? */
     166         336 :         failed += keywords_insert("CHECK", CHECK);
     167         336 :         failed += keywords_insert("CLIENT", CLIENT);
     168         336 :         failed += keywords_insert("SERVER", SERVER);
     169         336 :         failed += keywords_insert("COMMENT", COMMENT);
     170         336 :         failed += keywords_insert("CONSTRAINT", CONSTRAINT);
     171         336 :         failed += keywords_insert("CREATE", CREATE);
     172         336 :         failed += keywords_insert("CROSS", CROSS);
     173         336 :         failed += keywords_insert("COPY", COPY);
     174         336 :         failed += keywords_insert("RECORDS", RECORDS);
     175         336 :         failed += keywords_insert("DELIMITERS", DELIMITERS);
     176         336 :         failed += keywords_insert("STDIN", STDIN);
     177         336 :         failed += keywords_insert("STDOUT", STDOUT);
     178             : 
     179         336 :         failed += keywords_insert("TINYINT", TINYINT);
     180         336 :         failed += keywords_insert("SMALLINT", SMALLINT);
     181         336 :         failed += keywords_insert("INTEGER", sqlINTEGER);
     182         336 :         failed += keywords_insert("INT", sqlINTEGER);
     183         336 :         failed += keywords_insert("MEDIUMINT", sqlINTEGER);
     184         336 :         failed += keywords_insert("BIGINT", BIGINT);
     185             : #ifdef HAVE_HGE
     186         336 :         failed += keywords_insert("HUGEINT", HUGEINT);
     187             : #endif
     188         336 :         failed += keywords_insert("DEC", sqlDECIMAL);
     189         336 :         failed += keywords_insert("DECIMAL", sqlDECIMAL);
     190         336 :         failed += keywords_insert("NUMERIC", sqlDECIMAL);
     191         336 :         failed += keywords_insert("DECLARE", DECLARE);
     192         336 :         failed += keywords_insert("DEFAULT", DEFAULT);
     193         336 :         failed += keywords_insert("DESC", DESC);
     194         336 :         failed += keywords_insert("DISTINCT", DISTINCT);
     195         336 :         failed += keywords_insert("DOUBLE", sqlDOUBLE);
     196         336 :         failed += keywords_insert("REAL", sqlREAL);
     197         336 :         failed += keywords_insert("DROP", DROP);
     198         336 :         failed += keywords_insert("ESCAPE", ESCAPE);
     199         336 :         failed += keywords_insert("EXISTS", EXISTS);
     200         336 :         failed += keywords_insert("UESCAPE", UESCAPE);
     201         336 :         failed += keywords_insert("EXTRACT", EXTRACT);
     202         336 :         failed += keywords_insert("FLOAT", sqlFLOAT);
     203         336 :         failed += keywords_insert("FOR", FOR);
     204         336 :         failed += keywords_insert("FOREIGN", FOREIGN);
     205         336 :         failed += keywords_insert("FROM", FROM);
     206         336 :         failed += keywords_insert("FWF", FWF);
     207             : 
     208         336 :         failed += keywords_insert("BIG", BIG);
     209         336 :         failed += keywords_insert("LITTLE", LITTLE);
     210         336 :         failed += keywords_insert("NATIVE", NATIVE);
     211         336 :         failed += keywords_insert("ENDIAN", ENDIAN);
     212             : 
     213         336 :         failed += keywords_insert("REFERENCES", REFERENCES);
     214             : 
     215         336 :         failed += keywords_insert("MATCH", MATCH);
     216         336 :         failed += keywords_insert("FULL", FULL);
     217         336 :         failed += keywords_insert("PARTIAL", PARTIAL);
     218         336 :         failed += keywords_insert("SIMPLE", SIMPLE);
     219             : 
     220         336 :         failed += keywords_insert("INSERT", INSERT);
     221         336 :         failed += keywords_insert("UPDATE", UPDATE);
     222         336 :         failed += keywords_insert("DELETE", sqlDELETE);
     223         336 :         failed += keywords_insert("TRUNCATE", TRUNCATE);
     224         336 :         failed += keywords_insert("MATCHED", MATCHED);
     225             : 
     226         336 :         failed += keywords_insert("ACTION", ACTION);
     227         336 :         failed += keywords_insert("CASCADE", CASCADE);
     228         336 :         failed += keywords_insert("RESTRICT", RESTRICT);
     229         336 :         failed += keywords_insert("FIRST", FIRST);
     230         336 :         failed += keywords_insert("GLOBAL", GLOBAL);
     231         336 :         failed += keywords_insert("GROUP", sqlGROUP);
     232         336 :         failed += keywords_insert("GROUPING", GROUPING);
     233         336 :         failed += keywords_insert("ROLLUP", ROLLUP);
     234         336 :         failed += keywords_insert("CUBE", CUBE);
     235         336 :         failed += keywords_insert("HAVING", HAVING);
     236         336 :         failed += keywords_insert("ILIKE", ILIKE);
     237         336 :         failed += keywords_insert("IMPRINTS", IMPRINTS);
     238         336 :         failed += keywords_insert("IN", sqlIN);
     239         336 :         failed += keywords_insert("INNER", INNER);
     240         336 :         failed += keywords_insert("INTO", INTO);
     241         336 :         failed += keywords_insert("IS", IS);
     242         336 :         failed += keywords_insert("JOIN", JOIN);
     243         336 :         failed += keywords_insert("KEY", KEY);
     244         336 :         failed += keywords_insert("LATERAL", LATERAL);
     245         336 :         failed += keywords_insert("LEFT", LEFT);
     246         336 :         failed += keywords_insert("LIKE", LIKE);
     247         336 :         failed += keywords_insert("LIMIT", LIMIT);
     248         336 :         failed += keywords_insert("SAMPLE", SAMPLE);
     249         336 :         failed += keywords_insert("SEED", SEED);
     250         336 :         failed += keywords_insert("LAST", LAST);
     251         336 :         failed += keywords_insert("LOCAL", LOCAL);
     252         336 :         failed += keywords_insert("NATURAL", NATURAL);
     253         336 :         failed += keywords_insert("NOT", NOT);
     254         336 :         failed += keywords_insert("NULL", sqlNULL);
     255         336 :         failed += keywords_insert("NULLS", NULLS);
     256         336 :         failed += keywords_insert("OFFSET", OFFSET);
     257         336 :         failed += keywords_insert("ON", ON);
     258         336 :         failed += keywords_insert("OPTIONS", OPTIONS);
     259         336 :         failed += keywords_insert("OPTION", OPTION);
     260         336 :         failed += keywords_insert("OR", OR);
     261         336 :         failed += keywords_insert("ORDER", ORDER);
     262         336 :         failed += keywords_insert("ORDERED", ORDERED);
     263         336 :         failed += keywords_insert("OUTER", OUTER);
     264         336 :         failed += keywords_insert("OVER", OVER);
     265         336 :         failed += keywords_insert("PARTITION", PARTITION);
     266         336 :         failed += keywords_insert("PATH", PATH);
     267         336 :         failed += keywords_insert("PRECISION", PRECISION);
     268         336 :         failed += keywords_insert("PRIMARY", PRIMARY);
     269             : 
     270         336 :         failed += keywords_insert("USER", USER);
     271         336 :         failed += keywords_insert("RENAME", RENAME);
     272         336 :         failed += keywords_insert("UNENCRYPTED", UNENCRYPTED);
     273         336 :         failed += keywords_insert("ENCRYPTED", ENCRYPTED);
     274         336 :         failed += keywords_insert("PASSWORD", PASSWORD);
     275         336 :         failed += keywords_insert("GRANT", GRANT);
     276         336 :         failed += keywords_insert("REVOKE", REVOKE);
     277         336 :         failed += keywords_insert("ROLE", ROLE);
     278         336 :         failed += keywords_insert("ADMIN", ADMIN);
     279         336 :         failed += keywords_insert("PRIVILEGES", PRIVILEGES);
     280         336 :         failed += keywords_insert("PUBLIC", PUBLIC);
     281         336 :         failed += keywords_insert("CURRENT_USER", CURRENT_USER);
     282         336 :         failed += keywords_insert("CURRENT_ROLE", CURRENT_ROLE);
     283         336 :         failed += keywords_insert("SESSION_USER", SESSION_USER);
     284         336 :         failed += keywords_insert("CURRENT_SCHEMA", CURRENT_SCHEMA);
     285         336 :         failed += keywords_insert("SESSION", sqlSESSION);
     286         336 :         failed += keywords_insert("MAX_MEMORY", MAX_MEMORY);
     287         336 :         failed += keywords_insert("MAX_WORKERS", MAX_WORKERS);
     288         336 :         failed += keywords_insert("OPTIMIZER", OPTIMIZER);
     289             : 
     290         336 :         failed += keywords_insert("RIGHT", RIGHT);
     291         336 :         failed += keywords_insert("SCHEMA", SCHEMA);
     292         336 :         failed += keywords_insert("SELECT", SELECT);
     293         336 :         failed += keywords_insert("SET", SET);
     294         336 :         failed += keywords_insert("SETS", SETS);
     295         336 :         failed += keywords_insert("AUTO_COMMIT", AUTO_COMMIT);
     296             : 
     297         336 :         failed += keywords_insert("ALL", ALL);
     298         336 :         failed += keywords_insert("ANY", ANY);
     299         336 :         failed += keywords_insert("SOME", SOME);
     300         336 :         failed += keywords_insert("EVERY", ANY);
     301             :         /*
     302             :            failed += keywords_insert("SQLCODE", SQLCODE );
     303             :          */
     304         336 :         failed += keywords_insert("COLUMN", COLUMN);
     305         336 :         failed += keywords_insert("TABLE", TABLE);
     306         336 :         failed += keywords_insert("TEMPORARY", TEMPORARY);
     307         336 :         failed += keywords_insert("TEMP", TEMP);
     308         336 :         failed += keywords_insert("REMOTE", REMOTE);
     309         336 :         failed += keywords_insert("MERGE", MERGE);
     310         336 :         failed += keywords_insert("REPLICA", REPLICA);
     311         336 :         failed += keywords_insert("UNLOGGED", UNLOGGED);
     312         336 :         failed += keywords_insert("TO", TO);
     313         336 :         failed += keywords_insert("UNION", UNION);
     314         336 :         failed += keywords_insert("EXCEPT", EXCEPT);
     315         336 :         failed += keywords_insert("INTERSECT", INTERSECT);
     316         336 :         failed += keywords_insert("CORRESPONDING", CORRESPONDING);
     317         336 :         failed += keywords_insert("UNIQUE", UNIQUE);
     318         336 :         failed += keywords_insert("USING", USING);
     319         336 :         failed += keywords_insert("VALUES", VALUES);
     320         336 :         failed += keywords_insert("VIEW", VIEW);
     321         336 :         failed += keywords_insert("WHERE", WHERE);
     322         336 :         failed += keywords_insert("WITH", WITH);
     323         336 :         failed += keywords_insert("WITHOUT", WITHOUT);
     324         336 :         failed += keywords_insert("DATA", DATA);
     325             : 
     326         336 :         failed += keywords_insert("DATE", sqlDATE);
     327         336 :         failed += keywords_insert("TIME", TIME);
     328         336 :         failed += keywords_insert("TIMESTAMP", TIMESTAMP);
     329         336 :         failed += keywords_insert("INTERVAL", INTERVAL);
     330         336 :         failed += keywords_insert("CURRENT_DATE", CURRENT_DATE);
     331         336 :         failed += keywords_insert("CURRENT_TIME", CURRENT_TIME);
     332         336 :         failed += keywords_insert("CURRENT_TIMESTAMP", CURRENT_TIMESTAMP);
     333         336 :         failed += keywords_insert("CURRENT_TIMEZONE", CURRENT_TIMEZONE);
     334         336 :         failed += keywords_insert("NOW", CURRENT_TIMESTAMP);
     335         336 :         failed += keywords_insert("LOCALTIME", LOCALTIME);
     336         336 :         failed += keywords_insert("LOCALTIMESTAMP", LOCALTIMESTAMP);
     337         336 :         failed += keywords_insert("ZONE", ZONE);
     338             : 
     339         336 :         failed += keywords_insert("CENTURY", CENTURY);
     340         336 :         failed += keywords_insert("DECADE", DECADE);
     341         336 :         failed += keywords_insert("YEAR", YEAR);
     342         336 :         failed += keywords_insert("QUARTER", QUARTER);
     343         336 :         failed += keywords_insert("MONTH", MONTH);
     344         336 :         failed += keywords_insert("WEEK", WEEK);
     345         336 :         failed += keywords_insert("DOW", DOW);
     346         336 :         failed += keywords_insert("DOY", DOY);
     347         336 :         failed += keywords_insert("DAY", DAY);
     348         336 :         failed += keywords_insert("HOUR", HOUR);
     349         336 :         failed += keywords_insert("MINUTE", MINUTE);
     350         336 :         failed += keywords_insert("SECOND", SECOND);
     351         336 :         failed += keywords_insert("EPOCH", EPOCH);
     352             : 
     353         336 :         failed += keywords_insert("POSITION", POSITION);
     354         336 :         failed += keywords_insert("SUBSTRING", SUBSTRING);
     355         336 :         failed += keywords_insert("SPLIT_PART", SPLIT_PART);
     356         336 :         failed += keywords_insert("TRIM", TRIM);
     357         336 :         failed += keywords_insert("LEADING", LEADING);
     358         336 :         failed += keywords_insert("TRAILING", TRAILING);
     359         336 :         failed += keywords_insert("BOTH", BOTH);
     360             : 
     361         336 :         failed += keywords_insert("CASE", CASE);
     362         336 :         failed += keywords_insert("WHEN", WHEN);
     363         336 :         failed += keywords_insert("THEN", THEN);
     364         336 :         failed += keywords_insert("ELSE", ELSE);
     365         336 :         failed += keywords_insert("END", END);
     366         336 :         failed += keywords_insert("NULLIF", NULLIF);
     367         336 :         failed += keywords_insert("COALESCE", COALESCE);
     368         336 :         failed += keywords_insert("ELSEIF", ELSEIF);
     369         336 :         failed += keywords_insert("IF", IF);
     370         336 :         failed += keywords_insert("WHILE", WHILE);
     371         336 :         failed += keywords_insert("DO", DO);
     372             : 
     373         336 :         failed += keywords_insert("COMMIT", COMMIT);
     374         336 :         failed += keywords_insert("ROLLBACK", ROLLBACK);
     375         336 :         failed += keywords_insert("SAVEPOINT", SAVEPOINT);
     376         336 :         failed += keywords_insert("RELEASE", RELEASE);
     377         336 :         failed += keywords_insert("WORK", WORK);
     378         336 :         failed += keywords_insert("CHAIN", CHAIN);
     379         336 :         failed += keywords_insert("PRESERVE", PRESERVE);
     380         336 :         failed += keywords_insert("ROWS", ROWS);
     381         336 :         failed += keywords_insert("NO", NO);
     382         336 :         failed += keywords_insert("START", START);
     383         336 :         failed += keywords_insert("TRANSACTION", TRANSACTION);
     384         336 :         failed += keywords_insert("READ", READ);
     385         336 :         failed += keywords_insert("WRITE", WRITE);
     386         336 :         failed += keywords_insert("ONLY", ONLY);
     387         336 :         failed += keywords_insert("ISOLATION", ISOLATION);
     388         336 :         failed += keywords_insert("LEVEL", LEVEL);
     389         336 :         failed += keywords_insert("UNCOMMITTED", UNCOMMITTED);
     390         336 :         failed += keywords_insert("COMMITTED", COMMITTED);
     391         336 :         failed += keywords_insert("REPEATABLE", sqlREPEATABLE);
     392         336 :         failed += keywords_insert("SNAPSHOT", SNAPSHOT);
     393         336 :         failed += keywords_insert("SERIALIZABLE", SERIALIZABLE);
     394         336 :         failed += keywords_insert("DIAGNOSTICS", DIAGNOSTICS);
     395         336 :         failed += keywords_insert("SIZE", sqlSIZE);
     396         336 :         failed += keywords_insert("STORAGE", STORAGE);
     397             : 
     398         336 :         failed += keywords_insert("TYPE", TYPE);
     399         336 :         failed += keywords_insert("PROCEDURE", PROCEDURE);
     400         336 :         failed += keywords_insert("FUNCTION", FUNCTION);
     401         336 :         failed += keywords_insert("LOADER", sqlLOADER);
     402         336 :         failed += keywords_insert("REPLACE", REPLACE);
     403             : 
     404         336 :         failed += keywords_insert("FIELD", FIELD);
     405         336 :         failed += keywords_insert("FILTER", FILTER);
     406         336 :         failed += keywords_insert("AGGREGATE", AGGREGATE);
     407         336 :         failed += keywords_insert("RETURNS", RETURNS);
     408         336 :         failed += keywords_insert("EXTERNAL", EXTERNAL);
     409         336 :         failed += keywords_insert("NAME", sqlNAME);
     410         336 :         failed += keywords_insert("RETURN", RETURN);
     411         336 :         failed += keywords_insert("CALL", CALL);
     412         336 :         failed += keywords_insert("LANGUAGE", LANGUAGE);
     413             : 
     414         336 :         failed += keywords_insert("ANALYZE", ANALYZE);
     415         336 :         failed += keywords_insert("MINMAX", MINMAX);
     416         336 :         failed += keywords_insert("EXPLAIN", SQL_EXPLAIN);
     417         336 :         failed += keywords_insert("PLAN", SQL_PLAN);
     418         336 :         failed += keywords_insert("TRACE", SQL_TRACE);
     419         336 :         failed += keywords_insert("PREPARE", PREPARE);
     420         336 :         failed += keywords_insert("PREP", PREP);
     421         336 :         failed += keywords_insert("EXECUTE", EXECUTE);
     422         336 :         failed += keywords_insert("EXEC", EXEC);
     423         336 :         failed += keywords_insert("DEALLOCATE", DEALLOCATE);
     424             : 
     425         336 :         failed += keywords_insert("INDEX", INDEX);
     426             : 
     427         336 :         failed += keywords_insert("SEQUENCE", SEQUENCE);
     428         336 :         failed += keywords_insert("RESTART", RESTART);
     429         336 :         failed += keywords_insert("INCREMENT", INCREMENT);
     430         336 :         failed += keywords_insert("MAXVALUE", MAXVALUE);
     431         336 :         failed += keywords_insert("MINVALUE", MINVALUE);
     432         336 :         failed += keywords_insert("CYCLE", CYCLE);
     433         336 :         failed += keywords_insert("CACHE", CACHE);
     434         336 :         failed += keywords_insert("NEXT", NEXT);
     435         336 :         failed += keywords_insert("VALUE", VALUE);
     436         336 :         failed += keywords_insert("GENERATED", GENERATED);
     437         336 :         failed += keywords_insert("ALWAYS", ALWAYS);
     438         336 :         failed += keywords_insert("IDENTITY", IDENTITY);
     439         336 :         failed += keywords_insert("SERIAL", SERIAL);
     440         336 :         failed += keywords_insert("BIGSERIAL", BIGSERIAL);
     441         336 :         failed += keywords_insert("AUTO_INCREMENT", AUTO_INCREMENT);
     442         336 :         failed += keywords_insert("CONTINUE", CONTINUE);
     443             : 
     444         336 :         failed += keywords_insert("TRIGGER", TRIGGER);
     445         336 :         failed += keywords_insert("ATOMIC", ATOMIC);
     446         336 :         failed += keywords_insert("BEGIN", BEGIN);
     447         336 :         failed += keywords_insert("OF", OF);
     448         336 :         failed += keywords_insert("BEFORE", BEFORE);
     449         336 :         failed += keywords_insert("AFTER", AFTER);
     450         336 :         failed += keywords_insert("ROW", ROW);
     451         336 :         failed += keywords_insert("STATEMENT", STATEMENT);
     452         336 :         failed += keywords_insert("NEW", sqlNEW);
     453         336 :         failed += keywords_insert("OLD", OLD);
     454         336 :         failed += keywords_insert("EACH", EACH);
     455         336 :         failed += keywords_insert("REFERENCING", REFERENCING);
     456             : 
     457         336 :         failed += keywords_insert("RANGE", RANGE);
     458         336 :         failed += keywords_insert("UNBOUNDED", UNBOUNDED);
     459         336 :         failed += keywords_insert("PRECEDING", PRECEDING);
     460         336 :         failed += keywords_insert("FOLLOWING", FOLLOWING);
     461         336 :         failed += keywords_insert("CURRENT", CURRENT);
     462         336 :         failed += keywords_insert("EXCLUDE", EXCLUDE);
     463         336 :         failed += keywords_insert("OTHERS", OTHERS);
     464         336 :         failed += keywords_insert("TIES", TIES);
     465         336 :         failed += keywords_insert("GROUPS", GROUPS);
     466         336 :         failed += keywords_insert("WINDOW", WINDOW);
     467             : 
     468             :         /* special SQL/XML keywords */
     469         336 :         failed += keywords_insert("XMLCOMMENT", XMLCOMMENT);
     470         336 :         failed += keywords_insert("XMLCONCAT", XMLCONCAT);
     471         336 :         failed += keywords_insert("XMLDOCUMENT", XMLDOCUMENT);
     472         336 :         failed += keywords_insert("XMLELEMENT", XMLELEMENT);
     473         336 :         failed += keywords_insert("XMLATTRIBUTES", XMLATTRIBUTES);
     474         336 :         failed += keywords_insert("XMLFOREST", XMLFOREST);
     475         336 :         failed += keywords_insert("XMLPARSE", XMLPARSE);
     476         336 :         failed += keywords_insert("STRIP", STRIP);
     477         336 :         failed += keywords_insert("WHITESPACE", WHITESPACE);
     478         336 :         failed += keywords_insert("XMLPI", XMLPI);
     479         336 :         failed += keywords_insert("XMLQUERY", XMLQUERY);
     480         336 :         failed += keywords_insert("PASSING", PASSING);
     481         336 :         failed += keywords_insert("XMLTEXT", XMLTEXT);
     482         336 :         failed += keywords_insert("NIL", NIL);
     483         336 :         failed += keywords_insert("REF", REF);
     484         336 :         failed += keywords_insert("ABSENT", ABSENT);
     485         336 :         failed += keywords_insert("DOCUMENT", DOCUMENT);
     486         336 :         failed += keywords_insert("ELEMENT", ELEMENT);
     487         336 :         failed += keywords_insert("CONTENT", CONTENT);
     488         336 :         failed += keywords_insert("XMLNAMESPACES", XMLNAMESPACES);
     489         336 :         failed += keywords_insert("NAMESPACE", NAMESPACE);
     490         336 :         failed += keywords_insert("XMLVALIDATE", XMLVALIDATE);
     491         336 :         failed += keywords_insert("RETURNING", RETURNING);
     492         336 :         failed += keywords_insert("LOCATION", LOCATION);
     493         336 :         failed += keywords_insert("ID", ID);
     494         336 :         failed += keywords_insert("ACCORDING", ACCORDING);
     495         336 :         failed += keywords_insert("XMLSCHEMA", XMLSCHEMA);
     496         336 :         failed += keywords_insert("URI", URI);
     497         336 :         failed += keywords_insert("XMLAGG", XMLAGG);
     498             : 
     499             :         /* keywords for opengis */
     500         336 :         failed += keywords_insert("GEOMETRY", GEOMETRY);
     501             : 
     502         336 :         failed += keywords_insert("POINT", GEOMETRYSUBTYPE);
     503         336 :         failed += keywords_insert("LINESTRING", GEOMETRYSUBTYPE);
     504         336 :         failed += keywords_insert("POLYGON", GEOMETRYSUBTYPE);
     505         336 :         failed += keywords_insert("MULTIPOINT", GEOMETRYSUBTYPE);
     506         336 :         failed += keywords_insert("MULTILINESTRING", GEOMETRYSUBTYPE);
     507         336 :         failed += keywords_insert("MULTIPOLYGON", GEOMETRYSUBTYPE);
     508         336 :         failed += keywords_insert("GEOMETRYCOLLECTION", GEOMETRYSUBTYPE);
     509             : 
     510         336 :         failed += keywords_insert("POINTZ", GEOMETRYSUBTYPE);
     511         336 :         failed += keywords_insert("LINESTRINGZ", GEOMETRYSUBTYPE);
     512         336 :         failed += keywords_insert("POLYGONZ", GEOMETRYSUBTYPE);
     513         336 :         failed += keywords_insert("MULTIPOINTZ", GEOMETRYSUBTYPE);
     514         336 :         failed += keywords_insert("MULTILINESTRINGZ", GEOMETRYSUBTYPE);
     515         336 :         failed += keywords_insert("MULTIPOLYGONZ", GEOMETRYSUBTYPE);
     516         336 :         failed += keywords_insert("GEOMETRYCOLLECTIONZ", GEOMETRYSUBTYPE);
     517             : 
     518         336 :         failed += keywords_insert("POINTM", GEOMETRYSUBTYPE);
     519         336 :         failed += keywords_insert("LINESTRINGM", GEOMETRYSUBTYPE);
     520         336 :         failed += keywords_insert("POLYGONM", GEOMETRYSUBTYPE);
     521         336 :         failed += keywords_insert("MULTIPOINTM", GEOMETRYSUBTYPE);
     522         336 :         failed += keywords_insert("MULTILINESTRINGM", GEOMETRYSUBTYPE);
     523         336 :         failed += keywords_insert("MULTIPOLYGONM", GEOMETRYSUBTYPE);
     524         336 :         failed += keywords_insert("GEOMETRYCOLLECTIONM", GEOMETRYSUBTYPE);
     525             : 
     526         336 :         failed += keywords_insert("POINTZM", GEOMETRYSUBTYPE);
     527         336 :         failed += keywords_insert("LINESTRINGZM", GEOMETRYSUBTYPE);
     528         336 :         failed += keywords_insert("POLYGONZM", GEOMETRYSUBTYPE);
     529         336 :         failed += keywords_insert("MULTIPOINTZM", GEOMETRYSUBTYPE);
     530         336 :         failed += keywords_insert("MULTILINESTRINGZM", GEOMETRYSUBTYPE);
     531         336 :         failed += keywords_insert("MULTIPOLYGONZM", GEOMETRYSUBTYPE);
     532         336 :         failed += keywords_insert("GEOMETRYCOLLECTIONZM", GEOMETRYSUBTYPE);
     533         336 :         failed += keywords_insert("LOGIN", LOGIN);
     534             :         // odbc keywords
     535         336 :         failed += keywords_insert("d", ODBC_DATE_ESCAPE_PREFIX);
     536         336 :         failed += keywords_insert("t", ODBC_TIME_ESCAPE_PREFIX);
     537         336 :         failed += keywords_insert("ts", ODBC_TIMESTAMP_ESCAPE_PREFIX);
     538         336 :         failed += keywords_insert("guid", ODBC_GUID_ESCAPE_PREFIX);
     539         336 :         failed += keywords_insert("fn", ODBC_FUNC_ESCAPE_PREFIX);
     540         336 :         failed += keywords_insert("oj", ODBC_OJ_ESCAPE_PREFIX);
     541         336 :         failed += keywords_insert("DAYNAME", DAYNAME);
     542         336 :         failed += keywords_insert("IFNULL", IFNULL);
     543         336 :         failed += keywords_insert("MONTHNAME", MONTHNAME);
     544         336 :         failed += keywords_insert("TIMESTAMPADD", TIMESTAMPADD);
     545         336 :         failed += keywords_insert("TIMESTAMPDIFF", TIMESTAMPDIFF);
     546         336 :         failed += keywords_insert("SQL_BIGINT", SQL_BIGINT);
     547         336 :         failed += keywords_insert("SQL_BINARY", SQL_BINARY);
     548         336 :         failed += keywords_insert("SQL_BIT", SQL_BIT);
     549         336 :         failed += keywords_insert("SQL_CHAR", SQL_CHAR);
     550         336 :         failed += keywords_insert("SQL_DATE", SQL_DATE);
     551         336 :         failed += keywords_insert("SQL_DECIMAL", SQL_DECIMAL);
     552         336 :         failed += keywords_insert("SQL_DOUBLE", SQL_DOUBLE);
     553         336 :         failed += keywords_insert("SQL_FLOAT", SQL_FLOAT);
     554         336 :         failed += keywords_insert("SQL_GUID", SQL_GUID);
     555         336 :         failed += keywords_insert("SQL_HUGEINT", SQL_HUGEINT);
     556         336 :         failed += keywords_insert("SQL_INTEGER", SQL_INTEGER);
     557         336 :         failed += keywords_insert("SQL_INTERVAL_DAY", SQL_INTERVAL_DAY);
     558         336 :         failed += keywords_insert("SQL_INTERVAL_DAY_TO_HOUR", SQL_INTERVAL_DAY_TO_HOUR);
     559         336 :         failed += keywords_insert("SQL_INTERVAL_DAY_TO_MINUTE", SQL_INTERVAL_DAY_TO_MINUTE);
     560         336 :         failed += keywords_insert("SQL_INTERVAL_DAY_TO_SECOND", SQL_INTERVAL_DAY_TO_SECOND);
     561         336 :         failed += keywords_insert("SQL_INTERVAL_HOUR", SQL_INTERVAL_HOUR);
     562         336 :         failed += keywords_insert("SQL_INTERVAL_HOUR_TO_MINUTE", SQL_INTERVAL_HOUR_TO_MINUTE);
     563         336 :         failed += keywords_insert("SQL_INTERVAL_HOUR_TO_SECOND", SQL_INTERVAL_HOUR_TO_SECOND);
     564         336 :         failed += keywords_insert("SQL_INTERVAL_MINUTE", SQL_INTERVAL_MINUTE);
     565         336 :         failed += keywords_insert("SQL_INTERVAL_MINUTE_TO_SECOND", SQL_INTERVAL_MINUTE_TO_SECOND);
     566         336 :         failed += keywords_insert("SQL_INTERVAL_MONTH", SQL_INTERVAL_MONTH);
     567         336 :         failed += keywords_insert("SQL_INTERVAL_SECOND", SQL_INTERVAL_SECOND);
     568         336 :         failed += keywords_insert("SQL_INTERVAL_YEAR", SQL_INTERVAL_YEAR);
     569         336 :         failed += keywords_insert("SQL_INTERVAL_YEAR_TO_MONTH", SQL_INTERVAL_YEAR_TO_MONTH);
     570         336 :         failed += keywords_insert("SQL_LONGVARBINARY", SQL_LONGVARBINARY);
     571         336 :         failed += keywords_insert("SQL_LONGVARCHAR", SQL_LONGVARCHAR);
     572         336 :         failed += keywords_insert("SQL_NUMERIC", SQL_NUMERIC);
     573         336 :         failed += keywords_insert("SQL_REAL", SQL_REAL);
     574         336 :         failed += keywords_insert("SQL_SMALLINT", SQL_SMALLINT);
     575         336 :         failed += keywords_insert("SQL_TIME", SQL_TIME);
     576         336 :         failed += keywords_insert("SQL_TIMESTAMP", SQL_TIMESTAMP);
     577         336 :         failed += keywords_insert("SQL_TINYINT", SQL_TINYINT);
     578         336 :         failed += keywords_insert("SQL_VARBINARY", SQL_VARBINARY);
     579         336 :         failed += keywords_insert("SQL_VARCHAR", SQL_VARCHAR);
     580         336 :         failed += keywords_insert("SQL_WCHAR", SQL_WCHAR);
     581         336 :         failed += keywords_insert("SQL_WLONGVARCHAR", SQL_WLONGVARCHAR);
     582         336 :         failed += keywords_insert("SQL_WVARCHAR", SQL_WVARCHAR);
     583         336 :         failed += keywords_insert("SQL_TSI_FRAC_SECOND", SQL_TSI_FRAC_SECOND);
     584         336 :         failed += keywords_insert("SQL_TSI_SECOND", SQL_TSI_SECOND);
     585         336 :         failed += keywords_insert("SQL_TSI_MINUTE", SQL_TSI_MINUTE);
     586         336 :         failed += keywords_insert("SQL_TSI_HOUR", SQL_TSI_HOUR);
     587         336 :         failed += keywords_insert("SQL_TSI_DAY", SQL_TSI_DAY);
     588         336 :         failed += keywords_insert("SQL_TSI_WEEK", SQL_TSI_WEEK);
     589         336 :         failed += keywords_insert("SQL_TSI_MONTH", SQL_TSI_MONTH);
     590         336 :         failed += keywords_insert("SQL_TSI_QUARTER", SQL_TSI_QUARTER);
     591         336 :         failed += keywords_insert("SQL_TSI_YEAR", SQL_TSI_YEAR);
     592             : 
     593         336 :         failed += keywords_insert("LEAST", MARGFUNC);
     594         336 :         failed += keywords_insert("GREATEST", MARGFUNC);
     595         336 :         return failed;
     596             : }
     597             : 
     598             : #define find_keyword_bs(lc, s) find_keyword(lc->rs->buf+lc->rs->pos+s)
     599             : 
     600             : void
     601      238296 : scanner_init(struct scanner *s, bstream *rs, stream *ws)
     602             : {
     603      476592 :         *s = (struct scanner) {
     604             :                 .rs = rs,
     605             :                 .ws = ws,
     606             :                 .mode = LINE_N,
     607      238296 :                 .raw_string_mode = GDKgetenv_istrue("raw_strings"),
     608             :         };
     609      238296 : }
     610             : 
     611             : void
     612     1260415 : scanner_query_processed(struct scanner *s)
     613             : {
     614     1260415 :         int cur;
     615             : 
     616     1260415 :         if (s->yybak) {
     617      488213 :                 s->rs->buf[s->rs->pos + s->yycur] = s->yybak;
     618      488213 :                 s->yybak = 0;
     619             :         }
     620     1260415 :         if (s->rs) {
     621     1260415 :                 s->rs->pos += s->yycur;
     622             :                 /* completely eat the query including white space after the ; */
     623     2394044 :                 while (s->rs->pos < s->rs->len &&
     624     2069664 :                            (cur = s->rs->buf[s->rs->pos], iswspace(cur))) {
     625     1133629 :                         s->rs->pos++;
     626             :                 }
     627             :         }
     628             :         /*assert(s->rs->pos <= s->rs->len);*/
     629     1260415 :         s->yycur = 0;
     630     1260415 :         s->started = 0;
     631     1260415 :         s->as = 0;
     632     1260415 :         s->schema = NULL;
     633     1260415 : }
     634             : 
     635             : static int
     636          33 : scanner_error(mvc *lc, int cur)
     637             : {
     638          33 :         switch (cur) {
     639           0 :         case EOF:
     640           0 :                 (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected end of input");
     641           0 :                 return EOF;
     642          33 :         default:
     643             :                 /* on Windows at least, iswcntrl returns TRUE for
     644             :                  * U+FEFF, but we just want consistent error
     645             :                  * messages */
     646          33 :                 (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected%s character (U+%04X)", iswcntrl(cur) && cur != 0xFEFF ? " control" : "", (unsigned) cur);
     647             :         }
     648          33 :         return LEX_ERROR;
     649             : }
     650             : 
     651             : 
     652             : /*
     653             :    UTF-8 encoding is as follows:
     654             : U-00000000 - U-0000007F: 0xxxxxxx
     655             : U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
     656             : U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
     657             : U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
     658             : U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
     659             : U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
     660             : */
     661             : /* To be correctly coded UTF-8, the sequence should be the shortest
     662             :    possible encoding of the value being encoded.  This means that for
     663             :    an encoding of length n+1 (1 <= n <= 5), at least one of the bits in
     664             :    utf8chkmsk[n] should be non-zero (else the encoding could be
     665             :    shorter).
     666             : */
     667             : static const int utf8chkmsk[] = {
     668             :         0x0000007f,
     669             :         0x00000780,
     670             :         0x0000f800,
     671             :         0x001f0000,
     672             :         0x03e00000,
     673             :         0x7c000000
     674             : };
     675             : 
     676             : static void
     677    30633609 : utf8_putchar(struct scanner *lc, int ch)
     678             : {
     679    30633609 :         if ((ch) < 0x80) {
     680    30633604 :                 lc->yycur--;
     681           5 :         } else if ((ch) < 0x800) {
     682           0 :                 lc->yycur -= 2;
     683           5 :         } else if ((ch) < 0x10000) {
     684           5 :                 lc->yycur -= 3;
     685             :         } else {
     686           0 :                 lc->yycur -= 4;
     687             :         }
     688    30633609 : }
     689             : 
     690             : static inline int
     691   133757971 : scanner_read_more(struct scanner *lc, size_t n)
     692             : {
     693   133757971 :         bstream *b = lc->rs;
     694   133757971 :         bool more = false;
     695             : 
     696             : 
     697   133761788 :         while (b->len < b->pos + lc->yycur + n) {
     698             : 
     699      138819 :                 if (lc->mode == LINE_1 || !lc->started)
     700             :                         return EOF;
     701             : 
     702             :                 /* query is not finished ask for more */
     703        6272 :                 if (b->eof || !isa_block_stream(b->s)) {
     704        4360 :                         if (mnstr_write(lc->ws, PROMPT2, sizeof(PROMPT2) - 1, 1) == 1)
     705        1912 :                                 mnstr_flush(lc->ws, MNSTR_FLUSH_DATA);
     706        1912 :                         b->eof = false;
     707        1912 :                         more = true;
     708             :                 }
     709             :                 /* we need more query text */
     710        3824 :                 if (bstream_next(b) < 0 ||
     711             :                     /* we asked for more data but didn't get any */
     712        1912 :                     (more && b->eof && b->len < b->pos + lc->yycur + n))
     713             :                         return EOF;
     714             :         }
     715             :         return 1;
     716             : }
     717             : 
     718             : static inline int
     719   132529438 : scanner_getc(struct scanner *lc)
     720             : {
     721   132529438 :         bstream *b = lc->rs;
     722   132529438 :         unsigned char *s = NULL;
     723   132529438 :         int c, m, n, mask;
     724             : 
     725   132529438 :         if (scanner_read_more(lc, 1) == EOF) {
     726             :                 //lc->errstr = SQLSTATE(42000) "end of input stream";
     727             :                 return EOF;
     728             :         }
     729   132396649 :         lc->errstr = NULL;
     730             : 
     731   132396649 :         s = (unsigned char *) b->buf + b->pos + lc->yycur++;
     732   132396649 :         if (((c = *s) & 0x80) == 0) {
     733             :                 /* 7-bit char */
     734             :                 return c;
     735             :         }
     736       88230 :         for (n = 0, m = 0x40; c & m; n++, m >>= 1)
     737             :                 ;
     738             :         /* n now is number of 10xxxxxx bytes that should follow */
     739       29435 :         if (n == 0 || n >= 6 || (b->pos + n) > b->len) {
     740             :                 /* incorrect UTF-8 sequence */
     741             :                 /* n==0: c == 10xxxxxx */
     742             :                 /* n>=6: c == 1111111x */
     743           0 :                 lc->errstr = SQLSTATE(42000) "invalid start of UTF-8 sequence";
     744           0 :                 goto error;
     745             :         }
     746             : 
     747       29435 :         if (scanner_read_more(lc, (size_t) n) == EOF)
     748             :                 return EOF;
     749       29435 :         s = (unsigned char *) b->buf + b->pos + lc->yycur;
     750             : 
     751       29435 :         mask = utf8chkmsk[n];
     752       29435 :         c &= ~(0xFFC0 >> n);  /* remove non-x bits */
     753       88229 :         while (--n >= 0) {
     754       58795 :                 c <<= 6;
     755       58795 :                 lc->yycur++;
     756       58795 :                 if (((m = *s++) & 0xC0) != 0x80) {
     757             :                         /* incorrect UTF-8 sequence: byte is not 10xxxxxx */
     758             :                         /* this includes end-of-string (m == 0) */
     759           1 :                         lc->errstr = SQLSTATE(42000) "invalid continuation in UTF-8 sequence";
     760           1 :                         goto error;
     761             :                 }
     762       58794 :                 c |= m & 0x3F;
     763             :         }
     764       29434 :         if ((c & mask) == 0) {
     765             :                 /* incorrect UTF-8 sequence: not shortest possible */
     766           0 :                 lc->errstr = SQLSTATE(42000) "not shortest possible UTF-8 sequence";
     767           0 :                 goto error;
     768             :         }
     769             : 
     770             :         return c;
     771             : 
     772           1 : error:
     773           1 :         if (b->pos + lc->yycur < b->len)    /* skip bogus char */
     774           0 :                 lc->yycur++;
     775             :         return EOF;
     776             : }
     777             : 
     778             : static int
     779    27441207 : scanner_token(struct scanner *lc, int token)
     780             : {
     781    27441207 :         lc->yybak = lc->rs->buf[lc->rs->pos + lc->yycur];
     782    27441207 :         lc->rs->buf[lc->rs->pos + lc->yycur] = 0;
     783    27441207 :         lc->yyval = token;
     784    27441207 :         return lc->yyval;
     785             : }
     786             : 
     787             : static int
     788     2080833 : scanner_string(mvc *c, int quote, bool escapes)
     789             : {
     790     2080833 :         struct scanner *lc = &c->scanner;
     791     2080833 :         bstream *rs = lc->rs;
     792     2080833 :         int cur = quote;
     793     2080833 :         bool escape = false;
     794     2080833 :         const size_t limit = quote == '"' ? 1 << 11 : 1 << 30;
     795             : 
     796     2080833 :         lc->started = 1;
     797     2118597 :         while (cur != EOF) {
     798     2118582 :                 size_t pos = 0;
     799     2118582 :                 const size_t yycur = rs->pos + lc->yycur;
     800             : 
     801    31145077 :                 while (cur != EOF && (quote != '"' || cur != 0xFEFF) && pos < limit &&
     802    29026495 :                        (((cur = rs->buf[yycur + pos++]) & 0x80) == 0) &&
     803    58023544 :                        cur && (cur != quote || escape)) {
     804    26907914 :                         if (escapes && cur == '\\')
     805        6477 :                                 escape = !escape;
     806             :                         else
     807             :                                 escape = false;
     808             :                 }
     809     2118582 :                 if (pos == limit) {
     810           0 :                         (void) sql_error(c, 2, SQLSTATE(42000) "string too long");
     811           0 :                         return LEX_ERROR;
     812             :                 }
     813             :                 /* BOM character not allowed as an identifier */
     814     2118582 :                 if (cur == EOF || (quote == '"' && cur == 0xFEFF))
     815           1 :                         return scanner_error(c, cur);
     816     2118581 :                 lc->yycur += pos;
     817             :                 /* check for quote escaped quote: Obscure SQL Rule */
     818     2118581 :                 if (cur == quote && rs->buf[yycur + pos] == quote) {
     819        8331 :                         lc->yycur++;
     820        8331 :                         continue;
     821             :                 }
     822     2110250 :                 assert(yycur + pos <= rs->len + 1);
     823     2110250 :                 if (cur == quote && !escape) {
     824     2080803 :                         return scanner_token(lc, STRING);
     825             :                 }
     826       29447 :                 lc->yycur--; /* go back to current (possibly invalid) char */
     827             :                 /* long utf8, if correct isn't the quote */
     828       29447 :                 if (!cur) {
     829          30 :                         if (lc->rs->len >= lc->rs->pos + lc->yycur + 1) {
     830          14 :                                 (void) sql_error(c, 2, SQLSTATE(42000) "NULL byte in string");
     831          14 :                                 return LEX_ERROR;
     832             :                         }
     833          16 :                         cur = scanner_read_more(lc, 1);
     834             :                 } else {
     835       29417 :                         cur = scanner_getc(lc);
     836             :                 }
     837             :         }
     838          15 :         (void) sql_error(c, 2, "%s", lc->errstr ? lc->errstr : SQLSTATE(42000) "Unexpected end of input");
     839          15 :         return EOF;
     840             : }
     841             : 
     842             : /* scan a structure {blah} into a string. We only count the matching {}
     843             :  * unless escaped. We do not consider embeddings in string literals yet
     844             :  */
     845             : 
     846             : static int
     847         215 : scanner_body(mvc *c)
     848             : {
     849         215 :         struct scanner *lc = &c->scanner;
     850         215 :         bstream *rs = lc->rs;
     851         215 :         int cur = (int) 'x';
     852         215 :         int blk = 1;
     853         215 :         bool escape = false;
     854             : 
     855         215 :         lc->started = 1;
     856         215 :         assert(rs->buf[rs->pos + lc->yycur-1] == '{');
     857         243 :         while (cur != EOF) {
     858         243 :                 size_t pos = rs->pos + lc->yycur;
     859             : 
     860       30022 :                 while ((((cur = rs->buf[pos++]) & 0x80) == 0) && cur && (blk || escape)) {
     861       29779 :                         if (cur != '\\')
     862             :                                 escape = false;
     863             :                         else
     864          12 :                                 escape = !escape;
     865       29779 :                         blk += cur =='{';
     866       29779 :                         blk -= cur =='}';
     867             :                 }
     868         243 :                 lc->yycur = pos - rs->pos;
     869         243 :                 assert(pos <= rs->len + 1);
     870         243 :                 if (blk == 0 && !escape){
     871         215 :                         lc->yycur--; /* go back to current (possibly invalid) char */
     872         215 :                         return scanner_token(lc, X_BODY);
     873             :                 }
     874          28 :                 lc->yycur--; /* go back to current (possibly invalid) char */
     875          28 :                 if (!cur) {
     876          28 :                         if (lc->rs->len >= lc->rs->pos + lc->yycur + 1) {
     877           0 :                                 (void) sql_error(c, 2, SQLSTATE(42000) "NULL byte in string");
     878           0 :                                 return LEX_ERROR;
     879             :                         }
     880          28 :                         cur = scanner_read_more(lc, 1);
     881             :                 } else {
     882           0 :                         cur = scanner_getc(lc);
     883             :                 }
     884             :         }
     885           0 :         (void) sql_error(c, 2, SQLSTATE(42000) "Unexpected end of input");
     886           0 :         return EOF;
     887             : }
     888             : 
     889             : static int
     890    13156751 : keyword_or_ident(mvc * c, int cur)
     891             : {
     892    13156751 :         struct scanner *lc = &c->scanner;
     893    13156751 :         keyword *k = NULL;
     894    13156751 :         size_t s;
     895             : 
     896    13156751 :         lc->started = 1;
     897    13156751 :         utf8_putchar(lc, cur);
     898    13156731 :         s = lc->yycur;
     899    13156731 :         lc->yyval = IDENT;
     900    78560386 :         while ((cur = scanner_getc(lc)) != EOF) {
     901    78560359 :                 if (!iswalnum(cur) && cur != '_') {
     902    13156704 :                         utf8_putchar(lc, cur);
     903    13156728 :                         (void)scanner_token(lc, IDENT);
     904    13156728 :                         if ((k = find_keyword_bs(lc,s)))
     905     8073866 :                                 lc->yyval = k->token;
     906    13156863 :                         return lc->yyval;
     907             :                 }
     908             :         }
     909             :         if (cur < 0)
     910             :                 return cur;
     911             :         (void)scanner_token(lc, IDENT);
     912             :         if ((k = find_keyword_bs(lc,s)))
     913             :                 lc->yyval = k->token;
     914             :         return lc->yyval;
     915             : }
     916             : 
     917             : static int
     918    13903400 : skip_white_space(struct scanner * lc)
     919             : {
     920    17698662 :         int cur;
     921             : 
     922    17698662 :         do {
     923    17698662 :                 lc->yysval = lc->yycur;
     924    17698662 :         } while ((cur = scanner_getc(lc)) != EOF && iswspace(cur));
     925    13903542 :         return cur;
     926             : }
     927             : 
     928             : static int
     929       67564 : skip_c_comment(struct scanner * lc)
     930             : {
     931       67564 :         int cur;
     932       67564 :         int prev = 0;
     933       67564 :         int started = lc->started;
     934       67564 :         int depth = 1;
     935             : 
     936       67564 :         lc->started = 1;
     937     1362768 :         while (depth > 0 && (cur = scanner_getc(lc)) != EOF) {
     938     1295204 :                 if (prev == '*' && cur == '/')
     939       67564 :                         depth--;
     940     1227640 :                 else if (prev == '/' && cur == '*') {
     941             :                         /* block comments can nest */
     942           0 :                         cur = 0; /* prevent slash-star-slash from matching */
     943           0 :                         depth++;
     944             :                 }
     945             :                 prev = cur;
     946             :         }
     947       67564 :         lc->yysval = lc->yycur;
     948       67564 :         lc->started = started;
     949             :         /* a comment is equivalent to a newline */
     950       67564 :         return cur == EOF ? cur : '\n';
     951             : }
     952             : 
     953             : static int
     954        2919 : skip_sql_comment(struct scanner * lc)
     955             : {
     956        2919 :         int cur;
     957        2919 :         int started = lc->started;
     958             : 
     959        2919 :         lc->started = 1;
     960      820924 :         while ((cur = scanner_getc(lc)) != EOF && (cur != '\n'))
     961             :                 ;
     962        2919 :         lc->yysval = lc->yycur;
     963        2919 :         lc->started = started;
     964             :         /* a comment is equivalent to a newline */
     965        2919 :         return cur;
     966             : }
     967             : 
     968             : static int tokenize(mvc * lc, int cur);
     969             : 
     /*
      * Digit predicates, one per radix.  check_validity_number() picks one of
      * them through a function pointer, which is why they all take an int and
      * return bool.  cur is a character as returned by scanner_getc().
      */
     /* decimal: whatever iswdigit() accepts */
     970     5647365 : static inline bool is_valid_decimal_digit(int cur) { return (iswdigit(cur)); }
     /* binary: a digit that sorts before '2', i.e. '0' or '1' */
     971          13 : static inline bool is_valid_binary_digit(int cur) { return (iswdigit(cur) && cur < '2'); }
     /* octal: a digit that sorts before '8', i.e. '0' .. '7' */
     972          10 : static inline bool is_valid_octal_digit(int cur) { return (iswdigit(cur) && cur < '8'); }
     /* hexadecimal: whatever iswxdigit() accepts */
     973        3688 : static inline bool is_valid_hexadecimal_digit(int cur) { return iswxdigit(cur); }
     974             : 
     /*
      * Consume one run of digits of a numeric literal and validate it.
      *
      * c      parser context; characters are read from c->scanner.
      * pcur   first character of the run, already read by the caller.
      * initial_underscore_allowed
      *        whether pcur may be the separator '_'.
      * token  out parameter: set to `type` once the first character has been
      *        accepted, or to 0 when the run is malformed.
      * type   token code for the run.  BINARYNUM, OCTALNUM and HEXADECIMALNUM
      *        select the matching digit predicate; every other value selects
      *        decimal digits.
      *
      * Digits may be separated by single underscores.  A leading underscore
      * (unless allowed), two underscores in a row and a trailing underscore
      * are errors.
      *
      * Returns the character that ended the run; it has been read and is not
      * put back here.  The result is EOF when the input ended.  On error
      * *token is 0 and the return value is the character to report: pcur
      * itself when the first character is not a valid digit, otherwise '_'
      * or the alphanumeric character that follows a trailing underscore.
      */
     975     1874015 : static inline int check_validity_number(mvc* c, int pcur, bool initial_underscore_allowed, int *token, int type) {
     976     1874015 :         struct scanner *lc = &c->scanner;
     977     1874015 :         bool (*is_valid_n_ary_digit)(int);
     978             : 
     979     1874015 :         if (pcur == '_' && !initial_underscore_allowed)  /* ERROR: initial underscore not allowed */  {
     980           0 :                 *token = 0;
     981           0 :                 return '_';
     982             :         }
     983             : 
                               /* choose the digit predicate that belongs to the radix of this run */
     984     1874015 :         switch (type) {
     985             :         case BINARYNUM:
     986             :                 is_valid_n_ary_digit = &is_valid_binary_digit;
     987             :                 break;
     988           3 :         case OCTALNUM:
     989           3 :                 is_valid_n_ary_digit = &is_valid_octal_digit;
     990           3 :                 break;
     991         280 :         case HEXADECIMALNUM:
     992         280 :                 is_valid_n_ary_digit = &is_valid_hexadecimal_digit;
     993         280 :                 break;
     994     1873730 :         default:
     995     1873730 :                 is_valid_n_ary_digit = &is_valid_decimal_digit;
     996     1873730 :                 break;
     997             :         }
     998             : 
     999     1874015 :         if ( !(pcur == '_' || is_valid_n_ary_digit(pcur)) ) /* ERROR: first digit is not valid */ {
    1000          17 :                 *token = 0;
    1001          17 :                 return pcur;
    1002             :         }
    1003             : 
                               /* from here on cur is the lookahead and pcur the last accepted character */
    1004     1873909 :         int cur = scanner_getc(lc);
    1005     1873861 :         *token = type;
    1006     3779058 :         while (cur != EOF) {
    1007     3779054 :                 if (cur == '_') {
    1008          25 :                         if (pcur == '_') /* ERROR: multiple consecutive underscores */ {
    1009           2 :                                 *token = 0;
    1010           2 :                                 return '_';
    1011             :                         }
    1012             :                 }
    1013     3779029 :                 else if (!is_valid_n_ary_digit(cur))
    1014             :                         break;
    1015     1905315 :                 pcur = cur;
    1016     1905315 :                 cur = scanner_getc(lc);
    1017             :         }
    1018             : 
                               /* the run stopped right after an underscore */
    1019     1873566 :         if (pcur == '_')  {
    1020           3 :                 *token = 0;
    1021           3 :                 if (iswalnum(cur))       /* ERROR: not a valid digit */
    1022             :                         return cur;
    1023             :                 else                            /* ERROR: number ends with underscore */
    1024             :                         return '_';
    1025             :         }
    1026             : 
    1027             :         return cur;
    1028             : }
    1029             : 
    /*
     * Scan a numeric literal whose first character is cur and return its
     * token.
     *
     * Besides the forms listed in the comment inside the function, the code
     * also accepts the prefixes 0b (BINARYNUM) and 0o (OCTALNUM).  Every run
     * of digits is checked by check_validity_number(), which allows single
     * '_' separators between digits.
     *
     * Returns the result of scanner_token() for the recognised token type
     * (sqlINT, BINARYNUM, OCTALNUM, HEXADECIMALNUM, OIDNUM, INTNUM or
     * APPROXNUM), EOF when check_validity_number() or a lookahead read in
     * the '@' branch returned EOF, or LEX_ERROR after reporting
     * "Unexpected symbol" through sql_error().
     */
    1030             : static int
    1031     1860701 : number(mvc * c, int cur)
    1032             : {
    1033     1860701 :         struct scanner *lc = &c->scanner;
    1034     1860701 :         int token = sqlINT;
    1035             : 
    1036             :         /* a number has one of these forms (expressed in regular expressions):
    1037             :          * 0x[0-9A-Fa-f]+                   -- (hexadecimal) INTEGER
    1038             :          * \.[0-9]+                         -- DECIMAL
    1039             :          * [0-9]+\.[0-9]*                   -- DECIMAL
    1040             :          * [0-9]+@0                         -- OID
    1041             :          * [0-9]*\.[0-9]+[eE][-+]?[0-9]+    -- REAL
    1042             :          * [0-9]+(\.[0-9]*)?[eE][-+]?[0-9]+ -- REAL
    1043             :          * [0-9]+                           -- (decimal) INTEGER
    1044             :          */
    1045     1860701 :         lc->started = 1;
                               /* a leading 0 may introduce a radix prefix; the digits after the
                                * prefix may start with an underscore (third argument true) */
    1046     1860701 :         if (cur == '0') {
    1047      303697 :                 switch ((cur = scanner_getc(lc))) {
    1048           2 :                 case 'b':
    1049           2 :                         cur = scanner_getc(lc);
    1050           2 :                         if ((cur = check_validity_number(c, cur, true, &token, BINARYNUM)) == EOF) return cur;
    1051             :                         break;
    1052           3 :                 case 'o':
    1053           3 :                         cur = scanner_getc(lc);
    1054           3 :                         if ((cur = check_validity_number(c,  cur, true, &token, OCTALNUM)) == EOF) return cur;
    1055             :                         break;
    1056         280 :                 case 'x':
    1057         280 :                         cur = scanner_getc(lc);
    1058         280 :                         if ((cur = check_validity_number(c,  cur, true, &token, HEXADECIMALNUM)) == EOF) return cur;
    1059             :                         break;
    1060      303413 :                 default:
                                               /* not a prefix: put the lookahead back and scan the
                                                * literal from its '0' as a decimal number */
    1061      303413 :                         utf8_putchar(lc, cur);
    1062      303413 :                         cur = '0';
    1063             :                 }
    1064             :         }
                               /* token is still sqlINT unless a radix prefix was handled above; in
                                * that case it is the prefix's token type, or 0 after an error */
    1065     1860702 :         if (token == sqlINT) {
                                       /* integer part.  When number() is entered with cur == '.', this
                                        * check rejects the '.' and clears token; the fraction branch
                                        * below then sets it to INTNUM. */
    1066     1860426 :                 if ((cur = check_validity_number(c, cur, false, &token, sqlINT)) == EOF) return cur;
    1067     1860032 :                 if (cur == '@') {
                                               /* OID suffix: only "@0" is accepted, and only after a
                                                * valid integer */
    1068           0 :                         if (token == sqlINT) {
    1069           0 :                                 cur = scanner_getc(lc);
    1070           0 :                                 if (cur == EOF)
    1071             :                                         return cur;
    1072           0 :                                 if (cur == '0') {
    1073           0 :                                         cur = scanner_getc(lc);
    1074           0 :                                         if (cur == EOF)
    1075             :                                                 return cur;
    1076           0 :                                         token = OIDNUM;
    1077             :                                 } else {
    1078             :                                         /* number + '@' not followed by 0: show '@' as erroneous */
    1079           0 :                                         utf8_putchar(lc, cur);
    1080           0 :                                         cur = '@';
    1081           0 :                                         token = 0;
    1082             :                                 }
    1083             :                         }
    1084             :                 } else {
    1085     1860032 :                         if (cur == '.') {
                                                       /* NOTE(review): if this read returns EOF, cur stays EOF
                                                        * through both branches below and reaches the
                                                        * assert(cur != EOF) further down -- confirm whether
                                                        * scanner_getc() can return EOF at this point */
    1086       11060 :                                 cur = scanner_getc(lc);
    1087       11060 :                                 if (iswalnum(cur)) /* early exit for numerical forms with final . e.g. 10. */
    1088       11054 :                                 if ((cur = check_validity_number(c, cur, false, &token, INTNUM)) == EOF) return cur;
    1089             :                         }
                                               /* the next two ifs nest: an exponent is only parsed while
                                                * no error has been flagged (token != 0) */
    1090     1860032 :                         if (token != 0)
    1091     1860040 :                         if (cur == 'e' || cur == 'E') {
    1092        2229 :                                 cur = scanner_getc(lc);
    1093        2229 :                                 if (cur == '+' || cur == '-')
    1094        2111 :                                         cur = scanner_getc(lc);
    1095        2229 :                                 if ((cur = check_validity_number(c, cur, false, &token, APPROXNUM)) == EOF) return cur;
    1096             :                         }
    1097             :                 }
    1098             :         }
    1099             : 
    1100     1858079 :         assert(cur != EOF);
    1101             : 
                               /* a letter or digit directly after the literal makes it invalid */
    1102     1860308 :         if (iswalnum(cur)) /* ERROR: not a valid digit */
    1103           6 :                 token = 0;
    1104             : 
                               /* cur is lookahead (or the offending character): put it back */
    1105     1860308 :         utf8_putchar(lc, cur);
    1106             : 
    1107     1860247 :         if (token) {
    1108     1860237 :                 return scanner_token(lc, token);
    1109             :         } else {
    1110          10 :                 (void)sql_error( c, 2, SQLSTATE(42000) "Unexpected symbol %lc", (wint_t) cur);
    1111          10 :                 return LEX_ERROR;
    1112             :         }
    1113             : }
    1114             : 
    /*
     * Tokenize input that starts with the punctuation character cur.
     *
     * Comments are skipped here and scanning then continues through
     * tokenize().  Quotes are handed to scanner_string(), '{' after an IDENT
     * to scanner_body(), and '.' followed by a digit to number().  Operators
     * of more than one character are recognised by reading ahead with
     * scanner_getc(); lookahead that does not belong to the operator is put
     * back with utf8_putchar().
     *
     * Returns the token produced by scanner_token() or by the function the
     * character was delegated to, EOF when the input ends while lookahead is
     * being read, or LEX_ERROR after reporting "Unexpected symbol" for a
     * character that no case handles.
     */
    1115             : static
    1116    12487831 : int scanner_symbol(mvc * c, int cur)
    1117             : {
    1118    12487831 :         struct scanner *lc = &c->scanner;
    1119    12487831 :         int next = 0;
    1120    12487831 :         int started = lc->started;
    1121             : 
    1122    12487831 :         switch (cur) {
    1123       70194 :         case '/':
    1124       70194 :                 lc->started = 1;
    1125       70194 :                 next = scanner_getc(lc);
    1126       70194 :                 if (next < 0)
    1127             :                         return EOF;
    1128       70194 :                 if (next == '*') {
                                               /* block comment: undo the started change made above,
                                                * skip the comment and tokenize what follows it */
    1129       67564 :                         lc->started = started;
    1130       67564 :                         cur = skip_c_comment(lc);
    1131       67564 :                         if (cur < 0)
    1132             :                                 return EOF;
    1133       67564 :                         return tokenize(c, cur);
    1134             :                 } else {
                                               /* plain '/': put the lookahead back */
    1135        2630 :                         utf8_putchar(lc, next);
    1136        2630 :                         return scanner_token(lc, cur);
    1137             :                 }
                               /* digits: tokenize() tests iswdigit() before iswpunct(), so it
                                * calls number() itself for these characters */
    1138           0 :         case '0':
    1139             :         case '1':
    1140             :         case '2':
    1141             :         case '3':
    1142             :         case '4':
    1143             :         case '5':
    1144             :         case '6':
    1145             :         case '7':
    1146             :         case '8':
    1147             :         case '9':
    1148           0 :                 return number(c, cur);
                               /* '#' starts a comment that runs to the end of the line */
    1149           5 :         case '#':
    1150           5 :                 if ((cur = skip_sql_comment(lc)) == EOF)
    1151             :                         return cur;
    1152           5 :                 return tokenize(c, cur);
                               /* single-quoted string: the last argument is false in raw string
                                * mode and true otherwise (presumably it enables escape
                                * processing -- confirm against scanner_string) */
    1153      832785 :         case '\'':
    1154      832785 :                 if (lc->raw_string_mode || lc->next_string_is_raw)
    1155          57 :                         return scanner_string(c, cur, false);
    1156      832728 :                 return scanner_string(c, cur, true);
    1157     1241075 :         case '"':
    1158     1241075 :                 return scanner_string(c, cur, false);
    1159         481 :         case '{':
    1160             :                 // if previous tokens like LANGUAGE IDENT
    1161             :                 // TODO checking on IDENT only may not be enough
    1162         481 :                 if (lc->yylast == IDENT)
    1163         215 :                         return scanner_body(c);
    1164         266 :                 lc->started = 1;
    1165         266 :                 return scanner_token(lc, cur);
    1166         266 :         case '}':
    1167         266 :                 lc->started = 1;
    1168         266 :                 return scanner_token(lc, cur);
                               /* "--" starts a comment that runs to the end of the line;
                                * otherwise this is a plain '-' */
    1169       31417 :         case '-':
    1170       31417 :                 lc->started = 1;
    1171       31417 :                 next = scanner_getc(lc);
    1172       31417 :                 if (next < 0)
    1173             :                         return EOF;
    1174       31416 :                 if (next == '-') {
    1175        2914 :                         lc->started = started;
    1176        2914 :                         if ((cur = skip_sql_comment(lc)) == EOF)
    1177             :                                 return cur;
    1178        2914 :                         return tokenize(c, cur);
    1179             :                 }
    1180       28502 :                 lc->started = 1;
    1181       28502 :                 utf8_putchar(lc, next);
    1182       28502 :                 return scanner_token(lc, cur);
                               /* "~=" is GEOM_MBR_EQUAL, a lone '~' is returned as itself */
    1183          12 :         case '~': /* binary not */
    1184          12 :                 lc->started = 1;
    1185          12 :                 next = scanner_getc(lc);
    1186          12 :                 if (next < 0)
    1187             :                         return EOF;
    1188          12 :                 if (next == '=')
    1189           5 :                         return scanner_token(lc, GEOM_MBR_EQUAL);
    1190           7 :                 utf8_putchar(lc, next);
    1191           7 :                 return scanner_token(lc, cur);
                               /* single-character tokens: the character is its own token code */
    1192     6976019 :         case '^': /* binary xor */
    1193             :         case '*':
    1194             :         case '?':
    1195             :         case ':':
    1196             :         case '%':
    1197             :         case '+':
    1198             :         case '(':
    1199             :         case ')':
    1200             :         case ',':
    1201             :         case '=':
    1202             :         case '[':
    1203             :         case ']':
    1204     6976019 :                 lc->started = 1;
    1205     6976019 :                 return scanner_token(lc, cur);
                               /* "&<|", "&<", "&>", "&&" or a plain '&' */
    1206        5989 :         case '&':
    1207        5989 :                 lc->started = 1;
    1208        5989 :                 cur = scanner_getc(lc);
    1209        5989 :                 if (cur < 0)
    1210             :                         return EOF;
                                       /* NOTE(review): this repeats the check just above and can
                                        * never be true here */
    1211        5989 :                 if (cur < 0)
    1212             :                         return EOF;
    1213        5989 :                 if(cur == '<') {
    1214           3 :                         next = scanner_getc(lc);
    1215           3 :                         if (next < 0)
    1216             :                                 return EOF;
    1217           3 :                         if(next == '|') {
    1218           0 :                                 return scanner_token(lc, GEOM_OVERLAP_OR_BELOW);
    1219             :                         } else {
    1220           3 :                                 utf8_putchar(lc, next); //put the char back
    1221           3 :                                 return scanner_token(lc, GEOM_OVERLAP_OR_LEFT);
    1222             :                         }
    1223        5986 :                 } else if(cur == '>')
    1224           3 :                         return scanner_token(lc, GEOM_OVERLAP_OR_RIGHT);
    1225        5983 :                 else if(cur == '&')
    1226           3 :                         return scanner_token(lc, GEOM_OVERLAP);
    1227             :                 else {/* binary and */
    1228        5980 :                         utf8_putchar(lc, cur); //put the char back
    1229        5980 :                         return scanner_token(lc, '&');
    1230             :                 }
    1231          19 :         case '@':
    1232          19 :                 lc->started = 1;
    1233          19 :                 return scanner_token(lc, AT);
                               /* ';' is the only case that sets lc->started to 0 */
    1234      957712 :         case ';':
    1235      957712 :                 lc->started = 0;
    1236      957712 :                 return scanner_token(lc, SCOLON);
                               /* "<=", "<>", "<<=", "<<|", "<<", "<->" or a plain '<' */
    1237       50465 :         case '<':
    1238       50465 :                 lc->started = 1;
    1239       50465 :                 cur = scanner_getc(lc);
    1240       50465 :                 if (cur < 0)
    1241             :                         return EOF;
    1242       50465 :                 if (cur == '=') {
    1243        3120 :                         return scanner_token( lc, COMPARISON);
    1244       47345 :                 } else if (cur == '>') {
    1245       33991 :                         return scanner_token( lc, COMPARISON);
    1246       13354 :                 } else if (cur == '<') {
    1247          46 :                         next = scanner_getc(lc);
    1248          46 :                         if (next < 0)
    1249             :                                 return EOF;
    1250          46 :                         if (next == '=') {
    1251           4 :                                 return scanner_token( lc, LEFT_SHIFT_ASSIGN);
    1252          42 :                         } else if (next == '|') {
    1253           1 :                                 return scanner_token(lc, GEOM_BELOW);
    1254             :                         } else {
    1255          41 :                                 utf8_putchar(lc, next); //put the char back
    1256          41 :                                 return scanner_token( lc, LEFT_SHIFT);
    1257             :                         }
    1258       13308 :                 } else if(cur == '-') {
    1259          19 :                         next = scanner_getc(lc);
    1260          19 :                         if (next < 0)
    1261             :                                 return EOF;
    1262          19 :                         if(next == '>') {
    1263           7 :                                 return scanner_token(lc, GEOM_DIST);
    1264             :                         } else {
    1265             :                                 //put the characters back and fall in the next possible case
                                                       /* both lookahead characters go back, last read first,
                                                        * so only '<' is returned as COMPARISON */
    1266          12 :                                 utf8_putchar(lc, next);
    1267          12 :                                 utf8_putchar(lc, cur);
    1268          12 :                                 return scanner_token( lc, COMPARISON);
    1269             :                         }
    1270             :                 } else {
    1271       13289 :                         utf8_putchar(lc, cur);
    1272       13289 :                         return scanner_token( lc, COMPARISON);
    1273             :                 }
                               /* ">>=", ">>", ">=" or a plain '>' */
    1274       46980 :         case '>':
    1275       46980 :                 lc->started = 1;
    1276       46980 :                 cur = scanner_getc(lc);
    1277       46980 :                 if (cur < 0)
    1278             :                         return EOF;
    1279       46980 :                 if (cur == '>') {
    1280        2531 :                         cur = scanner_getc(lc);
    1281        2531 :                         if (cur < 0)
    1282             :                                 return EOF;
    1283        2531 :                         if (cur == '=')
    1284           3 :                                 return scanner_token( lc, RIGHT_SHIFT_ASSIGN);
    1285        2528 :                         utf8_putchar(lc, cur);
    1286        2528 :                         return scanner_token( lc, RIGHT_SHIFT);
    1287       44449 :                 } else if (cur != '=') {
    1288       42221 :                         utf8_putchar(lc, cur);
    1289       42221 :                         return scanner_token( lc, COMPARISON);
    1290             :                 } else {
    1291        2228 :                         return scanner_token( lc, COMPARISON);
    1292             :                 }
                               /* '.' followed by a digit starts a number, otherwise it is the
                                * '.' token */
    1293     2062802 :         case '.':
    1294     2062802 :                 lc->started = 1;
    1295     2062802 :                 cur = scanner_getc(lc);
    1296     2062802 :                 if (cur < 0)
    1297             :                         return EOF;
    1298     2062801 :                 if (!iswdigit(cur)) {
    1299     2062788 :                         utf8_putchar(lc, cur);
    1300     2062788 :                         return scanner_token( lc, '.');
    1301             :                 } else {
                                               /* put the digit back and let number() start at the '.' */
    1302          13 :                         utf8_putchar(lc, cur);
    1303          13 :                         cur = '.';
    1304          13 :                         return number(c, cur);
    1305             :                 }
                               /* "||", "|&>", "|>>" or a plain '|' */
    1306      211576 :         case '|': /* binary or or string concat */
    1307      211576 :                 lc->started = 1;
    1308      211576 :                 cur = scanner_getc(lc);
    1309      211576 :                 if (cur < 0)
    1310             :                         return EOF;
    1311      211576 :                 if (cur == '|') {
    1312      211547 :                         return scanner_token(lc, CONCATSTRING);
    1313          29 :                 } else if (cur == '&') {
    1314           0 :                         next = scanner_getc(lc);
    1315           0 :                         if (next < 0)
    1316             :                                 return EOF;
    1317           0 :                         if(next == '>') {
    1318           0 :                                 return scanner_token(lc, GEOM_OVERLAP_OR_ABOVE);
    1319             :                         } else {
    1320           0 :                                 utf8_putchar(lc, next); //put the char back
    1321           0 :                                 utf8_putchar(lc, cur); //put the char back
    1322           0 :                                 return scanner_token(lc, '|');
    1323             :                         }
    1324          29 :                 } else if (cur == '>') {
    1325           1 :                         next = scanner_getc(lc);
    1326           1 :                         if (next < 0)
    1327             :                                 return EOF;
    1328           1 :                         if(next == '>') {
    1329           1 :                                 return scanner_token(lc, GEOM_ABOVE);
    1330             :                         } else {
    1331           0 :                                 utf8_putchar(lc, next); //put the char back
    1332           0 :                                 utf8_putchar(lc, cur); //put the char back
    1333           0 :                                 return scanner_token(lc, '|');
    1334             :                         }
    1335             :                 } else {
    1336          28 :                         utf8_putchar(lc, cur);
    1337          28 :                         return scanner_token(lc, '|');
    1338             :                 }
    1339             :         }
                               /* no case matched this character */
    1340          34 :         (void)sql_error( c, 3, SQLSTATE(42000) "Unexpected symbol (%lc)", (wint_t) cur);
    1341          34 :         return LEX_ERROR;
    1342             : }
    1343             : 
    /*
     * Produce the next token, starting from the already read character cur.
     *
     * The character class of cur decides who does the work, tested in this
     * order: white space is skipped and the loop starts over with the
     * character that follows; a digit goes to number(); a letter or '_' goes
     * to keyword_or_ident(), unless it is a string prefix directly followed
     * by a quote, in which case scanner_string() is called; punctuation goes
     * to scanner_symbol().  Anything else is reported with scanner_error().
     *
     * Returns the token code of the function the character was handed to,
     * or EOF (see the end-of-input handling at the bottom of the loop).
     */
    1344             : static int
    1345    27529008 : tokenize(mvc * c, int cur)
    1346             : {
    1347    27529008 :         struct scanner *lc = &c->scanner;
    1348    55293526 :         while (1) {
    1349    41411267 :                 if (cur == 0xFEFF) {
    1350             :                         /* on Linux at least, iswpunct returns TRUE
    1351             :                          * for U+FEFF, but we don't want that, we just
    1352             :                          * want to go to the scanner_error case
    1353             :                          * below */
    1354             :                         ;
    1355    41411366 :                 } else if (iswspace(cur)) {
    1356    13899155 :                         if ((cur = skip_white_space(lc)) == EOF)
    1357             :                                 return cur;
    1358    13882259 :                         continue;  /* try again */
    1359    27512211 :                 } else if (iswdigit(cur)) {
    1360     1860719 :                         return number(c, cur);
    1361    25651492 :                 } else if (iswalpha(cur) || cur == '_') {
                                               /* String prefixes.  Each case looks at the byte at
                                                * lc->rs->pos + lc->yycur, i.e. the next unread one, after
                                                * scanner_read_more() did not report EOF (presumably that
                                                * call makes sure the byte is in the buffer -- confirm).
                                                * When no quote follows, the letter is the start of an
                                                * ordinary keyword or identifier. */
    1362    13130700 :                         switch (cur) {
    1363      646966 :                         case 'e': /* string with escapes */
    1364             :                         case 'E':
    1365      646966 :                                 if (scanner_read_more(lc, 1) != EOF &&
    1366      646966 :                                     lc->rs->buf[lc->rs->pos + lc->yycur] == '\'') {
    1367        3694 :                                         return scanner_string(c, scanner_getc(lc), true);
    1368             :                                 }
    1369             :                                 break;
    1370      401272 :                         case 'x': /* blob */
    1371             :                         case 'X':
    1372             :                         case 'r': /* raw string */
    1373             :                         case 'R':
    1374      401272 :                                 if (scanner_read_more(lc, 1) != EOF &&
    1375      401272 :                                     lc->rs->buf[lc->rs->pos + lc->yycur] == '\'') {
    1376        3262 :                                         return scanner_string(c, scanner_getc(lc), false);
    1377             :                                 }
    1378             :                                 break;
    1379      150390 :                         case 'u': /* unicode string */
    1380             :                         case 'U':
                                                       /* needs two bytes of lookahead: '&' and then a single
                                                        * or double quote */
    1381      150390 :                                 if (scanner_read_more(lc, 1) != EOF &&
    1382      150407 :                                     lc->rs->buf[lc->rs->pos + lc->yycur] == '&' &&
    1383          17 :                                     scanner_read_more(lc, 2) != EOF &&
    1384          17 :                                     (lc->rs->buf[lc->rs->pos + lc->yycur + 1] == '\'' ||
    1385             :                                      lc->rs->buf[lc->rs->pos + lc->yycur + 1] == '"')) {
    1386          17 :                                         cur = scanner_getc(lc); /* '&' */
    1387          17 :                                         return scanner_string(c, scanner_getc(lc), false);
    1388             :                                 }
    1389             :                                 break;
    1390             :                         default:
    1391             :                                 break;
    1392             :                         }
    1393    13156762 :                         return keyword_or_ident(c, cur);
    1394    12487757 :                 } else if (iswpunct(cur)) {
    1395    12487626 :                         return scanner_symbol(c, cur);
    1396             :                 }
                                       /* end of input: EOF is returned as is when lc->mode is LINE_1
                                        * or lc->started is 0, otherwise it is reported as an error */
    1397          32 :                 if (cur == EOF) {
    1398           0 :                         if (lc->mode == LINE_1 || !lc->started )
    1399             :                                 return cur;
    1400           0 :                         return scanner_error(c, cur);
    1401             :                 }
    1402             :                 /* none of the above: error */
    1403          32 :                 return scanner_error(c, cur);
    1404             :         }
    1405             : }
    1406             : 
    1407             : /* SQL 'quoted' idents consist of a set of any character of
    1408             :  * the source language character set other than a 'quote'
    1409             :  *
    1410             :  * MonetDB has 3 restrictions:
    1411             :  *      1 we disallow '%' as the first character.
    1412             :  *      2 the length is limited to 1024 characters
    1413             :  *      3 the identifier 'TID%' is not allowed
    1414             :  */
    /*
     * Check the quoted identifier s against the restrictions above and copy
     * it to dst with every doubled quote ("") reduced to a single quote.
     *
     * s    NUL-terminated identifier text.  The caller visible in this file
     *      passes the text after the opening quote, with the closing quote
     *      overwritten by NUL.
     * dst  output buffer supplied by the caller; s and dst must not overlap
     *      (restrict).
     *
     * Returns true when the identifier is accepted; dst then holds the
     * NUL-terminated copy.  Returns false otherwise.  When the length check
     * fails dst is left without a terminating NUL.
     *
     * NOTE(review): the length is counted in bytes (char), and the test is
     * p >= 1024, so an identifier of exactly 1024 bytes is rejected; the
     * longest accepted identifier is 1023 bytes.
     */
    1415             : static bool
    1416     1241064 : valid_ident(const char *restrict s, char *restrict dst)
    1417             : {
    1418     1241064 :         int p = 0;
    1419             : 
    1420     1241064 :         if (*s == '%')
    1421             :                 return false;
    1422             : 
    1423     9204015 :         while (*s) {
                                       /* copy one byte; if it was a quote followed by another
                                        * quote, skip the second one */
    1424     7962951 :                 if ((dst[p++] = *s++) == '"' && *s == '"')
    1425          62 :                         s++;
    1426     7962951 :                 if (p >= 1024)
    1427             :                         return false;
    1428             :         }
    1429     1241064 :         dst[p] = '\0';
    1430     1241064 :         if (strcmp(dst, TID + 1) == 0) /* an index named 'TID%' could interfere with '%TID%' */
    1431             :                 return false;
    1432             :         return true;
    1433             : }
    1434             : 
    1435             : static inline int
    1436    27632350 : sql_get_next_token(YYSTYPE *yylval, void *parm)
    1437             : {
    1438    27632350 :         mvc *c = (mvc*)parm;
    1439    27632350 :         struct scanner *lc = &c->scanner;
    1440    27632350 :         int token = 0, cur = 0;
    1441             : 
    1442    27632350 :         if (lc->rs->buf == NULL) /* malloc failure */
    1443             :                 return EOF;
    1444             : 
    1445    27632350 :         if (lc->yynext) {
    1446       59996 :                 int next = lc->yynext;
    1447             : 
    1448       59996 :                 lc->yynext = 0;
    1449       59996 :                 return(next);
    1450             :         }
    1451             : 
    1452    27572354 :         if (lc->yybak) {
    1453    26576283 :                 lc->rs->buf[lc->rs->pos + lc->yycur] = lc->yybak;
    1454    26576283 :                 lc->yybak = 0;
    1455             :         }
    1456             : 
    1457    27572354 :         lc->yysval = lc->yycur;
    1458    27572354 :         lc->yylast = lc->yyval;
    1459    27572354 :         cur = scanner_getc(lc);
    1460    27570679 :         if (cur < 0)
    1461             :                 return EOF;
    1462    27459407 :         token = tokenize(c, cur);
    1463             : 
    1464    27458804 :         yylval->sval = (lc->rs->buf + lc->rs->pos + lc->yysval);
    1465             : 
    1466             :         /* This is needed as ALIAS and aTYPE get defined too late, see
    1467             :            sql_keyword.h */
    1468    27458804 :         if (token == KW_ALIAS)
    1469             :                 token = ALIAS;
    1470             : 
    1471    27453223 :         if (token == KW_TYPE)
    1472       48609 :                 token = aTYPE;
    1473             : 
    1474    27458804 :         if (token == IDENT || token == COMPARISON ||
    1475    22280944 :             token == RANK || token == aTYPE || token == ALIAS || token == MARGFUNC) {
    1476     5241211 :                 yylval->sval = sa_strndup(c->sa, yylval->sval, lc->yycur-lc->yysval);
    1477     5241247 :                 lc->next_string_is_raw = false;
    1478    22217593 :         } else if (token == STRING) {
    1479     2080803 :                 char quote = *yylval->sval;
    1480     2080803 :                 char *str = sa_alloc( c->sa, (lc->yycur-lc->yysval-2)*2 + 1 );
    1481     2080803 :                 char *dst;
    1482             : 
    1483     2080803 :                 assert(quote == '"' || quote == '\'' || quote == 'E' || quote == 'e' || quote == 'U' || quote == 'u' || quote == 'X' || quote == 'x' || quote == 'R' || quote == 'r');
    1484             : 
    1485     2080803 :                 lc->rs->buf[lc->rs->pos + lc->yycur - 1] = 0;
    1486     2080803 :                 switch (quote) {
    1487     1241064 :                 case '"':
    1488     1241064 :                         if (valid_ident(yylval->sval+1,str)) {
    1489             :                                 token = IDENT;
    1490             :                         } else {
    1491           0 :                                 sql_error(c, 1, SQLSTATE(42000) "Invalid identifier '%s'", yylval->sval+1);
    1492           0 :                                 return LEX_ERROR;
    1493             :                         }
    1494             :                         break;
    1495        3693 :                 case 'e':
    1496             :                 case 'E':
    1497        3693 :                         assert(yylval->sval[1] == '\'');
    1498        3693 :                         if (GDKstrFromStr((unsigned char *) str,
    1499             :                                                           (unsigned char *) yylval->sval + 2,
    1500        3693 :                                                           lc->yycur-lc->yysval - 2, '\'') < 0) {
    1501           1 :                                 char *err = GDKerrbuf;
    1502           1 :                                 if (strncmp(err, GDKERROR, strlen(GDKERROR)) == 0)
    1503           1 :                                         err += strlen(GDKERROR);
    1504           0 :                                 else if (*err == '!')
    1505           0 :                                         err++;
    1506           1 :                                 sql_error(c, 1, SQLSTATE(42000) "%s", err);
    1507           1 :                                 return LEX_ERROR;
    1508             :                         }
    1509             :                         quote = '\'';
    1510             :                         break;
    1511          17 :                 case 'u':
    1512             :                 case 'U':
    1513          17 :                         assert(yylval->sval[1] == '&');
    1514          17 :                         assert(yylval->sval[2] == '\'' || yylval->sval[2] == '"');
    1515          17 :                         strcpy(str, yylval->sval + 3);
    1516          17 :                         token = yylval->sval[2] == '\'' ? USTRING : UIDENT;
    1517          17 :                         quote = yylval->sval[2];
    1518          17 :                         lc->next_string_is_raw = true;
    1519          17 :                         break;
    1520           1 :                 case 'x':
    1521             :                 case 'X':
    1522           1 :                         assert(yylval->sval[1] == '\'');
    1523           1 :                         dst = str;
    1524           5 :                         for (char *src = yylval->sval + 2; *src; dst++)
    1525           4 :                                 if ((*dst = *src++) == '\'' && *src == '\'')
    1526           0 :                                         src++;
    1527           1 :                         *dst = 0;
    1528           1 :                         quote = '\'';
    1529           1 :                         token = XSTRING;
    1530           1 :                         lc->next_string_is_raw = true;
    1531           1 :                         break;
    1532        3254 :                 case 'r':
    1533             :                 case 'R':
    1534        3254 :                         assert(yylval->sval[1] == '\'');
    1535        3254 :                         dst = str;
    1536      448935 :                         for (char *src = yylval->sval + 2; *src; dst++)
    1537      445681 :                                 if ((*dst = *src++) == '\'' && *src == '\'')
    1538        2708 :                                         src++;
    1539        3254 :                         quote = '\'';
    1540        3254 :                         *dst = 0;
    1541        3254 :                         break;
    1542      832774 :                 default:
    1543      832774 :                         if (lc->raw_string_mode || lc->next_string_is_raw) {
    1544          57 :                                 dst = str;
    1545         524 :                                 for (char *src = yylval->sval + 1; *src; dst++)
    1546         467 :                                         if ((*dst = *src++) == '\'' && *src == '\'')
    1547           3 :                                                 src++;
    1548          57 :                                 *dst = 0;
    1549             :                         } else {
    1550      832717 :                                 if (GDKstrFromStr((unsigned char *)str,
    1551      832717 :                                                                   (unsigned char *)yylval->sval + 1,
    1552      832717 :                                                                   lc->yycur - lc->yysval - 1,
    1553             :                                                                   '\'') < 0) {
    1554           1 :                                         sql_error(c, 1, SQLSTATE(42000) "%s", GDKerrbuf);
    1555           1 :                                         return LEX_ERROR;
    1556             :                                 }
    1557             :                         }
    1558             :                         break;
    1559             :                 }
    1560     2080801 :                 yylval->sval = str;
    1561             : 
    1562             :                 /* reset original */
    1563     2080801 :                 lc->rs->buf[lc->rs->pos+lc->yycur- 1] = quote;
    1564             :         } else {
    1565    20136790 :                 lc->next_string_is_raw = false;
    1566             :         }
    1567             : 
    1568             :         return(token);
    1569             : }
    1570             : 
    1571             : static int scanner( YYSTYPE *yylval, void *m, bool log);
    1572             : 
                       /*
                        * Fetch the next token through sql_get_next_token() and post-process it.
                        *
                        *  - NOT immediately followed by EXISTS, BETWEEN, IN, LIKE or ILIKE is
                        *    folded into the corresponding NOT_* token.
                        *  - NOT followed by NOT: both are dropped and the token after the
                        *    pair is returned.
                        *  - NOT followed by anything else: NOT is returned and the token that
                        *    was read ahead is stored in lc->yynext (presumably handed out by
                        *    the next read; confirm where yynext is consumed).
                        *  - A semicolon seen while lc->yylast is also SCOLON is skipped,
                        *    together with any semicolons that follow it.
                        *
                        * yylval  receives the semantic value of the token.
                        * parm    the mvc context; its embedded scanner state is updated.
                        * log     when true and lc->log is set, the input consumed by this call
                        *         is written to lc->log.
                        *
                        * Returns the resulting token; lc->started is incremented unless the
                        * token is EOF.
                        */
    1573             : static int
    1574    27500666 : scanner(YYSTYPE * yylval, void *parm, bool log)
    1575             : {
    1576    27500666 :         int token;
    1577    27500666 :         mvc *c = (mvc *) parm;
    1578    27500666 :         struct scanner *lc = &c->scanner;
    1579    27500666 :         size_t pos;
    1580             : 
    1581             :         /* store position for when view's query ends */
                               /* (absolute offset into lc->rs->buf where this call starts
                                * reading; used as the start of the logged span below) */
    1582    27500666 :         pos = lc->rs->pos + lc->yycur;
    1583             : 
    1584    27500666 :         token = sql_get_next_token(yylval, parm);
    1585             : 
    1586    27497695 :         if (token == NOT) {
                                       /* one token of look-ahead; log=false so only the
                                        * outermost call writes to the log */
    1587       72543 :                 int next = scanner(yylval, parm, false);
    1588             : 
    1589       72543 :                 if (next == NOT) {
                                               /* NOTE(review): returns straight from the nested
                                                * call, so this path never reaches the
                                                * mnstr_write() below -- confirm intended */
    1590           2 :                         return scanner(yylval, parm, false);
    1591             :                 } else if (next == EXISTS) {
    1592             :                         token = NOT_EXISTS;
    1593             :                 } else if (next == BETWEEN) {
    1594             :                         token = NOT_BETWEEN;
    1595             :                 } else if (next == sqlIN) {
    1596             :                         token = NOT_IN;
    1597             :                 } else if (next == LIKE) {
    1598             :                         token = NOT_LIKE;
    1599             :                 } else if (next == ILIKE) {
    1600             :                         token = NOT_ILIKE;
    1601             :                 } else {
                                               /* not a combinable pair: keep NOT and remember
                                                * the token that was already read */
    1602       59996 :                         lc->yynext = next;
    1603             :                 }
    1604    27425152 :         } else if (token == SCOLON) {
    1605             :                 /* ignore semi-colon(s) following a semi-colon */
    1606      957681 :                 if (lc->yylast == SCOLON) {
                                               /* prev tracks the scan offset just past the last
                                                * semicolon that is being skipped */
    1607      131961 :                         size_t prev = lc->yycur;
    1608      131962 :                         while ((token = sql_get_next_token(yylval, parm)) == SCOLON)
    1609           1 :                                 prev = lc->yycur;
    1610             : 
    1611             :                         /* skip the skipped stuff also in the buffer */
                                               /* rs->pos grows and yycur shrinks by the same
                                                * amount, so rs->pos + yycur is unchanged */
    1612      131961 :                         lc->rs->pos += prev;
    1613      131961 :                         lc->yycur -= prev;
    1614             :                 }
    1615             :         }
    1616             : 
                               /* write the bytes between the saved start offset and the
                                * current scan offset to the log stream */
    1617    27497693 :         if (lc->log && log)
    1618           0 :                 mnstr_write(lc->log, lc->rs->buf+pos, lc->rs->pos + lc->yycur - pos, 1);
    1619             : 
                               /* count every token handed out except EOF */
    1620    27497693 :         lc->started += (token != EOF);
    1621    27497693 :         return token;
    1622             : }
    1623             : 
    1624             : /* also see sql_parser.y */
    1625             : extern int sqllex(YYSTYPE * yylval, void *parm);
    1626             : 
                       /*
                        * Externally visible lexer entry point: forwards to scanner() with
                        * logging enabled and returns its token unchanged.
                        * NOTE(review): presumably the lexer hook called by the parser generated
                        * from sql_parser.y, with parm being the mvc context -- confirm there.
                        */
    1627             : int
    1628    27428436 : sqllex(YYSTYPE * yylval, void *parm)
    1629             : {
    1630    27428436 :         return scanner(yylval, parm, true);
    1631             : }

Generated by: LCOV version 1.14