LCOV - code coverage report
Current view: top level - sql/server - sql_scan.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 1048 1113 94.2 %
Date: 2024-12-19 23:10:26 Functions: 26 26 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include <wctype.h>
      15             : #include "sql_mem.h"
      16             : #include "sql_scan.h"
      17             : #include "sql_types.h"
      18             : #include "sql_symbol.h"
      19             : #include "sql_mvc.h"
      20             : #include "sql_parser.tab.h"
      21             : #include "sql_semantic.h"
      22             : #include "sql_parser.h"               /* for sql_error() */
      23             : 
      24             : #include "stream.h"
      25             : #include "mapi_prompt.h"
      26             : #include <unistd.h>
      27             : #include <string.h>
      28             : #include <ctype.h>
      29             : #include "sql_keyword.h"
      30             : 
      31             : /**
      32             :  * Removes comments before the query (a leading C-style comment only when it spans several lines) and collapses white space and newlines outside quoted text; comments inside the query are kept. Returns the cleaned copy, built in a buffer obtained with SA_NEW_ARRAY on sa, or NULL if that allocation fails.
      33             :  */
      34             : char *
      35      402717 : query_cleaned(allocator *sa, const char *query)
      36             : {
      37      402717 :         char *q, *r, *c = NULL; /* q: write cursor into r; r: result buffer; c: where the latest C-style comment begins in r */
      38      402717 :         int lines = 0; /* newlines written to r since the start or the last reset */
      39      402717 :         int quote = 0;          /* inside quotes ('..', "..", {..}) */
      40      402717 :         bool bs = false;                /* seen a backslash in a quoted string */
      41      402717 :         bool incomment1 = false;        /* inside traditional C style comment */
      42      402717 :         bool incomment2 = false;        /* inside comment starting with --  */
      43      402717 :         bool inline_comment = false; /* the current -- comment began mid-line and is being copied */
      44             :         /* every input char produces at most one output char, so strlen(query) + 1 bytes always suffice */
      45      402717 :         r = SA_NEW_ARRAY(sa, char, strlen(query) + 1);
      46      402930 :         if(!r)
      47             :                 return NULL;
      48             :         /* single pass over query; the branch order below decides which state consumes the current char */
      49    68287171 :         for (q = r; *query; query++) {
      50    67884241 :                 if (incomment1) {
      51       16186 :                         if (*query == '/' && query[-1] == '*') { /* NOTE(review): query[-1] can be the '*' of the opening marker, so slash-star-slash already ends the comment */
      52         234 :                                 incomment1 = false;
      53         234 :                                 if (c == r && lines > 0) { /* comment began at the very start of r and contains a newline */
      54         226 :                                         q = r; // reset to beginning, discarding the comment
      55         226 :                                         lines = 0;
      56         226 :                                         continue; /* the closing '/' is not copied */
      57             :                                 }
      58             :                         }
      59       15960 :                         if (*query == '\n') lines++;
      60       15960 :                         *q++ = *query; /* comment text is copied; it is only discarded by the reset above */
      61    67868055 :                 } else if (incomment2) {
      62      831143 :                         if (*query == '\n') {
      63        2906 :                                 incomment2 = false;
      64        2906 :                                 inline_comment = false;
      65             :                                 /* add newline only if comment doesn't
      66             :                                  * occupy whole line */
      67        2906 :                                 if (q > r && q[-1] != '\n'){
      68        1000 :                                         *q++ = '\n';
      69        1000 :                                         lines++;
      70             :                                 }
      71      828237 :                         } else if (inline_comment){
      72       24007 :                                 *q++ = *query; // preserve in line query comments
      73             :                         }
      74    67036912 :                 } else if (quote) {
      75    22309373 :                         if (bs) {
      76             :                                 bs = false; /* escaped char: it cannot close the quote */
      77    22306082 :                         } else if (*query == '\\') {
      78             :                                 bs = true;
      79    22302778 :                         } else if (*query == quote) {
      80      680589 :                                 quote = 0;
      81             :                         }
      82    22309373 :                         *q++ = *query; /* quoted text is copied verbatim */
      83    44727539 :                 } else if (*query == '"' || *query == '\'') {
      84      680121 :                         quote = *query;
      85      680121 :                         *q++ = *query;
      86    44047418 :                 } else if (*query == '{') {
      87         513 :                         quote = '}'; /* {..} is handled like a quoted region that ends at the next unescaped '}' */
      88         513 :                         *q++ = *query;
      89    44046905 :                 } else if (*query == '-' && query[1] == '-') {
      90        2906 :                         if (q > r && q[-1] != '\n') { /* something precedes the -- on this output line */
      91        1000 :                                 inline_comment = true;
      92        1000 :                                 *q++ = *query; // preserve in line query comments
      93             :                         }
      94             :                         incomment2 = true; /* the second '-' is consumed by the incomment2 branch */
      95    44043999 :                 } else if (*query == '/' && query[1] == '*') {
      96         234 :                         incomment1 = true;
      97         234 :                         c = q; /* remember where this comment starts in the output */
      98         234 :                         *q++ = *query;
      99    44043765 :                 } else if (*query == '\n') {
     100             :                         /* collapse newlines */
     101      866883 :                         if (q > r && q[-1] != '\n') {
     102      825069 :                                 *q++ = '\n';
     103      825069 :                                 lines++;
     104             :                         }
     105    43176882 :                 } else if (*query == ' ' || *query == '\t') {
     106             :                         /* collapse white space (a tab is written as a single space) */
     107     7096009 :                         if (q > r && q[-1] != ' ')
     108     5616045 :                                 *q++ = ' ';
     109             :                 } else {
     110    36080873 :                         *q++ = *query;
     111             :                 }
     112             :         }
     113      402930 :         *q = 0;
     114      402930 :         return r;
     115             : }
     116             : 
     117             : int
     118         352 : scanner_init_keywords(void)
     119             : {
     120         352 :         int failed = 0;
     121             : 
     122         352 :         failed += keywords_insert("false", BOOL_FALSE);
     123         352 :         failed += keywords_insert("true", BOOL_TRUE);
     124         352 :         failed += keywords_insert("bool", sqlBOOL);
     125             : 
     126         352 :         failed += keywords_insert("ALTER", ALTER);
     127         352 :         failed += keywords_insert("ADD", ADD);
     128         352 :         failed += keywords_insert("AND", AND);
     129             : 
     130         352 :         failed += keywords_insert("RANK", RANK);
     131         352 :         failed += keywords_insert("DENSE_RANK", RANK);
     132         352 :         failed += keywords_insert("PERCENT_RANK", RANK);
     133         352 :         failed += keywords_insert("CUME_DIST", RANK);
     134         352 :         failed += keywords_insert("ROW_NUMBER", RANK);
     135         352 :         failed += keywords_insert("NTILE", RANK);
     136         352 :         failed += keywords_insert("LAG", RANK);
     137         352 :         failed += keywords_insert("LEAD", RANK);
     138         352 :         failed += keywords_insert("FETCH", FETCH);
     139         352 :         failed += keywords_insert("FIRST_VALUE", RANK);
     140         352 :         failed += keywords_insert("LAST_VALUE", RANK);
     141         352 :         failed += keywords_insert("NTH_VALUE", RANK);
     142             : 
     143         352 :         failed += keywords_insert("BEST", BEST);
     144         352 :         failed += keywords_insert("EFFORT", EFFORT);
     145             : 
     146         352 :         failed += keywords_insert("AS", AS);
     147         352 :         failed += keywords_insert("ASC", ASC);
     148         352 :         failed += keywords_insert("AUTHORIZATION", AUTHORIZATION);
     149         352 :         failed += keywords_insert("BETWEEN", BETWEEN);
     150         352 :         failed += keywords_insert("SYMMETRIC", SYMMETRIC);
     151         352 :         failed += keywords_insert("ASYMMETRIC", ASYMMETRIC);
     152         352 :         failed += keywords_insert("BY", BY);
     153         352 :         failed += keywords_insert("CAST", CAST);
     154         352 :         failed += keywords_insert("CONVERT", CONVERT);
     155         352 :         failed += keywords_insert("CHARACTER", CHARACTER);
     156         352 :         failed += keywords_insert("CHAR", CHARACTER);
     157         352 :         failed += keywords_insert("VARYING", VARYING);
     158         352 :         failed += keywords_insert("VARCHAR", VARCHAR);
     159         352 :         failed += keywords_insert("BINARY", BINARY);
     160         352 :         failed += keywords_insert("LARGE", LARGE);
     161         352 :         failed += keywords_insert("OBJECT", OBJECT);
     162         352 :         failed += keywords_insert("CLOB", CLOB);
     163         352 :         failed += keywords_insert("BLOB", sqlBLOB);
     164         352 :         failed += keywords_insert("TEXT", sqlTEXT);
     165         352 :         failed += keywords_insert("TINYTEXT", sqlTEXT);
     166         352 :         failed += keywords_insert("STRING", CLOB);    /* ? */
     167         352 :         failed += keywords_insert("CHECK", CHECK);
     168         352 :         failed += keywords_insert("CLIENT", CLIENT);
     169         352 :         failed += keywords_insert("SERVER", SERVER);
     170         352 :         failed += keywords_insert("COMMENT", COMMENT);
     171         352 :         failed += keywords_insert("CONSTRAINT", CONSTRAINT);
     172         352 :         failed += keywords_insert("CREATE", CREATE);
     173         352 :         failed += keywords_insert("CROSS", CROSS);
     174         352 :         failed += keywords_insert("COPY", COPY);
     175         352 :         failed += keywords_insert("RECORDS", RECORDS);
     176         352 :         failed += keywords_insert("DELIMITERS", DELIMITERS);
     177         352 :         failed += keywords_insert("STDIN", STDIN);
     178         352 :         failed += keywords_insert("STDOUT", STDOUT);
     179             : 
     180         352 :         failed += keywords_insert("TINYINT", TINYINT);
     181         352 :         failed += keywords_insert("SMALLINT", SMALLINT);
     182         352 :         failed += keywords_insert("INTEGER", sqlINTEGER);
     183         352 :         failed += keywords_insert("INT", sqlINTEGER);
     184         352 :         failed += keywords_insert("MEDIUMINT", sqlINTEGER);
     185         352 :         failed += keywords_insert("BIGINT", BIGINT);
     186             : #ifdef HAVE_HGE
     187         352 :         failed += keywords_insert("HUGEINT", HUGEINT);
     188             : #endif
     189         352 :         failed += keywords_insert("DEC", sqlDECIMAL);
     190         352 :         failed += keywords_insert("DECIMAL", sqlDECIMAL);
     191         352 :         failed += keywords_insert("NUMERIC", sqlDECIMAL);
     192         352 :         failed += keywords_insert("DECLARE", DECLARE);
     193         352 :         failed += keywords_insert("DEFAULT", DEFAULT);
     194         352 :         failed += keywords_insert("DESC", DESC);
     195         352 :         failed += keywords_insert("DISTINCT", DISTINCT);
     196         352 :         failed += keywords_insert("DOUBLE", sqlDOUBLE);
     197         352 :         failed += keywords_insert("REAL", sqlREAL);
     198         352 :         failed += keywords_insert("DROP", DROP);
     199         352 :         failed += keywords_insert("ESCAPE", ESCAPE);
     200         352 :         failed += keywords_insert("EXISTS", EXISTS);
     201         352 :         failed += keywords_insert("UESCAPE", UESCAPE);
     202         352 :         failed += keywords_insert("EXTRACT", EXTRACT);
     203         352 :         failed += keywords_insert("FLOAT", sqlFLOAT);
     204         352 :         failed += keywords_insert("FOR", FOR);
     205         352 :         failed += keywords_insert("FOREIGN", FOREIGN);
     206         352 :         failed += keywords_insert("FROM", FROM);
     207         352 :         failed += keywords_insert("FWF", FWF);
     208             : 
     209         352 :         failed += keywords_insert("BIG", BIG);
     210         352 :         failed += keywords_insert("LITTLE", LITTLE);
     211         352 :         failed += keywords_insert("NATIVE", NATIVE);
     212         352 :         failed += keywords_insert("ENDIAN", ENDIAN);
     213             : 
     214         352 :         failed += keywords_insert("REFERENCES", REFERENCES);
     215             : 
     216         352 :         failed += keywords_insert("MATCH", MATCH);
     217         352 :         failed += keywords_insert("FULL", FULL);
     218         352 :         failed += keywords_insert("PARTIAL", PARTIAL);
     219         352 :         failed += keywords_insert("SIMPLE", SIMPLE);
     220             : 
     221         352 :         failed += keywords_insert("INSERT", INSERT);
     222         352 :         failed += keywords_insert("UPDATE", UPDATE);
     223         352 :         failed += keywords_insert("DELETE", sqlDELETE);
     224         352 :         failed += keywords_insert("TRUNCATE", TRUNCATE);
     225         352 :         failed += keywords_insert("MATCHED", MATCHED);
     226             : 
     227         352 :         failed += keywords_insert("ACTION", ACTION);
     228         352 :         failed += keywords_insert("CASCADE", CASCADE);
     229         352 :         failed += keywords_insert("RESTRICT", RESTRICT);
     230         352 :         failed += keywords_insert("FIRST", FIRST);
     231         352 :         failed += keywords_insert("GLOBAL", GLOBAL);
     232         352 :         failed += keywords_insert("GROUP", sqlGROUP);
     233         352 :         failed += keywords_insert("GROUPING", GROUPING);
     234         352 :         failed += keywords_insert("ROLLUP", ROLLUP);
     235         352 :         failed += keywords_insert("CUBE", CUBE);
     236         352 :         failed += keywords_insert("HAVING", HAVING);
     237         352 :         failed += keywords_insert("ILIKE", ILIKE);
     238         352 :         failed += keywords_insert("IMPRINTS", IMPRINTS);
     239         352 :         failed += keywords_insert("IN", sqlIN);
     240         352 :         failed += keywords_insert("INNER", INNER);
     241         352 :         failed += keywords_insert("INTO", INTO);
     242         352 :         failed += keywords_insert("IS", IS);
     243         352 :         failed += keywords_insert("JOIN", JOIN);
     244         352 :         failed += keywords_insert("KEY", KEY);
     245         352 :         failed += keywords_insert("LATERAL", LATERAL);
     246         352 :         failed += keywords_insert("LEFT", LEFT);
     247         352 :         failed += keywords_insert("LIKE", LIKE);
     248         352 :         failed += keywords_insert("LIMIT", LIMIT);
     249         352 :         failed += keywords_insert("SAMPLE", SAMPLE);
     250         352 :         failed += keywords_insert("SEED", SEED);
     251         352 :         failed += keywords_insert("LAST", LAST);
     252         352 :         failed += keywords_insert("LOCAL", LOCAL);
     253         352 :         failed += keywords_insert("NATURAL", NATURAL);
     254         352 :         failed += keywords_insert("NOT", NOT);
     255         352 :         failed += keywords_insert("NULL", sqlNULL);
     256         352 :         failed += keywords_insert("NULLS", NULLS);
     257         352 :         failed += keywords_insert("OFFSET", OFFSET);
     258         352 :         failed += keywords_insert("ON", ON);
     259         352 :         failed += keywords_insert("OPTIONS", OPTIONS);
     260         352 :         failed += keywords_insert("OPTION", OPTION);
     261         352 :         failed += keywords_insert("OR", OR);
     262         352 :         failed += keywords_insert("ORDER", ORDER);
     263         352 :         failed += keywords_insert("ORDERED", ORDERED);
     264         352 :         failed += keywords_insert("OUTER", OUTER);
     265         352 :         failed += keywords_insert("OVER", OVER);
     266         352 :         failed += keywords_insert("PARTITION", PARTITION);
     267         352 :         failed += keywords_insert("PATH", PATH);
     268         352 :         failed += keywords_insert("PRECISION", PRECISION);
     269         352 :         failed += keywords_insert("PRIMARY", PRIMARY);
     270             : 
     271         352 :         failed += keywords_insert("USER", USER);
     272         352 :         failed += keywords_insert("RENAME", RENAME);
     273         352 :         failed += keywords_insert("UNENCRYPTED", UNENCRYPTED);
     274         352 :         failed += keywords_insert("ENCRYPTED", ENCRYPTED);
     275         352 :         failed += keywords_insert("PASSWORD", PASSWORD);
     276         352 :         failed += keywords_insert("GRANT", GRANT);
     277         352 :         failed += keywords_insert("REVOKE", REVOKE);
     278         352 :         failed += keywords_insert("ROLE", ROLE);
     279         352 :         failed += keywords_insert("ADMIN", ADMIN);
     280         352 :         failed += keywords_insert("PRIVILEGES", PRIVILEGES);
     281         352 :         failed += keywords_insert("PUBLIC", PUBLIC);
     282         352 :         failed += keywords_insert("CURRENT_USER", CURRENT_USER);
     283         352 :         failed += keywords_insert("CURRENT_ROLE", CURRENT_ROLE);
     284         352 :         failed += keywords_insert("SESSION_USER", SESSION_USER);
     285         352 :         failed += keywords_insert("CURRENT_SCHEMA", CURRENT_SCHEMA);
     286         352 :         failed += keywords_insert("SESSION", sqlSESSION);
     287         352 :         failed += keywords_insert("MAX_MEMORY", MAX_MEMORY);
     288         352 :         failed += keywords_insert("MAX_WORKERS", MAX_WORKERS);
     289         352 :         failed += keywords_insert("OPTIMIZER", OPTIMIZER);
     290             : 
     291         352 :         failed += keywords_insert("RIGHT", RIGHT);
     292         352 :         failed += keywords_insert("SCHEMA", SCHEMA);
     293         352 :         failed += keywords_insert("SELECT", SELECT);
     294         352 :         failed += keywords_insert("SET", SET);
     295         352 :         failed += keywords_insert("SETS", SETS);
     296         352 :         failed += keywords_insert("AUTO_COMMIT", AUTO_COMMIT);
     297             : 
     298         352 :         failed += keywords_insert("ALL", ALL);
     299         352 :         failed += keywords_insert("ANY", ANY);
     300         352 :         failed += keywords_insert("SOME", SOME);
     301         352 :         failed += keywords_insert("EVERY", ANY);
     302             :         /*
     303             :            failed += keywords_insert("SQLCODE", SQLCODE );
     304             :          */
     305         352 :         failed += keywords_insert("COLUMN", COLUMN);
     306         352 :         failed += keywords_insert("TABLE", TABLE);
     307         352 :         failed += keywords_insert("TEMPORARY", TEMPORARY);
     308         352 :         failed += keywords_insert("TEMP", TEMP);
     309         352 :         failed += keywords_insert("REMOTE", REMOTE);
     310         352 :         failed += keywords_insert("MERGE", MERGE);
     311         352 :         failed += keywords_insert("REPLICA", REPLICA);
     312         352 :         failed += keywords_insert("UNLOGGED", UNLOGGED);
     313         352 :         failed += keywords_insert("TO", TO);
     314         352 :         failed += keywords_insert("UNION", UNION);
     315         352 :         failed += keywords_insert("EXCEPT", EXCEPT);
     316         352 :         failed += keywords_insert("INTERSECT", INTERSECT);
     317         352 :         failed += keywords_insert("CORRESPONDING", CORRESPONDING);
     318         352 :         failed += keywords_insert("UNIQUE", UNIQUE);
     319         352 :         failed += keywords_insert("USING", USING);
     320         352 :         failed += keywords_insert("VALUES", VALUES);
     321         352 :         failed += keywords_insert("VIEW", VIEW);
     322         352 :         failed += keywords_insert("WHERE", WHERE);
     323         352 :         failed += keywords_insert("WITH", WITH);
     324         352 :         failed += keywords_insert("WITHIN", WITHIN);
     325         352 :         failed += keywords_insert("WITHOUT", WITHOUT);
     326         352 :         failed += keywords_insert("DATA", DATA);
     327             : 
     328         352 :         failed += keywords_insert("DATE", sqlDATE);
     329         352 :         failed += keywords_insert("TIME", TIME);
     330         352 :         failed += keywords_insert("TIMESTAMP", TIMESTAMP);
     331         352 :         failed += keywords_insert("INTERVAL", INTERVAL);
     332         352 :         failed += keywords_insert("CURRENT_DATE", CURRENT_DATE);
     333         352 :         failed += keywords_insert("CURRENT_TIME", CURRENT_TIME);
     334         352 :         failed += keywords_insert("CURRENT_TIMESTAMP", CURRENT_TIMESTAMP);
     335         352 :         failed += keywords_insert("CURRENT_TIMEZONE", CURRENT_TIMEZONE);
     336         352 :         failed += keywords_insert("NOW", CURRENT_TIMESTAMP);
     337         352 :         failed += keywords_insert("LOCALTIME", LOCALTIME);
     338         352 :         failed += keywords_insert("LOCALTIMESTAMP", LOCALTIMESTAMP);
     339         352 :         failed += keywords_insert("ZONE", ZONE);
     340             : 
     341         352 :         failed += keywords_insert("CENTURY", CENTURY);
     342         352 :         failed += keywords_insert("DECADE", DECADE);
     343         352 :         failed += keywords_insert("YEAR", YEAR);
     344         352 :         failed += keywords_insert("QUARTER", QUARTER);
     345         352 :         failed += keywords_insert("MONTH", MONTH);
     346         352 :         failed += keywords_insert("WEEK", WEEK);
     347         352 :         failed += keywords_insert("DOW", DOW);
     348         352 :         failed += keywords_insert("DOY", DOY);
     349         352 :         failed += keywords_insert("DAY", DAY);
     350         352 :         failed += keywords_insert("HOUR", HOUR);
     351         352 :         failed += keywords_insert("MINUTE", MINUTE);
     352         352 :         failed += keywords_insert("SECOND", SECOND);
     353         352 :         failed += keywords_insert("EPOCH", EPOCH);
     354             : 
     355         352 :         failed += keywords_insert("POSITION", POSITION);
     356         352 :         failed += keywords_insert("SUBSTRING", SUBSTRING);
     357         352 :         failed += keywords_insert("SPLIT_PART", SPLIT_PART);
     358         352 :         failed += keywords_insert("TRIM", TRIM);
     359         352 :         failed += keywords_insert("LEADING", LEADING);
     360         352 :         failed += keywords_insert("TRAILING", TRAILING);
     361         352 :         failed += keywords_insert("BOTH", BOTH);
     362             : 
     363         352 :         failed += keywords_insert("CASE", CASE);
     364         352 :         failed += keywords_insert("WHEN", WHEN);
     365         352 :         failed += keywords_insert("THEN", THEN);
     366         352 :         failed += keywords_insert("ELSE", ELSE);
     367         352 :         failed += keywords_insert("END", END);
     368         352 :         failed += keywords_insert("NULLIF", NULLIF);
     369         352 :         failed += keywords_insert("COALESCE", COALESCE);
     370         352 :         failed += keywords_insert("ELSEIF", ELSEIF);
     371         352 :         failed += keywords_insert("IF", IF);
     372         352 :         failed += keywords_insert("WHILE", WHILE);
     373         352 :         failed += keywords_insert("DO", DO);
     374             : 
     375         352 :         failed += keywords_insert("COMMIT", COMMIT);
     376         352 :         failed += keywords_insert("ROLLBACK", ROLLBACK);
     377         352 :         failed += keywords_insert("SAVEPOINT", SAVEPOINT);
     378         352 :         failed += keywords_insert("RELEASE", RELEASE);
     379         352 :         failed += keywords_insert("WORK", WORK);
     380         352 :         failed += keywords_insert("CHAIN", CHAIN);
     381         352 :         failed += keywords_insert("PRESERVE", PRESERVE);
     382         352 :         failed += keywords_insert("ROWS", ROWS);
     383         352 :         failed += keywords_insert("NO", NO);
     384         352 :         failed += keywords_insert("START", START);
     385         352 :         failed += keywords_insert("TRANSACTION", TRANSACTION);
     386         352 :         failed += keywords_insert("READ", READ);
     387         352 :         failed += keywords_insert("WRITE", WRITE);
     388         352 :         failed += keywords_insert("ONLY", ONLY);
     389         352 :         failed += keywords_insert("ISOLATION", ISOLATION);
     390         352 :         failed += keywords_insert("LEVEL", LEVEL);
     391         352 :         failed += keywords_insert("UNCOMMITTED", UNCOMMITTED);
     392         352 :         failed += keywords_insert("COMMITTED", COMMITTED);
     393         352 :         failed += keywords_insert("REPEATABLE", sqlREPEATABLE);
     394         352 :         failed += keywords_insert("SNAPSHOT", SNAPSHOT);
     395         352 :         failed += keywords_insert("SERIALIZABLE", SERIALIZABLE);
     396         352 :         failed += keywords_insert("DIAGNOSTICS", DIAGNOSTICS);
     397         352 :         failed += keywords_insert("SIZE", sqlSIZE);
     398         352 :         failed += keywords_insert("STORAGE", STORAGE);
     399             : 
     400         352 :         failed += keywords_insert("TYPE", TYPE);
     401         352 :         failed += keywords_insert("PROCEDURE", PROCEDURE);
     402         352 :         failed += keywords_insert("FUNCTION", FUNCTION);
     403         352 :         failed += keywords_insert("LOADER", sqlLOADER);
     404         352 :         failed += keywords_insert("REPLACE", REPLACE);
     405             : 
     406         352 :         failed += keywords_insert("FIELD", FIELD);
     407         352 :         failed += keywords_insert("FILTER", FILTER);
     408         352 :         failed += keywords_insert("AGGREGATE", AGGREGATE);
     409         352 :         failed += keywords_insert("RETURNS", RETURNS);
     410         352 :         failed += keywords_insert("EXTERNAL", EXTERNAL);
     411         352 :         failed += keywords_insert("NAME", sqlNAME);
     412         352 :         failed += keywords_insert("RETURN", RETURN);
     413         352 :         failed += keywords_insert("CALL", CALL);
     414         352 :         failed += keywords_insert("LANGUAGE", LANGUAGE);
     415             : 
     416         352 :         failed += keywords_insert("ANALYZE", ANALYZE);
     417         352 :         failed += keywords_insert("EXPLAIN", SQL_EXPLAIN);
     418         352 :         failed += keywords_insert("PLAN", SQL_PLAN);
     419         352 :         failed += keywords_insert("TRACE", SQL_TRACE);
     420         352 :         failed += keywords_insert("PREPARE", PREPARE);
     421         352 :         failed += keywords_insert("PREP", PREP);
     422         352 :         failed += keywords_insert("EXECUTE", EXECUTE);
     423         352 :         failed += keywords_insert("EXEC", EXEC);
     424         352 :         failed += keywords_insert("DEALLOCATE", DEALLOCATE);
     425             : 
     426         352 :         failed += keywords_insert("INDEX", INDEX);
     427             : 
     428         352 :         failed += keywords_insert("SEQUENCE", SEQUENCE);
     429         352 :         failed += keywords_insert("RESTART", RESTART);
     430         352 :         failed += keywords_insert("INCREMENT", INCREMENT);
     431         352 :         failed += keywords_insert("MAXVALUE", MAXVALUE);
     432         352 :         failed += keywords_insert("MINVALUE", MINVALUE);
     433         352 :         failed += keywords_insert("CYCLE", CYCLE);
     434         352 :         failed += keywords_insert("CACHE", CACHE);
     435         352 :         failed += keywords_insert("NEXT", NEXT);
     436         352 :         failed += keywords_insert("VALUE", VALUE);
     437         352 :         failed += keywords_insert("GENERATED", GENERATED);
     438         352 :         failed += keywords_insert("ALWAYS", ALWAYS);
     439         352 :         failed += keywords_insert("IDENTITY", IDENTITY);
     440         352 :         failed += keywords_insert("SERIAL", SERIAL);
     441         352 :         failed += keywords_insert("BIGSERIAL", BIGSERIAL);
     442         352 :         failed += keywords_insert("AUTO_INCREMENT", AUTO_INCREMENT);
     443         352 :         failed += keywords_insert("CONTINUE", CONTINUE);
     444             : 
     445         352 :         failed += keywords_insert("TRIGGER", TRIGGER);
     446         352 :         failed += keywords_insert("ATOMIC", ATOMIC);
     447         352 :         failed += keywords_insert("BEGIN", BEGIN);
     448         352 :         failed += keywords_insert("OF", OF);
     449         352 :         failed += keywords_insert("BEFORE", BEFORE);
     450         352 :         failed += keywords_insert("AFTER", AFTER);
     451         352 :         failed += keywords_insert("ROW", ROW);
     452         352 :         failed += keywords_insert("STATEMENT", STATEMENT);
     453         352 :         failed += keywords_insert("NEW", sqlNEW);
     454         352 :         failed += keywords_insert("OLD", OLD);
     455         352 :         failed += keywords_insert("EACH", EACH);
     456         352 :         failed += keywords_insert("REFERENCING", REFERENCING);
     457             : 
     458         352 :         failed += keywords_insert("RANGE", RANGE);
     459         352 :         failed += keywords_insert("UNBOUNDED", UNBOUNDED);
     460         352 :         failed += keywords_insert("PRECEDING", PRECEDING);
     461         352 :         failed += keywords_insert("FOLLOWING", FOLLOWING);
     462         352 :         failed += keywords_insert("CURRENT", CURRENT);
     463         352 :         failed += keywords_insert("EXCLUDE", EXCLUDE);
     464         352 :         failed += keywords_insert("OTHERS", OTHERS);
     465         352 :         failed += keywords_insert("TIES", TIES);
     466         352 :         failed += keywords_insert("GROUPS", GROUPS);
     467         352 :         failed += keywords_insert("WINDOW", WINDOW);
     468             : 
     469             :         /* special SQL/XML keywords */
     470         352 :         failed += keywords_insert("XMLCOMMENT", XMLCOMMENT);
     471         352 :         failed += keywords_insert("XMLCONCAT", XMLCONCAT);
     472         352 :         failed += keywords_insert("XMLDOCUMENT", XMLDOCUMENT);
     473         352 :         failed += keywords_insert("XMLELEMENT", XMLELEMENT);
     474         352 :         failed += keywords_insert("XMLATTRIBUTES", XMLATTRIBUTES);
     475         352 :         failed += keywords_insert("XMLFOREST", XMLFOREST);
     476         352 :         failed += keywords_insert("XMLPARSE", XMLPARSE);
     477         352 :         failed += keywords_insert("STRIP", STRIP);
     478         352 :         failed += keywords_insert("WHITESPACE", WHITESPACE);
     479         352 :         failed += keywords_insert("XMLPI", XMLPI);
     480         352 :         failed += keywords_insert("XMLQUERY", XMLQUERY);
     481         352 :         failed += keywords_insert("PASSING", PASSING);
     482         352 :         failed += keywords_insert("XMLTEXT", XMLTEXT);
     483         352 :         failed += keywords_insert("NIL", NIL);
     484         352 :         failed += keywords_insert("REF", REF);
     485         352 :         failed += keywords_insert("ABSENT", ABSENT);
     486         352 :         failed += keywords_insert("DOCUMENT", DOCUMENT);
     487         352 :         failed += keywords_insert("ELEMENT", ELEMENT);
     488         352 :         failed += keywords_insert("CONTENT", CONTENT);
     489         352 :         failed += keywords_insert("XMLNAMESPACES", XMLNAMESPACES);
     490         352 :         failed += keywords_insert("NAMESPACE", NAMESPACE);
     491         352 :         failed += keywords_insert("XMLVALIDATE", XMLVALIDATE);
     492         352 :         failed += keywords_insert("RETURNING", RETURNING);
     493         352 :         failed += keywords_insert("RECURSIVE", RECURSIVE);
     494         352 :         failed += keywords_insert("LOCATION", LOCATION);
     495         352 :         failed += keywords_insert("ID", ID);
     496         352 :         failed += keywords_insert("ACCORDING", ACCORDING);
     497         352 :         failed += keywords_insert("XMLSCHEMA", XMLSCHEMA);
     498         352 :         failed += keywords_insert("URI", URI);
     499         352 :         failed += keywords_insert("XMLAGG", XMLAGG);
     500             : 
     501             :         /* keywords for opengis */
     502         352 :         failed += keywords_insert("GEOMETRY", GEOMETRY);
     503             : 
     504         352 :         failed += keywords_insert("POINT", GEOMETRYSUBTYPE);
     505         352 :         failed += keywords_insert("LINESTRING", GEOMETRYSUBTYPE);
     506         352 :         failed += keywords_insert("POLYGON", GEOMETRYSUBTYPE);
     507         352 :         failed += keywords_insert("MULTIPOINT", GEOMETRYSUBTYPE);
     508         352 :         failed += keywords_insert("MULTILINESTRING", GEOMETRYSUBTYPE);
     509         352 :         failed += keywords_insert("MULTIPOLYGON", GEOMETRYSUBTYPE);
     510         352 :         failed += keywords_insert("GEOMETRYCOLLECTION", GEOMETRYSUBTYPE);
     511             : 
     512         352 :         failed += keywords_insert("POINTZ", GEOMETRYSUBTYPE);
     513         352 :         failed += keywords_insert("LINESTRINGZ", GEOMETRYSUBTYPE);
     514         352 :         failed += keywords_insert("POLYGONZ", GEOMETRYSUBTYPE);
     515         352 :         failed += keywords_insert("MULTIPOINTZ", GEOMETRYSUBTYPE);
     516         352 :         failed += keywords_insert("MULTILINESTRINGZ", GEOMETRYSUBTYPE);
     517         352 :         failed += keywords_insert("MULTIPOLYGONZ", GEOMETRYSUBTYPE);
     518         352 :         failed += keywords_insert("GEOMETRYCOLLECTIONZ", GEOMETRYSUBTYPE);
     519             : 
     520         352 :         failed += keywords_insert("POINTM", GEOMETRYSUBTYPE);
     521         352 :         failed += keywords_insert("LINESTRINGM", GEOMETRYSUBTYPE);
     522         352 :         failed += keywords_insert("POLYGONM", GEOMETRYSUBTYPE);
     523         352 :         failed += keywords_insert("MULTIPOINTM", GEOMETRYSUBTYPE);
     524         352 :         failed += keywords_insert("MULTILINESTRINGM", GEOMETRYSUBTYPE);
     525         352 :         failed += keywords_insert("MULTIPOLYGONM", GEOMETRYSUBTYPE);
     526         352 :         failed += keywords_insert("GEOMETRYCOLLECTIONM", GEOMETRYSUBTYPE);
     527             : 
     528         352 :         failed += keywords_insert("POINTZM", GEOMETRYSUBTYPE);
     529         352 :         failed += keywords_insert("LINESTRINGZM", GEOMETRYSUBTYPE);
     530         352 :         failed += keywords_insert("POLYGONZM", GEOMETRYSUBTYPE);
     531         352 :         failed += keywords_insert("MULTIPOINTZM", GEOMETRYSUBTYPE);
     532         352 :         failed += keywords_insert("MULTILINESTRINGZM", GEOMETRYSUBTYPE);
     533         352 :         failed += keywords_insert("MULTIPOLYGONZM", GEOMETRYSUBTYPE);
     534         352 :         failed += keywords_insert("GEOMETRYCOLLECTIONZM", GEOMETRYSUBTYPE);
     535         352 :         failed += keywords_insert("LOGIN", LOGIN);
     536             :         // odbc keywords
     537         352 :         failed += keywords_insert("d", ODBC_DATE_ESCAPE_PREFIX);
     538         352 :         failed += keywords_insert("t", ODBC_TIME_ESCAPE_PREFIX);
     539         352 :         failed += keywords_insert("ts", ODBC_TIMESTAMP_ESCAPE_PREFIX);
     540         352 :         failed += keywords_insert("guid", ODBC_GUID_ESCAPE_PREFIX);
     541         352 :         failed += keywords_insert("fn", ODBC_FUNC_ESCAPE_PREFIX);
     542         352 :         failed += keywords_insert("oj", ODBC_OJ_ESCAPE_PREFIX);
     543         352 :         failed += keywords_insert("DAYNAME", DAYNAME);
     544         352 :         failed += keywords_insert("IFNULL", IFNULL);
     545         352 :         failed += keywords_insert("MONTHNAME", MONTHNAME);
     546         352 :         failed += keywords_insert("TIMESTAMPADD", TIMESTAMPADD);
     547         352 :         failed += keywords_insert("TIMESTAMPDIFF", TIMESTAMPDIFF);
     548         352 :         failed += keywords_insert("SQL_BIGINT", SQL_BIGINT);
     549         352 :         failed += keywords_insert("SQL_BINARY", SQL_BINARY);
     550         352 :         failed += keywords_insert("SQL_BIT", SQL_BIT);
     551         352 :         failed += keywords_insert("SQL_CHAR", SQL_CHAR);
     552         352 :         failed += keywords_insert("SQL_DATE", SQL_DATE);
     553         352 :         failed += keywords_insert("SQL_DECIMAL", SQL_DECIMAL);
     554         352 :         failed += keywords_insert("SQL_DOUBLE", SQL_DOUBLE);
     555         352 :         failed += keywords_insert("SQL_FLOAT", SQL_FLOAT);
     556         352 :         failed += keywords_insert("SQL_GUID", SQL_GUID);
     557         352 :         failed += keywords_insert("SQL_HUGEINT", SQL_HUGEINT);
     558         352 :         failed += keywords_insert("SQL_INTEGER", SQL_INTEGER);
     559         352 :         failed += keywords_insert("SQL_INTERVAL_DAY", SQL_INTERVAL_DAY);
     560         352 :         failed += keywords_insert("SQL_INTERVAL_DAY_TO_HOUR", SQL_INTERVAL_DAY_TO_HOUR);
     561         352 :         failed += keywords_insert("SQL_INTERVAL_DAY_TO_MINUTE", SQL_INTERVAL_DAY_TO_MINUTE);
     562         352 :         failed += keywords_insert("SQL_INTERVAL_DAY_TO_SECOND", SQL_INTERVAL_DAY_TO_SECOND);
     563         352 :         failed += keywords_insert("SQL_INTERVAL_HOUR", SQL_INTERVAL_HOUR);
     564         352 :         failed += keywords_insert("SQL_INTERVAL_HOUR_TO_MINUTE", SQL_INTERVAL_HOUR_TO_MINUTE);
     565         352 :         failed += keywords_insert("SQL_INTERVAL_HOUR_TO_SECOND", SQL_INTERVAL_HOUR_TO_SECOND);
     566         352 :         failed += keywords_insert("SQL_INTERVAL_MINUTE", SQL_INTERVAL_MINUTE);
     567         352 :         failed += keywords_insert("SQL_INTERVAL_MINUTE_TO_SECOND", SQL_INTERVAL_MINUTE_TO_SECOND);
     568         352 :         failed += keywords_insert("SQL_INTERVAL_MONTH", SQL_INTERVAL_MONTH);
     569         352 :         failed += keywords_insert("SQL_INTERVAL_SECOND", SQL_INTERVAL_SECOND);
     570         352 :         failed += keywords_insert("SQL_INTERVAL_YEAR", SQL_INTERVAL_YEAR);
     571         352 :         failed += keywords_insert("SQL_INTERVAL_YEAR_TO_MONTH", SQL_INTERVAL_YEAR_TO_MONTH);
     572         352 :         failed += keywords_insert("SQL_LONGVARBINARY", SQL_LONGVARBINARY);
     573         352 :         failed += keywords_insert("SQL_LONGVARCHAR", SQL_LONGVARCHAR);
     574         352 :         failed += keywords_insert("SQL_NUMERIC", SQL_NUMERIC);
     575         352 :         failed += keywords_insert("SQL_REAL", SQL_REAL);
     576         352 :         failed += keywords_insert("SQL_SMALLINT", SQL_SMALLINT);
     577         352 :         failed += keywords_insert("SQL_TIME", SQL_TIME);
     578         352 :         failed += keywords_insert("SQL_TIMESTAMP", SQL_TIMESTAMP);
     579         352 :         failed += keywords_insert("SQL_TINYINT", SQL_TINYINT);
     580         352 :         failed += keywords_insert("SQL_VARBINARY", SQL_VARBINARY);
     581         352 :         failed += keywords_insert("SQL_VARCHAR", SQL_VARCHAR);
     582         352 :         failed += keywords_insert("SQL_WCHAR", SQL_WCHAR);
     583         352 :         failed += keywords_insert("SQL_WLONGVARCHAR", SQL_WLONGVARCHAR);
     584         352 :         failed += keywords_insert("SQL_WVARCHAR", SQL_WVARCHAR);
     585         352 :         failed += keywords_insert("SQL_TSI_FRAC_SECOND", SQL_TSI_FRAC_SECOND);
     586         352 :         failed += keywords_insert("SQL_TSI_SECOND", SQL_TSI_SECOND);
     587         352 :         failed += keywords_insert("SQL_TSI_MINUTE", SQL_TSI_MINUTE);
     588         352 :         failed += keywords_insert("SQL_TSI_HOUR", SQL_TSI_HOUR);
     589         352 :         failed += keywords_insert("SQL_TSI_DAY", SQL_TSI_DAY);
     590         352 :         failed += keywords_insert("SQL_TSI_WEEK", SQL_TSI_WEEK);
     591         352 :         failed += keywords_insert("SQL_TSI_MONTH", SQL_TSI_MONTH);
     592         352 :         failed += keywords_insert("SQL_TSI_QUARTER", SQL_TSI_QUARTER);
     593         352 :         failed += keywords_insert("SQL_TSI_YEAR", SQL_TSI_YEAR);
     594             : 
     595         352 :         failed += keywords_insert("LEAST", MARGFUNC);
     596         352 :         failed += keywords_insert("GREATEST", MARGFUNC);
     597         352 :         return failed;
     598             : }
     599             : 
     /*
      * Call find_keyword() on the text that starts s bytes past the current
      * read position (rs->pos) of lc's input buffer.  Both macro parameters
      * are parenthesized so that expression arguments expand safely.
      */
     #define find_keyword_bs(lc, s) find_keyword((lc)->rs->buf + (lc)->rs->pos + (s))
     601             : 
     602             : void
     603      248586 : scanner_init(struct scanner *s, bstream *rs, stream *ws)
     604             : {
     605      497172 :         *s = (struct scanner) {
     606             :                 .rs = rs,
     607             :                 .ws = ws,
     608             :                 .mode = LINE_N,
     609      248586 :                 .raw_string_mode = GDKgetenv_istrue("raw_strings"),
     610             :                 .aborted = false,
     611             :         };
     612      248586 : }
     613             : 
     614             : void
     615     1321283 : scanner_query_processed(struct scanner *s)
     616             : {
     617     1321283 :         int cur;
     618             : 
     619     1321283 :         if (s->yybak) {
     620      515750 :                 s->rs->buf[s->rs->pos + s->yycur] = s->yybak;
     621      515750 :                 s->yybak = 0;
     622             :         }
     623     1321283 :         if (s->rs) {
     624     1321283 :                 s->rs->pos += s->yycur;
     625             :                 /* completely eat the query including white space after the ; */
     626     2482194 :                 while (s->rs->pos < s->rs->len &&
     627     2145384 :                            (cur = s->rs->buf[s->rs->pos], iswspace(cur))) {
     628     1160911 :                         s->rs->pos++;
     629             :                 }
     630             :         }
     631             :         /*assert(s->rs->pos <= s->rs->len);*/
     632     1321283 :         s->yycur = 0;
     633     1321283 :         s->started = 0;
     634     1321283 :         s->as = 0;
     635     1321283 :         s->schema = NULL;
     636     1321283 : }
     637             : 
     638             : static int
     639          33 : scanner_error(mvc *lc, int cur)
     640             : {
     641          33 :         switch (cur) {
     642           0 :         case EOF:
     643           0 :                 (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected end of input");
     644           0 :                 return EOF;
     645          33 :         default:
     646             :                 /* on Windows at least, iswcntrl returns TRUE for
     647             :                  * U+FEFF, but we just want consistent error
     648             :                  * messages */
     649          33 :                 (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected%s character (U+%04X)", iswcntrl(cur) && cur != 0xFEFF ? " control" : "", (unsigned) cur);
     650             :         }
     651          33 :         return LEX_ERROR;
     652             : }
     653             : 
     654             : 
     655             : /*
     656             :    UTF-8 encoding is as follows:
     657             : U-00000000 - U-0000007F: 0xxxxxxx
     658             : U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
     659             : U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
     660             : U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
     661             : U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
     662             : U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
     663             : */
     /* A well-formed UTF-8 sequence is the shortest one that can hold the
        encoded value.  utf8chkmsk[n] is the set of value bits of which at
        least one must be set when the sequence has n continuation bytes
        (total length n+1); if none is set, a shorter encoding would have
        sufficed and the sequence is rejected.
     */
     static const int utf8chkmsk[] = {
             [0] = 0x0000007f,       /* 1 byte  */
             [1] = 0x00000780,       /* 2 bytes */
             [2] = 0x0000f800,       /* 3 bytes */
             [3] = 0x001f0000,       /* 4 bytes */
             [4] = 0x03e00000,       /* 5 bytes */
             [5] = 0x7c000000,       /* 6 bytes */
     };
     678             : 
     679             : static void
     680    32035939 : utf8_putchar(struct scanner *lc, int ch)
     681             : {
     682    32035939 :         if ((ch) < 0x80) {
     683    32035934 :                 lc->yycur--;
     684           5 :         } else if ((ch) < 0x800) {
     685           0 :                 lc->yycur -= 2;
     686           5 :         } else if ((ch) < 0x10000) {
     687           5 :                 lc->yycur -= 3;
     688             :         } else {
     689           0 :                 lc->yycur -= 4;
     690             :         }
     691    32035939 : }
     692             : 
     693             : static inline int
     694   139076486 : scanner_read_more(struct scanner *lc, size_t n)
     695             : {
     696   139076486 :         bstream *b = lc->rs;
     697   139076486 :         bool more = false;
     698             : 
     699             : 
     700   139076486 :         if (lc->aborted)
     701             :                 return EOF;
     702   139080791 :         while (b->len < b->pos + lc->yycur + n) {
     703             : 
     704      124849 :                 if (lc->mode == LINE_1 || !lc->started)
     705             :                         return EOF;
     706             : 
     707             :                 /* query is not finished ask for more */
     708           0 :                 if (b->eof || !isa_block_stream(b->s)) {
     709           0 :                         if (bstream_getoob(b)) {
     710           0 :                                 lc->aborted = true;
     711           0 :                                 return EOF;
     712             :                         }
     713        2156 :                         if (mnstr_write(lc->ws, PROMPT2, sizeof(PROMPT2) - 1, 1) == 1)
     714        2156 :                                 mnstr_flush(lc->ws, MNSTR_FLUSH_DATA);
     715        2156 :                         b->eof = false;
     716        2156 :                         more = true;
     717             :                 }
     718             :                 /* we need more query text */
     719        4312 :                 if (bstream_next(b) < 0) {
     720           0 :                         if (mnstr_errnr(b->s) == MNSTR_INTERRUPT) {
     721             :                                 // now what?
     722           0 :                                 lc->errstr = "Query aborted";
     723           0 :                                 lc->aborted = true;
     724           0 :                                 mnstr_clearerr(b->s);
     725             :                         }
     726           0 :                         return EOF;
     727        4312 :                 } else if (/* we asked for more data but didn't get any */
     728        2156 :                            (more && b->eof && b->len < b->pos + lc->yycur + n))
     729             :                         return EOF;
     730        4305 :                 if (more && b->pos + lc->yycur + 2 == b->len && b->buf[b->pos + lc->yycur] == '\200' && b->buf[b->pos + lc->yycur + 1] == '\n') {
     731           0 :                         lc->errstr = "Query aborted";
     732           0 :                         b->len -= 2;
     733           0 :                         b->buf[b->len] = 0;
     734           0 :                         return EOF;
     735             :                 }
     736             :         }
     737             :         return 1;
     738             : }
     739             : 
     740             : static inline int
     741   137806405 : scanner_getc(struct scanner *lc)
     742             : {
     743   137806405 :         bstream *b = lc->rs;
     744   137806405 :         unsigned char *s = NULL;
     745   137806405 :         int c, m, n, mask;
     746             : 
     747   137806405 :         if (scanner_read_more(lc, 1) == EOF) {
     748             :                 //lc->errstr = SQLSTATE(42000) "end of input stream";
     749             :                 return EOF;
     750             :         }
     751   137718309 :         lc->errstr = NULL;
     752             : 
     753   137718309 :         s = (unsigned char *) b->buf + b->pos + lc->yycur++;
     754   137718309 :         if (((c = *s) & 0x80) == 0) {
     755             :                 /* 7-bit char */
     756             :                 return c;
     757             :         }
     758       88250 :         for (n = 0, m = 0x40; c & m; n++, m >>= 1)
     759             :                 ;
     760             :         /* n now is number of 10xxxxxx bytes that should follow */
     761       29443 :         if (n == 0 || n >= 6 || (b->pos + n) > b->len) {
     762             :                 /* incorrect UTF-8 sequence */
     763             :                 /* n==0: c == 10xxxxxx */
     764             :                 /* n>=6: c == 1111111x */
     765           0 :                 lc->errstr = SQLSTATE(42000) "invalid start of UTF-8 sequence";
     766           0 :                 goto error;
     767             :         }
     768             : 
     769       29443 :         if (scanner_read_more(lc, (size_t) n) == EOF)
     770             :                 return EOF;
     771       29443 :         s = (unsigned char *) b->buf + b->pos + lc->yycur;
     772             : 
     773       29443 :         mask = utf8chkmsk[n];
     774       29443 :         c &= ~(0xFFC0 >> n);  /* remove non-x bits */
     775       88249 :         while (--n >= 0) {
     776       58807 :                 c <<= 6;
     777       58807 :                 lc->yycur++;
     778       58807 :                 if (((m = *s++) & 0xC0) != 0x80) {
     779             :                         /* incorrect UTF-8 sequence: byte is not 10xxxxxx */
     780             :                         /* this includes end-of-string (m == 0) */
     781           1 :                         lc->errstr = SQLSTATE(42000) "invalid continuation in UTF-8 sequence";
     782           1 :                         goto error;
     783             :                 }
     784       58806 :                 c |= m & 0x3F;
     785             :         }
     786       29442 :         if ((c & mask) == 0) {
     787             :                 /* incorrect UTF-8 sequence: not shortest possible */
     788           0 :                 lc->errstr = SQLSTATE(42000) "not shortest possible UTF-8 sequence";
     789           0 :                 goto error;
     790             :         }
     791             : 
     792             :         return c;
     793             : 
     794           1 : error:
     795           1 :         if (b->pos + lc->yycur < b->len)    /* skip bogus char */
     796           0 :                 lc->yycur++;
     797             :         return EOF;
     798             : }
     799             : 
     800             : static int
     801    28612864 : scanner_token(struct scanner *lc, int token)
     802             : {
     803    28612864 :         lc->yybak = lc->rs->buf[lc->rs->pos + lc->yycur];
     804    28612864 :         lc->rs->buf[lc->rs->pos + lc->yycur] = 0;
     805    28612864 :         lc->yyval = token;
     806    28612864 :         return lc->yyval;
     807             : }
     808             : 
     809             : static int
     810     2134549 : scanner_string(mvc *c, int quote, bool escapes)
     811             : {
     812     2134549 :         struct scanner *lc = &c->scanner;
     813     2134549 :         bstream *rs = lc->rs;
     814     2134549 :         int cur = quote;
     815     2134549 :         bool escape = false;
     816     2134549 :         const size_t limit = quote == '"' ? 1 << 11 : 1 << 30;
     817             : 
     818     2134549 :         lc->started = 1;
     819     2172477 :         while (cur != EOF) {
     820     2172462 :                 size_t pos = 0;
     821     2172462 :                 const size_t yycur = rs->pos + lc->yycur;
     822             : 
     823    35509646 :                 while (cur != EOF && (quote != '"' || cur != 0xFEFF) && pos < limit &&
     824    33337184 :                        (((cur = rs->buf[yycur + pos++]) & 0x80) == 0) &&
     825    66644914 :                        cur && (cur != quote || escape)) {
     826    31164723 :                         if (escapes && cur == '\\')
     827        6762 :                                 escape = !escape;
     828             :                         else
     829             :                                 escape = false;
     830             :                 }
     831     2172462 :                 if (pos == limit) {
     832           0 :                         (void) sql_error(c, 2, SQLSTATE(42000) "string too long");
     833           0 :                         return LEX_ERROR;
     834             :                 }
     835             :                 /* BOM character not allowed as an identifier */
     836     2172462 :                 if (cur == EOF || (quote == '"' && cur == 0xFEFF))
     837           1 :                         return scanner_error(c, cur);
     838     2172461 :                 lc->yycur += pos;
     839             :                 /* check for quote escaped quote: Obscure SQL Rule */
     840     2172461 :                 if (cur == quote && rs->buf[yycur + pos] == quote) {
     841        8487 :                         lc->yycur++;
     842        8487 :                         continue;
     843             :                 }
     844     2163974 :                 assert(yycur + pos <= rs->len + 1);
     845     2163974 :                 if (cur == quote && !escape) {
     846     2134519 :                         return scanner_token(lc, STRING);
     847             :                 }
     848       29455 :                 lc->yycur--; /* go back to current (possibly invalid) char */
     849             :                 /* long utf8, if correct isn't the quote */
     850       29455 :                 if (!cur) {
     851          30 :                         if (lc->rs->len >= lc->rs->pos + lc->yycur + 1) {
     852          14 :                                 (void) sql_error(c, 2, SQLSTATE(42000) "NULL byte in string");
     853          14 :                                 return LEX_ERROR;
     854             :                         }
     855          16 :                         cur = scanner_read_more(lc, 1);
     856             :                 } else {
     857       29425 :                         cur = scanner_getc(lc);
     858             :                 }
     859             :         }
     860          15 :         (void) sql_error(c, 2, "%s", lc->errstr ? lc->errstr : SQLSTATE(42000) "Unexpected end of input");
     861          15 :         return EOF;
     862             : }
     863             : 
     864             : /* scan a structure {blah} into a string. We only count the matching {}
     865             :  * unless escaped. We do not consider embeddings in string literals yet
     866             :  */
     867             : 
     868             : static int
     869         234 : scanner_body(mvc *c)
     870             : {
     871         234 :         struct scanner *lc = &c->scanner;
     872         234 :         bstream *rs = lc->rs;
     873         234 :         int cur = (int) 'x';
     874         234 :         int blk = 1;
     875         234 :         bool escape = false;
     876             : 
     877         234 :         lc->started = 1;
     878         234 :         assert(rs->buf[rs->pos + lc->yycur-1] == '{');
     879         290 :         while (cur != EOF) {
     880         290 :                 size_t pos = rs->pos + lc->yycur;
     881             : 
     882       32350 :                 while ((((cur = rs->buf[pos++]) & 0x80) == 0) && cur && (blk || escape)) {
     883       32060 :                         if (cur != '\\')
     884             :                                 escape = false;
     885             :                         else
     886          12 :                                 escape = !escape;
     887       32060 :                         blk += cur =='{';
     888       32060 :                         blk -= cur =='}';
     889             :                 }
     890         290 :                 lc->yycur = pos - rs->pos;
     891         290 :                 assert(pos <= rs->len + 1);
     892         290 :                 if (blk == 0 && !escape){
     893         234 :                         lc->yycur--; /* go back to current (possibly invalid) char */
     894         234 :                         return scanner_token(lc, X_BODY);
     895             :                 }
     896          56 :                 lc->yycur--; /* go back to current (possibly invalid) char */
     897          56 :                 if (!cur) {
     898          56 :                         if (lc->rs->len >= lc->rs->pos + lc->yycur + 1) {
     899           0 :                                 (void) sql_error(c, 2, SQLSTATE(42000) "NULL byte in string");
     900           0 :                                 return LEX_ERROR;
     901             :                         }
     902          56 :                         cur = scanner_read_more(lc, 1);
     903             :                 } else {
     904           0 :                         cur = scanner_getc(lc);
     905             :                 }
     906             :         }
     907           0 :         (void) sql_error(c, 2, SQLSTATE(42000) "Unexpected end of input");
     908           0 :         return EOF;
     909             : }
     910             : 
     911             : static int
     912    13748732 : keyword_or_ident(mvc * c, int cur)
     913             : {
     914    13748732 :         struct scanner *lc = &c->scanner;
     915    13748732 :         keyword *k = NULL;
     916    13748732 :         size_t s;
     917             : 
     918    13748732 :         lc->started = 1;
     919    13748732 :         utf8_putchar(lc, cur);
     920    13748718 :         s = lc->yycur;
     921    13748718 :         lc->yyval = IDENT;
     922    82229891 :         while ((cur = scanner_getc(lc)) != EOF) {
     923    82229783 :                 if (!iswalnum(cur) && cur != '_') {
     924    13748610 :                         utf8_putchar(lc, cur);
     925    13748642 :                         (void)scanner_token(lc, IDENT);
     926    13748642 :                         if ((k = find_keyword_bs(lc,s)))
     927     8431231 :                                 lc->yyval = k->token;
     928    13748909 :                         return lc->yyval;
     929             :                 }
     930             :         }
     931             :         if (cur < 0)
     932             :                 return cur;
     933             :         (void)scanner_token(lc, IDENT);
     934             :         if ((k = find_keyword_bs(lc,s)))
     935             :                 lc->yyval = k->token;
     936             :         return lc->yyval;
     937             : }
     938             : 
     939             : static int
     940    14354737 : skip_white_space(struct scanner * lc)
     941             : {
     942    17963539 :         int cur;
     943             : 
     944    17963539 :         do {
     945    17963539 :                 lc->yysval = lc->yycur;
     946    17963539 :         } while ((cur = scanner_getc(lc)) != EOF && iswspace(cur));
     947    14353715 :         return cur;
     948             : }
     949             : 
     950             : static int
     951       70455 : skip_c_comment(struct scanner * lc)
     952             : {
     953       70455 :         int cur;
     954       70455 :         int prev = 0;
     955       70455 :         int started = lc->started;
     956       70455 :         int depth = 1;
     957             : 
     958       70455 :         lc->started = 1;
     959     1420738 :         while (depth > 0 && (cur = scanner_getc(lc)) != EOF) {
     960     1350283 :                 if (prev == '*' && cur == '/')
     961       70455 :                         depth--;
     962     1279828 :                 else if (prev == '/' && cur == '*') {
     963             :                         /* block comments can nest */
     964           0 :                         cur = 0; /* prevent slash-star-slash from matching */
     965           0 :                         depth++;
     966             :                 }
     967             :                 prev = cur;
     968             :         }
     969       70455 :         lc->yysval = lc->yycur;
     970       70455 :         lc->started = started;
     971             :         /* a comment is equivalent to a newline */
     972       70455 :         return cur == EOF ? cur : '\n';
     973             : }
     974             : 
     975             : static int
     976        3324 : skip_sql_comment(struct scanner * lc)
     977             : {
     978        3324 :         int cur;
     979        3324 :         int started = lc->started;
     980             : 
     981        3324 :         lc->started = 1;
     982      835132 :         while ((cur = scanner_getc(lc)) != EOF && (cur != '\n'))
     983             :                 ;
     984        3324 :         lc->yysval = lc->yycur;
     985        3324 :         lc->started = started;
     986             :         /* a comment is equivalent to a newline */
     987        3324 :         return cur;
     988             : }
     989             : 
     990             : static int tokenize(mvc * lc, int cur);
     991             : 
     992     5776632 : static inline bool is_valid_decimal_digit(int cur) { return (iswdigit(cur)); }
     993          13 : static inline bool is_valid_binary_digit(int cur) { return (iswdigit(cur) && cur < '2'); }
     994          10 : static inline bool is_valid_octal_digit(int cur) { return (iswdigit(cur) && cur < '8'); }
     995        3688 : static inline bool is_valid_hexadecimal_digit(int cur) { return iswxdigit(cur); }
     996             : 
     997     1926537 : static inline int check_validity_number(mvc* c, int pcur, bool initial_underscore_allowed, int *token, int type) {
     998     1926537 :         struct scanner *lc = &c->scanner;
     999     1926537 :         bool (*is_valid_n_ary_digit)(int);
    1000             : 
    1001     1926537 :         if (pcur == '_' && !initial_underscore_allowed)  /* ERROR: initial underscore not allowed */  {
    1002           0 :                 *token = 0;
    1003           0 :                 return '_';
    1004             :         }
    1005             : 
    1006     1926537 :         switch (type) {
    1007             :         case BINARYNUM:
    1008             :                 is_valid_n_ary_digit = &is_valid_binary_digit;
    1009             :                 break;
    1010           3 :         case OCTALNUM:
    1011           3 :                 is_valid_n_ary_digit = &is_valid_octal_digit;
    1012           3 :                 break;
    1013         280 :         case HEXADECIMALNUM:
    1014         280 :                 is_valid_n_ary_digit = &is_valid_hexadecimal_digit;
    1015         280 :                 break;
    1016     1926252 :         default:
    1017     1926252 :                 is_valid_n_ary_digit = &is_valid_decimal_digit;
    1018     1926252 :                 break;
    1019             :         }
    1020             : 
    1021     1926537 :         if ( !(pcur == '_' || is_valid_n_ary_digit(pcur)) ) /* ERROR: first digit is not valid */ {
    1022          18 :                 *token = 0;
    1023          18 :                 return pcur;
    1024             :         }
    1025             : 
    1026     1926672 :         int cur = scanner_getc(lc);
    1027     1927196 :         *token = type;
    1028     3864059 :         while (cur != EOF) {
    1029     3863999 :                 if (cur == '_') {
    1030          25 :                         if (pcur == '_') /* ERROR: multiple consecutive underscores */ {
    1031           2 :                                 *token = 0;
    1032           2 :                                 return '_';
    1033             :                         }
    1034             :                 }
    1035     3863974 :                 else if (!is_valid_n_ary_digit(cur))
    1036             :                         break;
    1037     1937484 :                 pcur = cur;
    1038     1937484 :                 cur = scanner_getc(lc);
    1039             :         }
    1040             : 
    1041     1926495 :         if (pcur == '_')  {
    1042           3 :                 *token = 0;
    1043           3 :                 if (iswalnum(cur))       /* ERROR: not a valid digit */
    1044             :                         return cur;
    1045             :                 else                            /* ERROR: number ends with underscore */
    1046             :                         return '_';
    1047             :         }
    1048             : 
    1049             :         return cur;
    1050             : }
    1051             : 
    1052             : static int
    1053     1912979 : number(mvc * c, int cur)
    1054             : {
    1055     1912979 :         struct scanner *lc = &c->scanner;
    1056     1912979 :         int token = sqlINT;
    1057             : 
    1058             :         /* a number has one of these forms (expressed in regular expressions):
    1059             :          * 0x[0-9A-Fa-f]+                   -- (hexadecimal) INTEGER
    1060             :          * \.[0-9]+                         -- DECIMAL
    1061             :          * [0-9]+\.[0-9]*                   -- DECIMAL
    1062             :          * [0-9]+@0                         -- OID
    1063             :          * [0-9]*\.[0-9]+[eE][-+]?[0-9]+    -- REAL
    1064             :          * [0-9]+(\.[0-9]*)?[eE][-+]?[0-9]+ -- REAL
    1065             :          * [0-9]+                           -- (decimal) INTEGER
    1066             :          */
    1067     1912979 :         lc->started = 1;
    1068     1912979 :         if (cur == '0') {
    1069      317825 :                 switch ((cur = scanner_getc(lc))) {
    1070           2 :                 case 'b':
    1071           2 :                         cur = scanner_getc(lc);
    1072           2 :                         if ((cur = check_validity_number(c, cur, true, &token, BINARYNUM)) == EOF) return cur;
    1073             :                         break;
    1074           3 :                 case 'o':
    1075           3 :                         cur = scanner_getc(lc);
    1076           3 :                         if ((cur = check_validity_number(c,  cur, true, &token, OCTALNUM)) == EOF) return cur;
    1077             :                         break;
    1078         280 :                 case 'x':
    1079         280 :                         cur = scanner_getc(lc);
    1080         280 :                         if ((cur = check_validity_number(c,  cur, true, &token, HEXADECIMALNUM)) == EOF) return cur;
    1081             :                         break;
    1082      317542 :                 default:
    1083      317542 :                         utf8_putchar(lc, cur);
    1084      317542 :                         cur = '0';
    1085             :                 }
    1086             :         }
    1087     1912980 :         if (token == sqlINT) {
    1088     1913020 :                 if ((cur = check_validity_number(c, cur, false, &token, sqlINT)) == EOF) return cur;
    1089     1912893 :                 if (cur == '@') {
    1090           0 :                         if (token == sqlINT) {
    1091           0 :                                 cur = scanner_getc(lc);
    1092           0 :                                 if (cur == EOF)
    1093             :                                         return cur;
    1094           0 :                                 if (cur == '0') {
    1095           0 :                                         cur = scanner_getc(lc);
    1096           0 :                                         if (cur == EOF)
    1097             :                                                 return cur;
    1098           0 :                                         token = OIDNUM;
    1099             :                                 } else {
    1100             :                                         /* number + '@' not followed by 0: show '@' as erroneous */
    1101           0 :                                         utf8_putchar(lc, cur);
    1102           0 :                                         cur = '@';
    1103           0 :                                         token = 0;
    1104             :                                 }
    1105             :                         }
    1106             :                 } else {
    1107     1912893 :                         if (cur == '.') {
    1108       11120 :                                 cur = scanner_getc(lc);
    1109       11120 :                                 if (iswalnum(cur)) /* early exit for numerical forms with final . e.g. 10. */
    1110       11113 :                                 if ((cur = check_validity_number(c, cur, false, &token, INTNUM)) == EOF) return cur;
    1111             :                         }
    1112     1912893 :                         if (token != 0)
    1113     1912635 :                         if (cur == 'e' || cur == 'E') {
    1114        2226 :                                 cur = scanner_getc(lc);
    1115        2226 :                                 if (cur == '+' || cur == '-')
    1116        2111 :                                         cur = scanner_getc(lc);
    1117        2226 :                                 if ((cur = check_validity_number(c, cur, false, &token, APPROXNUM)) == EOF) return cur;
    1118             :                         }
    1119             :                 }
    1120             :         }
    1121             : 
    1122     1910627 :         assert(cur != EOF);
    1123             : 
    1124     1912853 :         if (iswalnum(cur)) /* ERROR: not a valid digit */
    1125           6 :                 token = 0;
    1126             : 
    1127     1912853 :         utf8_putchar(lc, cur);
    1128             : 
    1129     1912871 :         if (token) {
    1130     1912861 :                 return scanner_token(lc, token);
    1131             :         } else {
    1132          10 :                 (void)sql_error( c, 2, SQLSTATE(42000) "Unexpected symbol %lc", (wint_t) cur);
    1133          10 :                 return LEX_ERROR;
    1134             :         }
    1135             : }
    1136             : 
    1137             : static
    1138    13018014 : int scanner_symbol(mvc * c, int cur)
    1139             : {
    1140    13018014 :         struct scanner *lc = &c->scanner;
    1141    13018014 :         int next = 0;
    1142    13018014 :         int started = lc->started;
    1143             : 
    1144    13018014 :         switch (cur) {
    1145       73105 :         case '/':
    1146       73105 :                 lc->started = 1;
    1147       73105 :                 next = scanner_getc(lc);
    1148       73105 :                 if (next < 0)
    1149             :                         return EOF;
    1150       73105 :                 if (next == '*') {
    1151       70455 :                         lc->started = started;
    1152       70455 :                         cur = skip_c_comment(lc);
    1153       70455 :                         if (cur < 0)
    1154             :                                 return EOF;
    1155       70455 :                         return tokenize(c, cur);
    1156             :                 } else {
    1157        2650 :                         utf8_putchar(lc, next);
    1158        2650 :                         return scanner_token(lc, cur);
    1159             :                 }
    1160           0 :         case '0':
    1161             :         case '1':
    1162             :         case '2':
    1163             :         case '3':
    1164             :         case '4':
    1165             :         case '5':
    1166             :         case '6':
    1167             :         case '7':
    1168             :         case '8':
    1169             :         case '9':
    1170           0 :                 return number(c, cur);
    1171           8 :         case '#':
    1172           8 :                 if ((cur = skip_sql_comment(lc)) == EOF)
    1173             :                         return cur;
    1174           8 :                 return tokenize(c, cur);
    1175      814454 :         case '\'':
    1176      814454 :                 if (lc->raw_string_mode || lc->next_string_is_raw)
    1177          50 :                         return scanner_string(c, cur, false);
    1178      814404 :                 return scanner_string(c, cur, true);
    1179     1312913 :         case '"':
    1180     1312913 :                 return scanner_string(c, cur, false);
    1181         500 :         case '{':
    1182             :                 // if previous tokens like LANGUAGE IDENT
    1183             :                 // TODO checking on IDENT only may not be enough
    1184         500 :                 if (lc->yylast == IDENT)
    1185         234 :                         return scanner_body(c);
    1186         266 :                 lc->started = 1;
    1187         266 :                 return scanner_token(lc, cur);
    1188         266 :         case '}':
    1189         266 :                 lc->started = 1;
    1190         266 :                 return scanner_token(lc, cur);
    1191       30325 :         case '-':
    1192       30325 :                 lc->started = 1;
    1193       30325 :                 next = scanner_getc(lc);
    1194       30325 :                 if (next < 0)
    1195             :                         return EOF;
    1196       30324 :                 if (next == '-') {
    1197        3316 :                         lc->started = started;
    1198        3316 :                         if ((cur = skip_sql_comment(lc)) == EOF)
    1199             :                                 return cur;
    1200        3316 :                         return tokenize(c, cur);
    1201             :                 }
    1202       27008 :                 lc->started = 1;
    1203       27008 :                 utf8_putchar(lc, next);
    1204       27008 :                 return scanner_token(lc, cur);
    1205          12 :         case '~': /* binary not */
    1206          12 :                 lc->started = 1;
    1207          12 :                 next = scanner_getc(lc);
    1208          12 :                 if (next < 0)
    1209             :                         return EOF;
    1210          12 :                 if (next == '=')
    1211           5 :                         return scanner_token(lc, GEOM_MBR_EQUAL);
    1212           7 :                 utf8_putchar(lc, next);
    1213           7 :                 return scanner_token(lc, cur);
    1214     7275887 :         case '^': /* binary xor */
    1215             :         case '*':
    1216             :         case '?':
    1217             :         case ':':
    1218             :         case '%':
    1219             :         case '+':
    1220             :         case '(':
    1221             :         case ')':
    1222             :         case ',':
    1223             :         case '=':
    1224             :         case '[':
    1225             :         case ']':
    1226     7275887 :                 lc->started = 1;
    1227     7275887 :                 return scanner_token(lc, cur);
    1228        6325 :         case '&':
    1229        6325 :                 lc->started = 1;
    1230        6325 :                 cur = scanner_getc(lc);
    1231        6325 :                 if (cur < 0)
    1232             :                         return EOF;
    1233        6325 :                 if (cur < 0)
    1234             :                         return EOF;
    1235        6325 :                 if(cur == '<') {
    1236           3 :                         next = scanner_getc(lc);
    1237           3 :                         if (next < 0)
    1238             :                                 return EOF;
    1239           3 :                         if(next == '|') {
    1240           0 :                                 return scanner_token(lc, GEOM_OVERLAP_OR_BELOW);
    1241             :                         } else {
    1242           3 :                                 utf8_putchar(lc, next); //put the char back
    1243           3 :                                 return scanner_token(lc, GEOM_OVERLAP_OR_LEFT);
    1244             :                         }
    1245        6322 :                 } else if(cur == '>')
    1246           3 :                         return scanner_token(lc, GEOM_OVERLAP_OR_RIGHT);
    1247        6319 :                 else if(cur == '&')
    1248           3 :                         return scanner_token(lc, GEOM_OVERLAP);
    1249             :                 else {/* binary and */
    1250        6316 :                         utf8_putchar(lc, cur); //put the char back
    1251        6316 :                         return scanner_token(lc, '&');
    1252             :                 }
    1253          19 :         case '@':
    1254          19 :                 lc->started = 1;
    1255          19 :                 return scanner_token(lc, AT);
    1256     1000649 :         case ';':
    1257     1000649 :                 lc->started = 0;
    1258     1000649 :                 return scanner_token(lc, SCOLON);
    1259          27 :         case '!':
    1260          27 :                 lc->started = 1;
    1261          27 :                 cur = scanner_getc(lc);
    1262          27 :                 if (cur < 0)
    1263             :                         return EOF;
    1264          27 :                 else if (cur == '=') {
    1265          21 :                         lc->rs->buf[lc->rs->pos + lc->yycur - 2] = '<';
    1266          21 :                         lc->rs->buf[lc->rs->pos + lc->yycur - 1] = '>';
    1267          21 :                         return scanner_token( lc, COMPARISON);
    1268             :                 } else {
    1269           6 :                         utf8_putchar(lc, cur); //put the char back
    1270             :                 }
    1271           6 :                 return scanner_token(lc, '!');
    1272       52291 :         case '<':
    1273       52291 :                 lc->started = 1;
    1274       52291 :                 cur = scanner_getc(lc);
    1275       52291 :                 if (cur < 0)
    1276             :                         return EOF;
    1277       52291 :                 if (cur == '=') {
    1278        3128 :                         return scanner_token( lc, COMPARISON);
    1279       49163 :                 } else if (cur == '>') {
    1280       35507 :                         return scanner_token( lc, COMPARISON);
    1281       13656 :                 } else if (cur == '<') {
    1282          44 :                         next = scanner_getc(lc);
    1283          44 :                         if (next < 0)
    1284             :                                 return EOF;
    1285          44 :                         if (next == '=') {
    1286           4 :                                 return scanner_token( lc, LEFT_SHIFT_ASSIGN);
    1287          40 :                         } else if (next == '|') {
    1288           1 :                                 return scanner_token(lc, GEOM_BELOW);
    1289             :                         } else {
    1290          39 :                                 utf8_putchar(lc, next); //put the char back
    1291          39 :                                 return scanner_token( lc, LEFT_SHIFT);
    1292             :                         }
    1293       13612 :                 } else if(cur == '-') {
    1294          19 :                         next = scanner_getc(lc);
    1295          19 :                         if (next < 0)
    1296             :                                 return EOF;
    1297          19 :                         if(next == '>') {
    1298           7 :                                 return scanner_token(lc, GEOM_DIST);
    1299             :                         } else {
    1300             :                                 //put the characters back and fall in the next possible case
    1301          12 :                                 utf8_putchar(lc, next);
    1302          12 :                                 utf8_putchar(lc, cur);
    1303          12 :                                 return scanner_token( lc, COMPARISON);
    1304             :                         }
    1305             :                 } else {
    1306       13593 :                         utf8_putchar(lc, cur);
    1307       13593 :                         return scanner_token( lc, COMPARISON);
    1308             :                 }
    1309       47489 :         case '>':
    1310       47489 :                 lc->started = 1;
    1311       47489 :                 cur = scanner_getc(lc);
    1312       47489 :                 if (cur < 0)
    1313             :                         return EOF;
    1314       47489 :                 if (cur == '>') {
    1315        2683 :                         cur = scanner_getc(lc);
    1316        2683 :                         if (cur < 0)
    1317             :                                 return EOF;
    1318        2683 :                         if (cur == '=')
    1319           3 :                                 return scanner_token( lc, RIGHT_SHIFT_ASSIGN);
    1320        2680 :                         utf8_putchar(lc, cur);
    1321        2680 :                         return scanner_token( lc, RIGHT_SHIFT);
    1322       44806 :                 } else if (cur != '=') {
    1323       42548 :                         utf8_putchar(lc, cur);
    1324       42548 :                         return scanner_token( lc, COMPARISON);
    1325             :                 } else {
    1326        2258 :                         return scanner_token( lc, COMPARISON);
    1327             :                 }
    1328     2217126 :         case '.':
    1329     2217126 :                 lc->started = 1;
    1330     2217126 :                 cur = scanner_getc(lc);
    1331     2217126 :                 if (cur < 0)
    1332             :                         return EOF;
    1333     2217125 :                 if (!iswdigit(cur)) {
    1334     2217111 :                         utf8_putchar(lc, cur);
    1335     2217111 :                         return scanner_token( lc, '.');
    1336             :                 } else {
    1337          14 :                         utf8_putchar(lc, cur);
    1338          14 :                         cur = '.';
    1339          14 :                         return number(c, cur);
    1340             :                 }
    1341      186608 :         case '|': /* binary or or string concat */
    1342      186608 :                 lc->started = 1;
    1343      186608 :                 cur = scanner_getc(lc);
    1344      186608 :                 if (cur < 0)
    1345             :                         return EOF;
    1346      186608 :                 if (cur == '|') {
    1347      186583 :                         return scanner_token(lc, CONCATSTRING);
    1348          25 :                 } else if (cur == '&') {
    1349           0 :                         next = scanner_getc(lc);
    1350           0 :                         if (next < 0)
    1351             :                                 return EOF;
    1352           0 :                         if(next == '>') {
    1353           0 :                                 return scanner_token(lc, GEOM_OVERLAP_OR_ABOVE);
    1354             :                         } else {
    1355           0 :                                 utf8_putchar(lc, next); //put the char back
    1356           0 :                                 utf8_putchar(lc, cur); //put the char back
    1357           0 :                                 return scanner_token(lc, '|');
    1358             :                         }
    1359          25 :                 } else if (cur == '>') {
    1360           1 :                         next = scanner_getc(lc);
    1361           1 :                         if (next < 0)
    1362             :                                 return EOF;
    1363           1 :                         if(next == '>') {
    1364           1 :                                 return scanner_token(lc, GEOM_ABOVE);
    1365             :                         } else {
    1366           0 :                                 utf8_putchar(lc, next); //put the char back
    1367           0 :                                 utf8_putchar(lc, cur); //put the char back
    1368           0 :                                 return scanner_token(lc, '|');
    1369             :                         }
    1370             :                 } else {
    1371          24 :                         utf8_putchar(lc, cur);
    1372          24 :                         return scanner_token(lc, '|');
    1373             :                 }
    1374             :         }
    1375          10 :         (void)sql_error( c, 3, SQLSTATE(42000) "Unexpected symbol (%lc)", (wint_t) cur);
    1376          10 :         return LEX_ERROR;
    1377             : }
    1378             : 
    1379             : static int
    1380    28705103 : tokenize(mvc * c, int cur)
    1381             : {
    1382    28705103 :         struct scanner *lc = &c->scanner;
    1383    57369655 :         while (1) {
    1384    43037379 :                 if (cur == 0xFEFF) {
    1385             :                         /* on Linux at least, iswpunct returns TRUE
    1386             :                          * for U+FEFF, but we don't want that, we just
    1387             :                          * want to go to the scanner_error case
    1388             :                          * below */
    1389             :                         ;
    1390    43037995 :                 } else if (iswspace(cur)) {
    1391    14350336 :                         if ((cur = skip_white_space(lc)) == EOF)
    1392             :                                 return cur;
    1393    14332276 :                         continue;  /* try again */
    1394    28687659 :                 } else if (iswdigit(cur)) {
    1395     1913611 :                         return number(c, cur);
    1396    26774048 :                 } else if (iswalpha(cur) || cur == '_') {
    1397    13719137 :                         switch (cur) {
    1398      657864 :                         case 'e': /* string with escapes */
    1399             :                         case 'E':
    1400      657864 :                                 if (scanner_read_more(lc, 1) != EOF &&
    1401      657864 :                                     lc->rs->buf[lc->rs->pos + lc->yycur] == '\'') {
    1402        3885 :                                         return scanner_string(c, scanner_getc(lc), true);
    1403             :                                 }
    1404             :                                 break;
    1405      419551 :                         case 'x': /* blob */
    1406             :                         case 'X':
    1407             :                         case 'r': /* raw string */
    1408             :                         case 'R':
    1409      419551 :                                 if (scanner_read_more(lc, 1) != EOF &&
    1410      419551 :                                     lc->rs->buf[lc->rs->pos + lc->yycur] == '\'') {
    1411        3280 :                                         return scanner_string(c, scanner_getc(lc), false);
    1412             :                                 }
    1413             :                                 break;
    1414      159854 :                         case 'u': /* unicode string */
    1415             :                         case 'U':
    1416      159854 :                                 if (scanner_read_more(lc, 1) != EOF &&
    1417      159871 :                                     lc->rs->buf[lc->rs->pos + lc->yycur] == '&' &&
    1418          17 :                                     scanner_read_more(lc, 2) != EOF &&
    1419          17 :                                     (lc->rs->buf[lc->rs->pos + lc->yycur + 1] == '\'' ||
    1420             :                                      lc->rs->buf[lc->rs->pos + lc->yycur + 1] == '"')) {
    1421          17 :                                         cur = scanner_getc(lc); /* '&' */
    1422          17 :                                         return scanner_string(c, scanner_getc(lc), false);
    1423             :                                 }
    1424             :                                 break;
    1425             :                         default:
    1426             :                                 break;
    1427             :                         }
    1428    13748775 :                         return keyword_or_ident(c, cur);
    1429    13018091 :                 } else if (iswpunct(cur)) {
    1430    13017443 :                         return scanner_symbol(c, cur);
    1431             :                 }
    1432          32 :                 if (cur == EOF) {
    1433           0 :                         if (lc->mode == LINE_1 || !lc->started )
    1434             :                                 return cur;
    1435           0 :                         return scanner_error(c, cur);
    1436             :                 }
    1437             :                 /* none of the above: error */
    1438          32 :                 return scanner_error(c, cur);
    1439             :         }
    1440             : }
    1441             : 
    1442             : /* SQL 'quoted' idents consist of a set of any character of
    1443             :  * the source language character set other than a 'quote'
    1444             :  *
    1445             :  * MonetDB has 3 restrictions:
    1446             :  *      1 we disallow '%' as the first character.
    1447             :  *      2 the length is limited to 1024 characters
    1448             :  *      3 the identifier 'TID%' is not allowed
    1449             :  */
    1450             : static bool
    1451     1312902 : valid_ident(const char *restrict s, char *restrict dst)
    1452             : {
    1453     1312902 :         int p = 0;
    1454             : 
    1455     1312902 :         if (*s == '%')
    1456             :                 return false;
    1457             : 
    1458     9708597 :         while (*s) {
    1459     8395695 :                 if ((dst[p++] = *s++) == '"' && *s == '"')
    1460          68 :                         s++;
    1461     8395695 :                 if (p >= 1024)
    1462             :                         return false;
    1463             :         }
    1464     1312902 :         dst[p] = '\0';
    1465     1312902 :         if (strcmp(dst, TID + 1) == 0) /* an index named 'TID%' could interfere with '%TID%' */
    1466             :                 return false;
    1467             :         return true;
    1468             : }
    1469             : /* Scan one token from the scanner embedded in parm (an mvc), store its text in yylval->sval and return the token code (EOF or LEX_ERROR on failure). */
    1470             : static inline int
    1471    28794837 : sql_get_next_token(YYSTYPE *yylval, void *parm)
    1472             : {
    1473    28794837 :         mvc *c = (mvc*)parm;
    1474    28794837 :         struct scanner *lc = &c->scanner;
    1475    28794837 :         int token = 0, cur = 0;
    1476             : 
    1477    28794837 :         if (lc->rs->buf == NULL) /* malloc failure */
    1478             :                 return EOF;
    1479             : 
    1480    28794837 :         if (lc->yynext) { /* a token was stashed earlier (scanner() does this after NOT): hand it out first */
    1481       62693 :                 int next = lc->yynext;
    1482             : 
    1483       62693 :                 lc->yynext = 0;
    1484       62693 :                 return(next);
    1485             :         }
    1486             : 
    1487    28732144 :         if (lc->yybak) { /* put the byte saved in yybak back at the current scan position */
    1488    27701876 :                 lc->rs->buf[lc->rs->pos + lc->yycur] = lc->yybak;
    1489    27701876 :                 lc->yybak = 0;
    1490             :         }
    1491             : 
    1492    28732144 :         lc->yysval = lc->yycur; /* offset where the text of this token starts */
    1493    28732144 :         lc->yylast = lc->yyval; /* remember the previous value; scanner() compares yylast with SCOLON */
    1494    28732144 :         cur = scanner_getc(lc);
    1495    28742959 :         if (cur < 0)
    1496             :                 return EOF;
    1497    28632063 :         token = tokenize(c, cur);
    1498             : 
    1499    28619786 :         yylval->sval = (lc->rs->buf + lc->rs->pos + lc->yysval); /* points into the read buffer, not yet a private copy */
    1500             : 
    1501    28619786 :         if (token == KW_TYPE) /* KW_TYPE is reported to the caller as aTYPE */
    1502       49319 :                 token = aTYPE;
    1503             : 
    1504    28619786 :         if (token == IDENT || token == COMPARISON ||
    1505    23205042 :             token == RANK || token == aTYPE || token == MARGFUNC) {
    1506     5474588 :                 yylval->sval = sa_strndup(c->sa, yylval->sval, lc->yycur-lc->yysval); /* copy exactly the token text using allocator c->sa */
    1507     5474586 :                 lc->next_string_is_raw = false;
    1508    23145198 :         } else if (token == STRING) {
    1509     2134519 :                 char quote = *yylval->sval; /* first byte of the token: a quote or a prefix letter */
    1510     2134519 :                 char *str = sa_alloc( c->sa, (lc->yycur-lc->yysval-2)*2 + 1 ); /* NOTE(review): result is used without a NULL check - confirm sa_alloc cannot fail here */
    1511     2134519 :                 char *dst;
    1512             : 
    1513     2134519 :                 assert(quote == '"' || quote == '\'' || quote == 'E' || quote == 'e' || quote == 'U' || quote == 'u' || quote == 'X' || quote == 'x' || quote == 'R' || quote == 'r');
    1514             : 
    1515     2134519 :                 lc->rs->buf[lc->rs->pos + lc->yycur - 1] = 0; /* temporarily NUL-terminate by overwriting the last byte of the token; restored after the switch */
    1516     2134519 :                 switch (quote) {
    1517     1312902 :                 case '"': /* double-quoted identifier */
    1518     1312902 :                         if (valid_ident(yylval->sval+1,str)) {
    1519             :                                 token = IDENT;
    1520             :                         } else {
    1521           0 :                                 sql_error(c, 1, SQLSTATE(42000) "Invalid identifier '%s'", yylval->sval+1);
    1522           0 :                                 return LEX_ERROR; /* NOTE(review): returns without restoring the byte overwritten above - confirm intended */
    1523             :                         }
    1524             :                         break;
    1525        3884 :                 case 'e':
    1526             :                 case 'E':
    1527        3884 :                         assert(yylval->sval[1] == '\'');
    1528        3884 :                         if (GDKstrFromStr((unsigned char *) str,
    1529             :                                                           (unsigned char *) yylval->sval + 2,
    1530        3884 :                                                           lc->yycur-lc->yysval - 2, '\'') < 0) {
    1531           1 :                                 char *err = GDKerrbuf;
    1532           1 :                                 if (strncmp(err, GDKERROR, strlen(GDKERROR)) == 0) /* strip a leading GDKERROR prefix from the message */
    1533           1 :                                         err += strlen(GDKERROR);
    1534           0 :                                 else if (*err == '!')
    1535           0 :                                         err++;
    1536           1 :                                 sql_error(c, 1, SQLSTATE(42000) "%s", err);
    1537           1 :                                 return LEX_ERROR; /* NOTE(review): overwritten byte is not restored on this path either */
    1538             :                         }
    1539             :                         quote = '\''; /* the byte to restore after the switch is the closing single quote */
    1540             :                         break;
    1541          17 :                 case 'u':
    1542             :                 case 'U':
    1543          17 :                         assert(yylval->sval[1] == '&');
    1544          17 :                         assert(yylval->sval[2] == '\'' || yylval->sval[2] == '"');
    1545          17 :                         strcpy(str, yylval->sval + 3); /* text after the three-byte prefix is copied unchanged */
    1546          17 :                         token = yylval->sval[2] == '\'' ? USTRING : UIDENT;
    1547          17 :                         quote = yylval->sval[2];
    1548          17 :                         lc->next_string_is_raw = true;
    1549          17 :                         break;
    1550           1 :                 case 'x':
    1551             :                 case 'X':
    1552           1 :                         assert(yylval->sval[1] == '\'');
    1553           1 :                         dst = str;
    1554           5 :                         for (char *src = yylval->sval + 2; *src; dst++) /* copy, turning each doubled single quote into one */
    1555           4 :                                 if ((*dst = *src++) == '\'' && *src == '\'')
    1556           0 :                                         src++;
    1557           1 :                         *dst = 0;
    1558           1 :                         quote = '\'';
    1559           1 :                         token = XSTRING;
    1560           1 :                         lc->next_string_is_raw = true;
    1561           1 :                         break;
    1562        3272 :                 case 'r':
    1563             :                 case 'R':
    1564        3272 :                         assert(yylval->sval[1] == '\'');
    1565        3272 :                         dst = str;
    1566      450117 :                         for (char *src = yylval->sval + 2; *src; dst++) /* same doubled-quote collapsing copy; no GDKstrFromStr call */
    1567      446845 :                                 if ((*dst = *src++) == '\'' && *src == '\'')
    1568        2744 :                                         src++;
    1569        3272 :                         quote = '\'';
    1570        3272 :                         *dst = 0;
    1571        3272 :                         break;
    1572      814443 :                 default: /* token starts directly with its quote character */
    1573      814443 :                         if (lc->raw_string_mode || lc->next_string_is_raw) {
    1574          50 :                                 dst = str;
    1575         479 :                                 for (char *src = yylval->sval + 1; *src; dst++)
    1576         429 :                                         if ((*dst = *src++) == '\'' && *src == '\'')
    1577           3 :                                                 src++;
    1578          50 :                                 *dst = 0;
    1579             :                         } else {
    1580      814392 :                                 if (GDKstrFromStr((unsigned char *)str,
    1581      814393 :                                                                   (unsigned char *)yylval->sval + 1,
    1582      814393 :                                                                   lc->yycur - lc->yysval - 1,
    1583             :                                                                   '\'') < 0) {
    1584           1 :                                         sql_error(c, 1, SQLSTATE(42000) "%s", GDKerrbuf); /* NOTE(review): unlike the E branch, the GDKERROR prefix is not stripped here */
    1585           1 :                                         return LEX_ERROR; /* NOTE(review): overwritten byte is not restored on this path */
    1586             :                                 }
    1587             :                         }
    1588             :                         break;
    1589             :                 }
    1590     2134516 :                 yylval->sval = str; /* hand out the converted copy instead of the buffer pointer */
    1591             : 
    1592             :                 /* reset original */
    1593     2134516 :                 lc->rs->buf[lc->rs->pos+lc->yycur- 1] = quote;
    1594             :         } else {
    1595    21010679 :                 lc->next_string_is_raw = false;
    1596             :         }
    1597             : 
    1598             :         return(token);
    1599             : }
    1600             : /* Token filter on top of sql_get_next_token(): merges NOT with a following keyword, skips semicolons that follow a semicolon, and optionally logs the consumed input. */
    1601             : static int scanner( YYSTYPE *yylval, void *m, bool log); /* declared ahead of the definition below */
    1602             : 
    1603             : static int
    1604    28665582 : scanner(YYSTYPE * yylval, void *parm, bool log)
    1605             : {
    1606    28665582 :         int token;
    1607    28665582 :         mvc *c = (mvc *) parm;
    1608    28665582 :         struct scanner *lc = &c->scanner;
    1609    28665582 :         size_t pos;
    1610             : 
    1611             :         /* store position for when view's query ends */
    1612    28665582 :         pos = lc->rs->pos + lc->yycur;
    1613             : 
    1614    28665582 :         token = sql_get_next_token(yylval, parm);
    1615             : 
    1616    28659073 :         if (token == NOT) {
    1617       77052 :                 int next = scanner(yylval, parm, false); /* look one token ahead; log is false for the nested call */
    1618             : 
    1619       77052 :                 if (next == NOT) {
    1620           2 :                         return scanner(yylval, parm, false); /* NOT NOT: return the token after the pair; the log/started steps below are skipped for this call */
    1621             :                 } else if (next == EXISTS) {
    1622             :                         token = NOT_EXISTS;
    1623             :                 } else if (next == BETWEEN) {
    1624             :                         token = NOT_BETWEEN;
    1625             :                 } else if (next == sqlIN) {
    1626             :                         token = NOT_IN;
    1627             :                 } else if (next == LIKE) {
    1628             :                         token = NOT_LIKE;
    1629             :                 } else if (next == ILIKE) {
    1630             :                         token = NOT_ILIKE;
    1631             :                 } else {
    1632       62693 :                         lc->yynext = next; /* not a combinable keyword: stash it, sql_get_next_token() returns it on the next call */
    1633             :                 }
    1634    28582021 :         } else if (token == SCOLON) {
    1635             :                 /* ignore semi-colon(s) following a semi-colon */
    1636     1000680 :                 if (lc->yylast == SCOLON) {
    1637      131729 :                         size_t prev = lc->yycur; /* scan offset just after the last skipped semicolon */
    1638      131730 :                         while ((token = sql_get_next_token(yylval, parm)) == SCOLON)
    1639           1 :                                 prev = lc->yycur;
    1640             : 
    1641             :                         /* skip the skipped stuff also in the buffer */
    1642      131641 :                         lc->rs->pos += prev;
    1643      131641 :                         lc->yycur -= prev;
    1644             :                 }
    1645             :         }
    1646             : 
    1647    28658983 :         if (lc->log && log) /* write the buffer bytes from the saved start position up to the current position */
    1648           0 :                 mnstr_write(lc->log, lc->rs->buf+pos, lc->rs->pos + lc->yycur - pos, 1);
    1649             : 
    1650    28658983 :         lc->started += (token != EOF); /* incremented once per non-EOF token returned from here */
    1651    28658983 :         return token;
    1652             : }
    1653             : 
    1654             : /* also see sql_parser.y; presumably the lexer entry point used by the generated parser - returns the next token from scanner() with logging enabled */
    1655             : extern int sqllex(YYSTYPE * yylval, void *parm);
    1656             : 
    1657             : int
    1658    28589896 : sqllex(YYSTYPE * yylval, void *parm)
    1659             : {
    1660    28589896 :         return scanner(yylval, parm, true); /* true: allow scanner() to write the consumed text to lc->log */
    1661             : }

Generated by: LCOV version 1.14