LCOV - code coverage report
Current view: top level - sql/server - sql_scan.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 1049 1112 94.3 %
Date: 2024-11-13 22:44:48 Functions: 26 26 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include <wctype.h>
      15             : #include "sql_mem.h"
      16             : #include "sql_scan.h"
      17             : #include "sql_types.h"
      18             : #include "sql_symbol.h"
      19             : #include "sql_mvc.h"
      20             : #include "sql_parser.tab.h"
      21             : #include "sql_semantic.h"
      22             : #include "sql_parser.h"               /* for sql_error() */
      23             : 
      24             : #include "stream.h"
      25             : #include "mapi_prompt.h"
      26             : #include <unistd.h>
      27             : #include <string.h>
      28             : #include <ctype.h>
      29             : #include "sql_keyword.h"
      30             : 
      31             : /**
      32             :  * Removes all comments before the query. In query comments are kept.
      33             :  */
      34             : char *
      35      395611 : query_cleaned(allocator *sa, const char *query)
      36             : {
      37      395611 :         char *q, *r, *c = NULL;
      38      395611 :         int lines = 0;
      39      395611 :         int quote = 0;          /* inside quotes ('..', "..", {..}) */
      40      395611 :         bool bs = false;                /* seen a backslash in a quoted string */
      41      395611 :         bool incomment1 = false;        /* inside traditional C style comment */
      42      395611 :         bool incomment2 = false;        /* inside comment starting with --  */
      43      395611 :         bool inline_comment = false;
      44             : 
      45      395611 :         r = SA_NEW_ARRAY(sa, char, strlen(query) + 1);
      46      395608 :         if(!r)
      47             :                 return NULL;
      48             : 
      49    66561768 :         for (q = r; *query; query++) {
      50    66166160 :                 if (incomment1) {
      51       16116 :                         if (*query == '/' && query[-1] == '*') {
      52         233 :                                 incomment1 = false;
      53         233 :                                 if (c == r && lines > 0) {
      54         225 :                                         q = r; // reset to beginning
      55         225 :                                         lines = 0;
      56         225 :                                         continue;
      57             :                                 }
      58             :                         }
      59       15891 :                         if (*query == '\n') lines++;
      60       15891 :                         *q++ = *query;
      61    66150044 :                 } else if (incomment2) {
      62      824353 :                         if (*query == '\n') {
      63        2778 :                                 incomment2 = false;
      64        2778 :                                 inline_comment = false;
      65             :                                 /* add newline only if comment doesn't
      66             :                                  * occupy whole line */
      67        2778 :                                 if (q > r && q[-1] != '\n'){
      68         883 :                                         *q++ = '\n';
      69         883 :                                         lines++;
      70             :                                 }
      71      821575 :                         } else if (inline_comment){
      72       17378 :                                 *q++ = *query; // preserve in line query comments
      73             :                         }
      74    65325691 :                 } else if (quote) {
      75    22099462 :                         if (bs) {
      76             :                                 bs = false;
      77    22096221 :                         } else if (*query == '\\') {
      78             :                                 bs = true;
      79    22092980 :                         } else if (*query == quote) {
      80      662611 :                                 quote = 0;
      81             :                         }
      82    22099462 :                         *q++ = *query;
      83    43226229 :                 } else if (*query == '"' || *query == '\'') {
      84      662143 :                         quote = *query;
      85      662143 :                         *q++ = *query;
      86    42564086 :                 } else if (*query == '{') {
      87         513 :                         quote = '}';
      88         513 :                         *q++ = *query;
      89    42563573 :                 } else if (*query == '-' && query[1] == '-') {
      90        2778 :                         if (q > r && q[-1] != '\n') {
      91         883 :                                 inline_comment = true;
      92         883 :                                 *q++ = *query; // preserve in line query comments
      93             :                         }
      94             :                         incomment2 = true;
      95    42560795 :                 } else if (*query == '/' && query[1] == '*') {
      96         233 :                         incomment1 = true;
      97         233 :                         c = q;
      98         233 :                         *q++ = *query;
      99    42560562 :                 } else if (*query == '\n') {
     100             :                         /* collapse newlines */
     101      854876 :                         if (q > r && q[-1] != '\n') {
     102      812899 :                                 *q++ = '\n';
     103      812899 :                                 lines++;
     104             :                         }
     105    41705686 :                 } else if (*query == ' ' || *query == '\t') {
     106             :                         /* collapse white space */
     107     6872554 :                         if (q > r && q[-1] != ' ')
     108     5396450 :                                 *q++ = ' ';
     109             :                 } else {
     110    34833132 :                         *q++ = *query;
     111             :                 }
     112             :         }
     113      395608 :         *q = 0;
     114      395608 :         return r;
     115             : }
     116             : 
     117             : int
     118         315 : scanner_init_keywords(void)
     119             : {
     120         315 :         int failed = 0;
     121             : 
     122         315 :         failed += keywords_insert("false", BOOL_FALSE);
     123         315 :         failed += keywords_insert("true", BOOL_TRUE);
     124         315 :         failed += keywords_insert("bool", sqlBOOL);
     125             : 
     126         315 :         failed += keywords_insert("ALTER", ALTER);
     127         315 :         failed += keywords_insert("ADD", ADD);
     128         315 :         failed += keywords_insert("AND", AND);
     129             : 
     130         315 :         failed += keywords_insert("RANK", RANK);
     131         315 :         failed += keywords_insert("DENSE_RANK", RANK);
     132         315 :         failed += keywords_insert("PERCENT_RANK", RANK);
     133         315 :         failed += keywords_insert("CUME_DIST", RANK);
     134         315 :         failed += keywords_insert("ROW_NUMBER", RANK);
     135         315 :         failed += keywords_insert("NTILE", RANK);
     136         315 :         failed += keywords_insert("LAG", RANK);
     137         315 :         failed += keywords_insert("LEAD", RANK);
     138         315 :         failed += keywords_insert("FETCH", FETCH);
     139         315 :         failed += keywords_insert("FIRST_VALUE", RANK);
     140         315 :         failed += keywords_insert("LAST_VALUE", RANK);
     141         315 :         failed += keywords_insert("NTH_VALUE", RANK);
     142             : 
     143         315 :         failed += keywords_insert("BEST", BEST);
     144         315 :         failed += keywords_insert("EFFORT", EFFORT);
     145             : 
     146         315 :         failed += keywords_insert("AS", AS);
     147         315 :         failed += keywords_insert("ASC", ASC);
     148         315 :         failed += keywords_insert("AUTHORIZATION", AUTHORIZATION);
     149         315 :         failed += keywords_insert("BETWEEN", BETWEEN);
     150         315 :         failed += keywords_insert("SYMMETRIC", SYMMETRIC);
     151         315 :         failed += keywords_insert("ASYMMETRIC", ASYMMETRIC);
     152         315 :         failed += keywords_insert("BY", BY);
     153         315 :         failed += keywords_insert("CAST", CAST);
     154         315 :         failed += keywords_insert("CONVERT", CONVERT);
     155         315 :         failed += keywords_insert("CHARACTER", CHARACTER);
     156         315 :         failed += keywords_insert("CHAR", CHARACTER);
     157         315 :         failed += keywords_insert("VARYING", VARYING);
     158         315 :         failed += keywords_insert("VARCHAR", VARCHAR);
     159         315 :         failed += keywords_insert("BINARY", BINARY);
     160         315 :         failed += keywords_insert("LARGE", LARGE);
     161         315 :         failed += keywords_insert("OBJECT", OBJECT);
     162         315 :         failed += keywords_insert("CLOB", CLOB);
     163         315 :         failed += keywords_insert("BLOB", sqlBLOB);
     164         315 :         failed += keywords_insert("TEXT", sqlTEXT);
     165         315 :         failed += keywords_insert("TINYTEXT", sqlTEXT);
     166         315 :         failed += keywords_insert("STRING", CLOB);    /* ? */
     167         315 :         failed += keywords_insert("CHECK", CHECK);
     168         315 :         failed += keywords_insert("CLIENT", CLIENT);
     169         315 :         failed += keywords_insert("SERVER", SERVER);
     170         315 :         failed += keywords_insert("COMMENT", COMMENT);
     171         315 :         failed += keywords_insert("CONSTRAINT", CONSTRAINT);
     172         315 :         failed += keywords_insert("CREATE", CREATE);
     173         315 :         failed += keywords_insert("CROSS", CROSS);
     174         315 :         failed += keywords_insert("COPY", COPY);
     175         315 :         failed += keywords_insert("RECORDS", RECORDS);
     176         315 :         failed += keywords_insert("DELIMITERS", DELIMITERS);
     177         315 :         failed += keywords_insert("STDIN", STDIN);
     178         315 :         failed += keywords_insert("STDOUT", STDOUT);
     179             : 
     180         315 :         failed += keywords_insert("TINYINT", TINYINT);
     181         315 :         failed += keywords_insert("SMALLINT", SMALLINT);
     182         315 :         failed += keywords_insert("INTEGER", sqlINTEGER);
     183         315 :         failed += keywords_insert("INT", sqlINTEGER);
     184         315 :         failed += keywords_insert("MEDIUMINT", sqlINTEGER);
     185         315 :         failed += keywords_insert("BIGINT", BIGINT);
     186             : #ifdef HAVE_HGE
     187         315 :         failed += keywords_insert("HUGEINT", HUGEINT);
     188             : #endif
     189         315 :         failed += keywords_insert("DEC", sqlDECIMAL);
     190         315 :         failed += keywords_insert("DECIMAL", sqlDECIMAL);
     191         315 :         failed += keywords_insert("NUMERIC", sqlDECIMAL);
     192         315 :         failed += keywords_insert("DECLARE", DECLARE);
     193         315 :         failed += keywords_insert("DEFAULT", DEFAULT);
     194         315 :         failed += keywords_insert("DESC", DESC);
     195         315 :         failed += keywords_insert("DISTINCT", DISTINCT);
     196         315 :         failed += keywords_insert("DOUBLE", sqlDOUBLE);
     197         315 :         failed += keywords_insert("REAL", sqlREAL);
     198         315 :         failed += keywords_insert("DROP", DROP);
     199         315 :         failed += keywords_insert("ESCAPE", ESCAPE);
     200         315 :         failed += keywords_insert("EXISTS", EXISTS);
     201         315 :         failed += keywords_insert("UESCAPE", UESCAPE);
     202         315 :         failed += keywords_insert("EXTRACT", EXTRACT);
     203         315 :         failed += keywords_insert("FLOAT", sqlFLOAT);
     204         315 :         failed += keywords_insert("FOR", FOR);
     205         315 :         failed += keywords_insert("FOREIGN", FOREIGN);
     206         315 :         failed += keywords_insert("FROM", FROM);
     207         315 :         failed += keywords_insert("FWF", FWF);
     208             : 
     209         315 :         failed += keywords_insert("BIG", BIG);
     210         315 :         failed += keywords_insert("LITTLE", LITTLE);
     211         315 :         failed += keywords_insert("NATIVE", NATIVE);
     212         315 :         failed += keywords_insert("ENDIAN", ENDIAN);
     213             : 
     214         315 :         failed += keywords_insert("REFERENCES", REFERENCES);
     215             : 
     216         315 :         failed += keywords_insert("MATCH", MATCH);
     217         315 :         failed += keywords_insert("FULL", FULL);
     218         315 :         failed += keywords_insert("PARTIAL", PARTIAL);
     219         315 :         failed += keywords_insert("SIMPLE", SIMPLE);
     220             : 
     221         315 :         failed += keywords_insert("INSERT", INSERT);
     222         315 :         failed += keywords_insert("UPDATE", UPDATE);
     223         315 :         failed += keywords_insert("DELETE", sqlDELETE);
     224         315 :         failed += keywords_insert("TRUNCATE", TRUNCATE);
     225         315 :         failed += keywords_insert("MATCHED", MATCHED);
     226             : 
     227         315 :         failed += keywords_insert("ACTION", ACTION);
     228         315 :         failed += keywords_insert("CASCADE", CASCADE);
     229         315 :         failed += keywords_insert("RESTRICT", RESTRICT);
     230         315 :         failed += keywords_insert("FIRST", FIRST);
     231         315 :         failed += keywords_insert("GLOBAL", GLOBAL);
     232         315 :         failed += keywords_insert("GROUP", sqlGROUP);
     233         315 :         failed += keywords_insert("GROUPING", GROUPING);
     234         315 :         failed += keywords_insert("ROLLUP", ROLLUP);
     235         315 :         failed += keywords_insert("CUBE", CUBE);
     236         315 :         failed += keywords_insert("HAVING", HAVING);
     237         315 :         failed += keywords_insert("ILIKE", ILIKE);
     238         315 :         failed += keywords_insert("IMPRINTS", IMPRINTS);
     239         315 :         failed += keywords_insert("IN", sqlIN);
     240         315 :         failed += keywords_insert("INNER", INNER);
     241         315 :         failed += keywords_insert("INTO", INTO);
     242         315 :         failed += keywords_insert("IS", IS);
     243         315 :         failed += keywords_insert("JOIN", JOIN);
     244         315 :         failed += keywords_insert("KEY", KEY);
     245         315 :         failed += keywords_insert("LATERAL", LATERAL);
     246         315 :         failed += keywords_insert("LEFT", LEFT);
     247         315 :         failed += keywords_insert("LIKE", LIKE);
     248         315 :         failed += keywords_insert("LIMIT", LIMIT);
     249         315 :         failed += keywords_insert("SAMPLE", SAMPLE);
     250         315 :         failed += keywords_insert("SEED", SEED);
     251         315 :         failed += keywords_insert("LAST", LAST);
     252         315 :         failed += keywords_insert("LOCAL", LOCAL);
     253         315 :         failed += keywords_insert("NATURAL", NATURAL);
     254         315 :         failed += keywords_insert("NOT", NOT);
     255         315 :         failed += keywords_insert("NULL", sqlNULL);
     256         315 :         failed += keywords_insert("NULLS", NULLS);
     257         315 :         failed += keywords_insert("OFFSET", OFFSET);
     258         315 :         failed += keywords_insert("ON", ON);
     259         315 :         failed += keywords_insert("OPTIONS", OPTIONS);
     260         315 :         failed += keywords_insert("OPTION", OPTION);
     261         315 :         failed += keywords_insert("OR", OR);
     262         315 :         failed += keywords_insert("ORDER", ORDER);
     263         315 :         failed += keywords_insert("ORDERED", ORDERED);
     264         315 :         failed += keywords_insert("OUTER", OUTER);
     265         315 :         failed += keywords_insert("OVER", OVER);
     266         315 :         failed += keywords_insert("PARTITION", PARTITION);
     267         315 :         failed += keywords_insert("PATH", PATH);
     268         315 :         failed += keywords_insert("PRECISION", PRECISION);
     269         315 :         failed += keywords_insert("PRIMARY", PRIMARY);
     270             : 
     271         315 :         failed += keywords_insert("USER", USER);
     272         315 :         failed += keywords_insert("RENAME", RENAME);
     273         315 :         failed += keywords_insert("UNENCRYPTED", UNENCRYPTED);
     274         315 :         failed += keywords_insert("ENCRYPTED", ENCRYPTED);
     275         315 :         failed += keywords_insert("PASSWORD", PASSWORD);
     276         315 :         failed += keywords_insert("GRANT", GRANT);
     277         315 :         failed += keywords_insert("REVOKE", REVOKE);
     278         315 :         failed += keywords_insert("ROLE", ROLE);
     279         315 :         failed += keywords_insert("ADMIN", ADMIN);
     280         315 :         failed += keywords_insert("PRIVILEGES", PRIVILEGES);
     281         315 :         failed += keywords_insert("PUBLIC", PUBLIC);
     282         315 :         failed += keywords_insert("CURRENT_USER", CURRENT_USER);
     283         315 :         failed += keywords_insert("CURRENT_ROLE", CURRENT_ROLE);
     284         315 :         failed += keywords_insert("SESSION_USER", SESSION_USER);
     285         315 :         failed += keywords_insert("CURRENT_SCHEMA", CURRENT_SCHEMA);
     286         315 :         failed += keywords_insert("SESSION", sqlSESSION);
     287         315 :         failed += keywords_insert("MAX_MEMORY", MAX_MEMORY);
     288         315 :         failed += keywords_insert("MAX_WORKERS", MAX_WORKERS);
     289         315 :         failed += keywords_insert("OPTIMIZER", OPTIMIZER);
     290             : 
     291         315 :         failed += keywords_insert("RIGHT", RIGHT);
     292         315 :         failed += keywords_insert("SCHEMA", SCHEMA);
     293         315 :         failed += keywords_insert("SELECT", SELECT);
     294         315 :         failed += keywords_insert("SET", SET);
     295         315 :         failed += keywords_insert("SETS", SETS);
     296         315 :         failed += keywords_insert("AUTO_COMMIT", AUTO_COMMIT);
     297             : 
     298         315 :         failed += keywords_insert("ALL", ALL);
     299         315 :         failed += keywords_insert("ANY", ANY);
     300         315 :         failed += keywords_insert("SOME", SOME);
     301         315 :         failed += keywords_insert("EVERY", ANY);
     302             :         /*
     303             :            failed += keywords_insert("SQLCODE", SQLCODE );
     304             :          */
     305         315 :         failed += keywords_insert("COLUMN", COLUMN);
     306         315 :         failed += keywords_insert("TABLE", TABLE);
     307         315 :         failed += keywords_insert("TEMPORARY", TEMPORARY);
     308         315 :         failed += keywords_insert("TEMP", TEMP);
     309         315 :         failed += keywords_insert("REMOTE", REMOTE);
     310         315 :         failed += keywords_insert("MERGE", MERGE);
     311         315 :         failed += keywords_insert("REPLICA", REPLICA);
     312         315 :         failed += keywords_insert("UNLOGGED", UNLOGGED);
     313         315 :         failed += keywords_insert("TO", TO);
     314         315 :         failed += keywords_insert("UNION", UNION);
     315         315 :         failed += keywords_insert("EXCEPT", EXCEPT);
     316         315 :         failed += keywords_insert("INTERSECT", INTERSECT);
     317         315 :         failed += keywords_insert("CORRESPONDING", CORRESPONDING);
     318         315 :         failed += keywords_insert("UNIQUE", UNIQUE);
     319         315 :         failed += keywords_insert("USING", USING);
     320         315 :         failed += keywords_insert("VALUES", VALUES);
     321         315 :         failed += keywords_insert("VIEW", VIEW);
     322         315 :         failed += keywords_insert("WHERE", WHERE);
     323         315 :         failed += keywords_insert("WITH", WITH);
     324         315 :         failed += keywords_insert("WITHIN", WITHIN);
     325         315 :         failed += keywords_insert("WITHOUT", WITHOUT);
     326         315 :         failed += keywords_insert("DATA", DATA);
     327             : 
     328         315 :         failed += keywords_insert("DATE", sqlDATE);
     329         315 :         failed += keywords_insert("TIME", TIME);
     330         315 :         failed += keywords_insert("TIMESTAMP", TIMESTAMP);
     331         315 :         failed += keywords_insert("INTERVAL", INTERVAL);
     332         315 :         failed += keywords_insert("CURRENT_DATE", CURRENT_DATE);
     333         315 :         failed += keywords_insert("CURRENT_TIME", CURRENT_TIME);
     334         315 :         failed += keywords_insert("CURRENT_TIMESTAMP", CURRENT_TIMESTAMP);
     335         315 :         failed += keywords_insert("CURRENT_TIMEZONE", CURRENT_TIMEZONE);
     336         315 :         failed += keywords_insert("NOW", CURRENT_TIMESTAMP);
     337         315 :         failed += keywords_insert("LOCALTIME", LOCALTIME);
     338         315 :         failed += keywords_insert("LOCALTIMESTAMP", LOCALTIMESTAMP);
     339         315 :         failed += keywords_insert("ZONE", ZONE);
     340             : 
     341         315 :         failed += keywords_insert("CENTURY", CENTURY);
     342         315 :         failed += keywords_insert("DECADE", DECADE);
     343         315 :         failed += keywords_insert("YEAR", YEAR);
     344         315 :         failed += keywords_insert("QUARTER", QUARTER);
     345         315 :         failed += keywords_insert("MONTH", MONTH);
     346         315 :         failed += keywords_insert("WEEK", WEEK);
     347         315 :         failed += keywords_insert("DOW", DOW);
     348         315 :         failed += keywords_insert("DOY", DOY);
     349         315 :         failed += keywords_insert("DAY", DAY);
     350         315 :         failed += keywords_insert("HOUR", HOUR);
     351         315 :         failed += keywords_insert("MINUTE", MINUTE);
     352         315 :         failed += keywords_insert("SECOND", SECOND);
     353         315 :         failed += keywords_insert("EPOCH", EPOCH);
     354             : 
     355         315 :         failed += keywords_insert("POSITION", POSITION);
     356         315 :         failed += keywords_insert("SUBSTRING", SUBSTRING);
     357         315 :         failed += keywords_insert("SPLIT_PART", SPLIT_PART);
     358         315 :         failed += keywords_insert("TRIM", TRIM);
     359         315 :         failed += keywords_insert("LEADING", LEADING);
     360         315 :         failed += keywords_insert("TRAILING", TRAILING);
     361         315 :         failed += keywords_insert("BOTH", BOTH);
     362             : 
     363         315 :         failed += keywords_insert("CASE", CASE);
     364         315 :         failed += keywords_insert("WHEN", WHEN);
     365         315 :         failed += keywords_insert("THEN", THEN);
     366         315 :         failed += keywords_insert("ELSE", ELSE);
     367         315 :         failed += keywords_insert("END", END);
     368         315 :         failed += keywords_insert("NULLIF", NULLIF);
     369         315 :         failed += keywords_insert("COALESCE", COALESCE);
     370         315 :         failed += keywords_insert("ELSEIF", ELSEIF);
     371         315 :         failed += keywords_insert("IF", IF);
     372         315 :         failed += keywords_insert("WHILE", WHILE);
     373         315 :         failed += keywords_insert("DO", DO);
     374             : 
     375         315 :         failed += keywords_insert("COMMIT", COMMIT);
     376         315 :         failed += keywords_insert("ROLLBACK", ROLLBACK);
     377         315 :         failed += keywords_insert("SAVEPOINT", SAVEPOINT);
     378         315 :         failed += keywords_insert("RELEASE", RELEASE);
     379         315 :         failed += keywords_insert("WORK", WORK);
     380         315 :         failed += keywords_insert("CHAIN", CHAIN);
     381         315 :         failed += keywords_insert("PRESERVE", PRESERVE);
     382         315 :         failed += keywords_insert("ROWS", ROWS);
     383         315 :         failed += keywords_insert("NO", NO);
     384         315 :         failed += keywords_insert("START", START);
     385         315 :         failed += keywords_insert("TRANSACTION", TRANSACTION);
     386         315 :         failed += keywords_insert("READ", READ);
     387         315 :         failed += keywords_insert("WRITE", WRITE);
     388         315 :         failed += keywords_insert("ONLY", ONLY);
     389         315 :         failed += keywords_insert("ISOLATION", ISOLATION);
     390         315 :         failed += keywords_insert("LEVEL", LEVEL);
     391         315 :         failed += keywords_insert("UNCOMMITTED", UNCOMMITTED);
     392         315 :         failed += keywords_insert("COMMITTED", COMMITTED);
     393         315 :         failed += keywords_insert("REPEATABLE", sqlREPEATABLE);
     394         315 :         failed += keywords_insert("SNAPSHOT", SNAPSHOT);
     395         315 :         failed += keywords_insert("SERIALIZABLE", SERIALIZABLE);
     396         315 :         failed += keywords_insert("DIAGNOSTICS", DIAGNOSTICS);
     397         315 :         failed += keywords_insert("SIZE", sqlSIZE);
     398         315 :         failed += keywords_insert("STORAGE", STORAGE);
     399             : 
     400         315 :         failed += keywords_insert("TYPE", TYPE);
     401         315 :         failed += keywords_insert("PROCEDURE", PROCEDURE);
     402         315 :         failed += keywords_insert("FUNCTION", FUNCTION);
     403         315 :         failed += keywords_insert("LOADER", sqlLOADER);
     404         315 :         failed += keywords_insert("REPLACE", REPLACE);
     405             : 
     406         315 :         failed += keywords_insert("FIELD", FIELD);
     407         315 :         failed += keywords_insert("FILTER", FILTER);
     408         315 :         failed += keywords_insert("AGGREGATE", AGGREGATE);
     409         315 :         failed += keywords_insert("RETURNS", RETURNS);
     410         315 :         failed += keywords_insert("EXTERNAL", EXTERNAL);
     411         315 :         failed += keywords_insert("NAME", sqlNAME);
     412         315 :         failed += keywords_insert("RETURN", RETURN);
     413         315 :         failed += keywords_insert("CALL", CALL);
     414         315 :         failed += keywords_insert("LANGUAGE", LANGUAGE);
     415             : 
     416         315 :         failed += keywords_insert("ANALYZE", ANALYZE);
     417         315 :         failed += keywords_insert("EXPLAIN", SQL_EXPLAIN);
     418         315 :         failed += keywords_insert("PLAN", SQL_PLAN);
     419         315 :         failed += keywords_insert("TRACE", SQL_TRACE);
     420         315 :         failed += keywords_insert("PREPARE", PREPARE);
     421         315 :         failed += keywords_insert("PREP", PREP);
     422         315 :         failed += keywords_insert("EXECUTE", EXECUTE);
     423         315 :         failed += keywords_insert("EXEC", EXEC);
     424         315 :         failed += keywords_insert("DEALLOCATE", DEALLOCATE);
     425             : 
     426         315 :         failed += keywords_insert("INDEX", INDEX);
     427             : 
     428         315 :         failed += keywords_insert("SEQUENCE", SEQUENCE);
     429         315 :         failed += keywords_insert("RESTART", RESTART);
     430         315 :         failed += keywords_insert("INCREMENT", INCREMENT);
     431         315 :         failed += keywords_insert("MAXVALUE", MAXVALUE);
     432         315 :         failed += keywords_insert("MINVALUE", MINVALUE);
     433         315 :         failed += keywords_insert("CYCLE", CYCLE);
     434         315 :         failed += keywords_insert("CACHE", CACHE);
     435         315 :         failed += keywords_insert("NEXT", NEXT);
     436         315 :         failed += keywords_insert("VALUE", VALUE);
     437         315 :         failed += keywords_insert("GENERATED", GENERATED);
     438         315 :         failed += keywords_insert("ALWAYS", ALWAYS);
     439         315 :         failed += keywords_insert("IDENTITY", IDENTITY);
     440         315 :         failed += keywords_insert("SERIAL", SERIAL);
     441         315 :         failed += keywords_insert("BIGSERIAL", BIGSERIAL);
     442         315 :         failed += keywords_insert("AUTO_INCREMENT", AUTO_INCREMENT);
     443         315 :         failed += keywords_insert("CONTINUE", CONTINUE);
     444             : 
     445         315 :         failed += keywords_insert("TRIGGER", TRIGGER);
     446         315 :         failed += keywords_insert("ATOMIC", ATOMIC);
     447         315 :         failed += keywords_insert("BEGIN", BEGIN);
     448         315 :         failed += keywords_insert("OF", OF);
     449         315 :         failed += keywords_insert("BEFORE", BEFORE);
     450         315 :         failed += keywords_insert("AFTER", AFTER);
     451         315 :         failed += keywords_insert("ROW", ROW);
     452         315 :         failed += keywords_insert("STATEMENT", STATEMENT);
     453         315 :         failed += keywords_insert("NEW", sqlNEW);
     454         315 :         failed += keywords_insert("OLD", OLD);
     455         315 :         failed += keywords_insert("EACH", EACH);
     456         315 :         failed += keywords_insert("REFERENCING", REFERENCING);
     457             : 
     458         315 :         failed += keywords_insert("RANGE", RANGE);
     459         315 :         failed += keywords_insert("UNBOUNDED", UNBOUNDED);
     460         315 :         failed += keywords_insert("PRECEDING", PRECEDING);
     461         315 :         failed += keywords_insert("FOLLOWING", FOLLOWING);
     462         315 :         failed += keywords_insert("CURRENT", CURRENT);
     463         315 :         failed += keywords_insert("EXCLUDE", EXCLUDE);
     464         315 :         failed += keywords_insert("OTHERS", OTHERS);
     465         315 :         failed += keywords_insert("TIES", TIES);
     466         315 :         failed += keywords_insert("GROUPS", GROUPS);
     467         315 :         failed += keywords_insert("WINDOW", WINDOW);
     468             : 
     469             :         /* special SQL/XML keywords */
     470         315 :         failed += keywords_insert("XMLCOMMENT", XMLCOMMENT);
     471         315 :         failed += keywords_insert("XMLCONCAT", XMLCONCAT);
     472         315 :         failed += keywords_insert("XMLDOCUMENT", XMLDOCUMENT);
     473         315 :         failed += keywords_insert("XMLELEMENT", XMLELEMENT);
     474         315 :         failed += keywords_insert("XMLATTRIBUTES", XMLATTRIBUTES);
     475         315 :         failed += keywords_insert("XMLFOREST", XMLFOREST);
     476         315 :         failed += keywords_insert("XMLPARSE", XMLPARSE);
     477         315 :         failed += keywords_insert("STRIP", STRIP);
     478         315 :         failed += keywords_insert("WHITESPACE", WHITESPACE);
     479         315 :         failed += keywords_insert("XMLPI", XMLPI);
     480         315 :         failed += keywords_insert("XMLQUERY", XMLQUERY);
     481         315 :         failed += keywords_insert("PASSING", PASSING);
     482         315 :         failed += keywords_insert("XMLTEXT", XMLTEXT);
     483         315 :         failed += keywords_insert("NIL", NIL);
     484         315 :         failed += keywords_insert("REF", REF);
     485         315 :         failed += keywords_insert("ABSENT", ABSENT);
     486         315 :         failed += keywords_insert("DOCUMENT", DOCUMENT);
     487         315 :         failed += keywords_insert("ELEMENT", ELEMENT);
     488         315 :         failed += keywords_insert("CONTENT", CONTENT);
     489         315 :         failed += keywords_insert("XMLNAMESPACES", XMLNAMESPACES);
     490         315 :         failed += keywords_insert("NAMESPACE", NAMESPACE);
     491         315 :         failed += keywords_insert("XMLVALIDATE", XMLVALIDATE);
     492         315 :         failed += keywords_insert("RETURNING", RETURNING);
     493         315 :         failed += keywords_insert("LOCATION", LOCATION);
     494         315 :         failed += keywords_insert("ID", ID);
     495         315 :         failed += keywords_insert("ACCORDING", ACCORDING);
     496         315 :         failed += keywords_insert("XMLSCHEMA", XMLSCHEMA);
     497         315 :         failed += keywords_insert("URI", URI);
     498         315 :         failed += keywords_insert("XMLAGG", XMLAGG);
     499             : 
     500             :         /* keywords for opengis */
     501         315 :         failed += keywords_insert("GEOMETRY", GEOMETRY);
     502             : 
     503         315 :         failed += keywords_insert("POINT", GEOMETRYSUBTYPE);
     504         315 :         failed += keywords_insert("LINESTRING", GEOMETRYSUBTYPE);
     505         315 :         failed += keywords_insert("POLYGON", GEOMETRYSUBTYPE);
     506         315 :         failed += keywords_insert("MULTIPOINT", GEOMETRYSUBTYPE);
     507         315 :         failed += keywords_insert("MULTILINESTRING", GEOMETRYSUBTYPE);
     508         315 :         failed += keywords_insert("MULTIPOLYGON", GEOMETRYSUBTYPE);
     509         315 :         failed += keywords_insert("GEOMETRYCOLLECTION", GEOMETRYSUBTYPE);
     510             : 
     511         315 :         failed += keywords_insert("POINTZ", GEOMETRYSUBTYPE);
     512         315 :         failed += keywords_insert("LINESTRINGZ", GEOMETRYSUBTYPE);
     513         315 :         failed += keywords_insert("POLYGONZ", GEOMETRYSUBTYPE);
     514         315 :         failed += keywords_insert("MULTIPOINTZ", GEOMETRYSUBTYPE);
     515         315 :         failed += keywords_insert("MULTILINESTRINGZ", GEOMETRYSUBTYPE);
     516         315 :         failed += keywords_insert("MULTIPOLYGONZ", GEOMETRYSUBTYPE);
     517         315 :         failed += keywords_insert("GEOMETRYCOLLECTIONZ", GEOMETRYSUBTYPE);
     518             : 
     519         315 :         failed += keywords_insert("POINTM", GEOMETRYSUBTYPE);
     520         315 :         failed += keywords_insert("LINESTRINGM", GEOMETRYSUBTYPE);
     521         315 :         failed += keywords_insert("POLYGONM", GEOMETRYSUBTYPE);
     522         315 :         failed += keywords_insert("MULTIPOINTM", GEOMETRYSUBTYPE);
     523         315 :         failed += keywords_insert("MULTILINESTRINGM", GEOMETRYSUBTYPE);
     524         315 :         failed += keywords_insert("MULTIPOLYGONM", GEOMETRYSUBTYPE);
     525         315 :         failed += keywords_insert("GEOMETRYCOLLECTIONM", GEOMETRYSUBTYPE);
     526             : 
     527         315 :         failed += keywords_insert("POINTZM", GEOMETRYSUBTYPE);
     528         315 :         failed += keywords_insert("LINESTRINGZM", GEOMETRYSUBTYPE);
     529         315 :         failed += keywords_insert("POLYGONZM", GEOMETRYSUBTYPE);
     530         315 :         failed += keywords_insert("MULTIPOINTZM", GEOMETRYSUBTYPE);
     531         315 :         failed += keywords_insert("MULTILINESTRINGZM", GEOMETRYSUBTYPE);
     532         315 :         failed += keywords_insert("MULTIPOLYGONZM", GEOMETRYSUBTYPE);
     533         315 :         failed += keywords_insert("GEOMETRYCOLLECTIONZM", GEOMETRYSUBTYPE);
     534         315 :         failed += keywords_insert("LOGIN", LOGIN);
     535             :         // odbc keywords
     536         315 :         failed += keywords_insert("d", ODBC_DATE_ESCAPE_PREFIX);
     537         315 :         failed += keywords_insert("t", ODBC_TIME_ESCAPE_PREFIX);
     538         315 :         failed += keywords_insert("ts", ODBC_TIMESTAMP_ESCAPE_PREFIX);
     539         315 :         failed += keywords_insert("guid", ODBC_GUID_ESCAPE_PREFIX);
     540         315 :         failed += keywords_insert("fn", ODBC_FUNC_ESCAPE_PREFIX);
     541         315 :         failed += keywords_insert("oj", ODBC_OJ_ESCAPE_PREFIX);
     542         315 :         failed += keywords_insert("DAYNAME", DAYNAME);
     543         315 :         failed += keywords_insert("IFNULL", IFNULL);
     544         315 :         failed += keywords_insert("MONTHNAME", MONTHNAME);
     545         315 :         failed += keywords_insert("TIMESTAMPADD", TIMESTAMPADD);
     546         315 :         failed += keywords_insert("TIMESTAMPDIFF", TIMESTAMPDIFF);
     547         315 :         failed += keywords_insert("SQL_BIGINT", SQL_BIGINT);
     548         315 :         failed += keywords_insert("SQL_BINARY", SQL_BINARY);
     549         315 :         failed += keywords_insert("SQL_BIT", SQL_BIT);
     550         315 :         failed += keywords_insert("SQL_CHAR", SQL_CHAR);
     551         315 :         failed += keywords_insert("SQL_DATE", SQL_DATE);
     552         315 :         failed += keywords_insert("SQL_DECIMAL", SQL_DECIMAL);
     553         315 :         failed += keywords_insert("SQL_DOUBLE", SQL_DOUBLE);
     554         315 :         failed += keywords_insert("SQL_FLOAT", SQL_FLOAT);
     555         315 :         failed += keywords_insert("SQL_GUID", SQL_GUID);
     556         315 :         failed += keywords_insert("SQL_HUGEINT", SQL_HUGEINT);
     557         315 :         failed += keywords_insert("SQL_INTEGER", SQL_INTEGER);
     558         315 :         failed += keywords_insert("SQL_INTERVAL_DAY", SQL_INTERVAL_DAY);
     559         315 :         failed += keywords_insert("SQL_INTERVAL_DAY_TO_HOUR", SQL_INTERVAL_DAY_TO_HOUR);
     560         315 :         failed += keywords_insert("SQL_INTERVAL_DAY_TO_MINUTE", SQL_INTERVAL_DAY_TO_MINUTE);
     561         315 :         failed += keywords_insert("SQL_INTERVAL_DAY_TO_SECOND", SQL_INTERVAL_DAY_TO_SECOND);
     562         315 :         failed += keywords_insert("SQL_INTERVAL_HOUR", SQL_INTERVAL_HOUR);
     563         315 :         failed += keywords_insert("SQL_INTERVAL_HOUR_TO_MINUTE", SQL_INTERVAL_HOUR_TO_MINUTE);
     564         315 :         failed += keywords_insert("SQL_INTERVAL_HOUR_TO_SECOND", SQL_INTERVAL_HOUR_TO_SECOND);
     565         315 :         failed += keywords_insert("SQL_INTERVAL_MINUTE", SQL_INTERVAL_MINUTE);
     566         315 :         failed += keywords_insert("SQL_INTERVAL_MINUTE_TO_SECOND", SQL_INTERVAL_MINUTE_TO_SECOND);
     567         315 :         failed += keywords_insert("SQL_INTERVAL_MONTH", SQL_INTERVAL_MONTH);
     568         315 :         failed += keywords_insert("SQL_INTERVAL_SECOND", SQL_INTERVAL_SECOND);
     569         315 :         failed += keywords_insert("SQL_INTERVAL_YEAR", SQL_INTERVAL_YEAR);
     570         315 :         failed += keywords_insert("SQL_INTERVAL_YEAR_TO_MONTH", SQL_INTERVAL_YEAR_TO_MONTH);
     571         315 :         failed += keywords_insert("SQL_LONGVARBINARY", SQL_LONGVARBINARY);
     572         315 :         failed += keywords_insert("SQL_LONGVARCHAR", SQL_LONGVARCHAR);
     573         315 :         failed += keywords_insert("SQL_NUMERIC", SQL_NUMERIC);
     574         315 :         failed += keywords_insert("SQL_REAL", SQL_REAL);
     575         315 :         failed += keywords_insert("SQL_SMALLINT", SQL_SMALLINT);
     576         315 :         failed += keywords_insert("SQL_TIME", SQL_TIME);
     577         315 :         failed += keywords_insert("SQL_TIMESTAMP", SQL_TIMESTAMP);
     578         315 :         failed += keywords_insert("SQL_TINYINT", SQL_TINYINT);
     579         315 :         failed += keywords_insert("SQL_VARBINARY", SQL_VARBINARY);
     580         315 :         failed += keywords_insert("SQL_VARCHAR", SQL_VARCHAR);
     581         315 :         failed += keywords_insert("SQL_WCHAR", SQL_WCHAR);
     582         315 :         failed += keywords_insert("SQL_WLONGVARCHAR", SQL_WLONGVARCHAR);
     583         315 :         failed += keywords_insert("SQL_WVARCHAR", SQL_WVARCHAR);
     584         315 :         failed += keywords_insert("SQL_TSI_FRAC_SECOND", SQL_TSI_FRAC_SECOND);
     585         315 :         failed += keywords_insert("SQL_TSI_SECOND", SQL_TSI_SECOND);
     586         315 :         failed += keywords_insert("SQL_TSI_MINUTE", SQL_TSI_MINUTE);
     587         315 :         failed += keywords_insert("SQL_TSI_HOUR", SQL_TSI_HOUR);
     588         315 :         failed += keywords_insert("SQL_TSI_DAY", SQL_TSI_DAY);
     589         315 :         failed += keywords_insert("SQL_TSI_WEEK", SQL_TSI_WEEK);
     590         315 :         failed += keywords_insert("SQL_TSI_MONTH", SQL_TSI_MONTH);
     591         315 :         failed += keywords_insert("SQL_TSI_QUARTER", SQL_TSI_QUARTER);
     592         315 :         failed += keywords_insert("SQL_TSI_YEAR", SQL_TSI_YEAR);
     593             : 
     594         315 :         failed += keywords_insert("LEAST", MARGFUNC);
     595         315 :         failed += keywords_insert("GREATEST", MARGFUNC);
     596         315 :         return failed;
     597             : }
     598             : 
     599             : #define find_keyword_bs(lc, s) find_keyword(lc->rs->buf+lc->rs->pos+s)
     600             : 
     601             : void
     602      242727 : scanner_init(struct scanner *s, bstream *rs, stream *ws)
     603             : {
     604      485454 :         *s = (struct scanner) {
     605             :                 .rs = rs,
     606             :                 .ws = ws,
     607             :                 .mode = LINE_N,
     608      242727 :                 .raw_string_mode = GDKgetenv_istrue("raw_strings"),
     609             :                 .aborted = false,
     610             :         };
     611      242727 : }
     612             : 
     613             : void
     614     1216974 : scanner_query_processed(struct scanner *s)
     615             : {
     616     1216974 :         int cur;
     617             : 
     618     1216974 :         if (s->yybak) {
     619      464259 :                 s->rs->buf[s->rs->pos + s->yycur] = s->yybak;
     620      464259 :                 s->yybak = 0;
     621             :         }
     622     1216974 :         if (s->rs) {
     623     1216974 :                 s->rs->pos += s->yycur;
     624             :                 /* completely eat the query including white space after the ; */
     625     2325876 :                 while (s->rs->pos < s->rs->len &&
     626     2004062 :                            (cur = s->rs->buf[s->rs->pos], iswspace(cur))) {
     627     1108902 :                         s->rs->pos++;
     628             :                 }
     629             :         }
     630             :         /*assert(s->rs->pos <= s->rs->len);*/
     631     1216974 :         s->yycur = 0;
     632     1216974 :         s->started = 0;
     633     1216974 :         s->as = 0;
     634     1216974 :         s->schema = NULL;
     635     1216974 : }
     636             : 
     637             : static int
     638          33 : scanner_error(mvc *lc, int cur)
     639             : {
     640          33 :         switch (cur) {
     641           0 :         case EOF:
     642           0 :                 (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected end of input");
     643           0 :                 return EOF;
     644          33 :         default:
     645             :                 /* on Windows at least, iswcntrl returns TRUE for
     646             :                  * U+FEFF, but we just want consistent error
     647             :                  * messages */
     648          33 :                 (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected%s character (U+%04X)", iswcntrl(cur) && cur != 0xFEFF ? " control" : "", (unsigned) cur);
     649             :         }
     650          33 :         return LEX_ERROR;
     651             : }
     652             : 
     653             : 
     654             : /*
     655             :    UTF-8 encoding is as follows:
     656             : U-00000000 - U-0000007F: 0xxxxxxx
     657             : U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
     658             : U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
     659             : U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
     660             : U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
     661             : U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
     662             : */
     663             : /* To be correctly coded UTF-8, the sequence should be the shortest
     664             :    possible encoding of the value being encoded.  This means that for
     665             :    an encoding of length n+1 (1 <= n <= 5), at least one of the bits in
     666             :    utf8chkmsk[n] should be non-zero (else the encoding could be
     667             :    shorter).
     668             : */
     669             : static const int utf8chkmsk[] = {
     670             :         0x0000007f,
     671             :         0x00000780,
     672             :         0x0000f800,
     673             :         0x001f0000,
     674             :         0x03e00000,
     675             :         0x7c000000
     676             : };
     677             : 
     678             : static void
     679    29817949 : utf8_putchar(struct scanner *lc, int ch)
     680             : {
     681    29817949 :         if ((ch) < 0x80) {
     682    29817944 :                 lc->yycur--;
     683           5 :         } else if ((ch) < 0x800) {
     684           0 :                 lc->yycur -= 2;
     685           5 :         } else if ((ch) < 0x10000) {
     686           5 :                 lc->yycur -= 3;
     687             :         } else {
     688           0 :                 lc->yycur -= 4;
     689             :         }
     690    29817949 : }
     691             : 
                       /*
                        * Make sure that at least `n` bytes are buffered beyond the scanner's
                        * current position (b->pos + lc->yycur), reading more input from the
                        * stream when needed.
                        *
                        * Returns 1 when the bytes are available.  Returns EOF when the scanner
                        * has been aborted, when data is missing while in LINE_1 mode or while no
                        * token has been started, when reading from the stream fails, or when a
                        * request for more input yields no data.
                        */
     692             : static inline int
     693   130276457 : scanner_read_more(struct scanner *lc, size_t n)
     694             : {
     695   130276457 :         bstream *b = lc->rs;
                               /* set once PROMPT2 has been written to ask for a continuation */
     696   130276457 :         bool more = false;
     697             : 
     698             : 
     699   130276457 :         if (lc->aborted)
     700             :                 return EOF;
     701   130280764 :         while (b->len < b->pos + lc->yycur + n) {
     702             : 
     703      139369 :                 if (lc->mode == LINE_1 || !lc->started)
     704             :                         return EOF;
     705             : 
     706             :                 /* query is not finished ask for more */
     707        6919 :                 if (b->eof || !isa_block_stream(b->s)) {
                                               /* a non-zero result from bstream_getoob() aborts the scan */
     708        4762 :                         if (bstream_getoob(b)) {
     709           0 :                                 lc->aborted = true;
     710           0 :                                 return EOF;
     711             :                         }
                                               /* flush only when the whole prompt was written */
     712        2157 :                         if (mnstr_write(lc->ws, PROMPT2, sizeof(PROMPT2) - 1, 1) == 1)
     713        2157 :                                 mnstr_flush(lc->ws, MNSTR_FLUSH_DATA);
     714        2157 :                         b->eof = false;
     715        2157 :                         more = true;
     716             :                 }
     717             :                 /* we need more query text */
     718        4314 :                 if (bstream_next(b) < 0) {
     719           0 :                         if (mnstr_errnr(b->s) == MNSTR_INTERRUPT) {
     720             :                                 // now what?
     721           0 :                                 lc->errstr = "Query aborted";
     722           0 :                                 lc->aborted = true;
     723           0 :                                 mnstr_clearerr(b->s);
     724             :                         }
     725           0 :                         return EOF;
     726        4314 :                 } else if (/* we asked for more data but didn't get any */
     727        2157 :                            (more && b->eof && b->len < b->pos + lc->yycur + n))
     728             :                         return EOF;
                                       /* the unread data is exactly the two bytes '\200' '\n':
                                        * reported as "Query aborted" and removed from the buffer
                                        * (presumably an abort marker sent by the client -- TODO confirm) */
     729        4307 :                 if (more && b->pos + lc->yycur + 2 == b->len && b->buf[b->pos + lc->yycur] == '\200' && b->buf[b->pos + lc->yycur + 1] == '\n') {
     730           0 :                         lc->errstr = "Query aborted";
     731           0 :                         b->len -= 2;
     732           0 :                         b->buf[b->len] = 0;
     733           0 :                         return EOF;
     734             :                 }
     735             :         }
     736             :         return 1;
     737             : }
     738             : 
                       /*
                        * Read the next character from the input and advance lc->yycur past the
                        * bytes it occupies.  A byte without the high bit set is returned as is;
                        * otherwise the UTF-8 sequence starting at that byte is decoded and the
                        * resulting code point is returned.
                        *
                        * Returns EOF when no more input is available or when the sequence is
                        * malformed (invalid start byte, invalid continuation byte, or not the
                        * shortest possible encoding); in the malformed cases lc->errstr is set.
                        */
     739             : static inline int
     740   129038105 : scanner_getc(struct scanner *lc)
     741             : {
     742   129038105 :         bstream *b = lc->rs;
     743   129038105 :         unsigned char *s = NULL;
     744   129038105 :         int c, m, n, mask;
     745             : 
     746   129038105 :         if (scanner_read_more(lc, 1) == EOF) {
     747             :                 //lc->errstr = SQLSTATE(42000) "end of input stream";
     748             :                 return EOF;
     749             :         }
     750   128905618 :         lc->errstr = NULL;
     751             : 
     752   128905618 :         s = (unsigned char *) b->buf + b->pos + lc->yycur++;
     753   128905618 :         if (((c = *s) & 0x80) == 0) {
     754             :                 /* 7-bit char */
     755             :                 return c;
     756             :         }
                               /* count the 1-bits that follow the top bit of the first byte */
     757       88250 :         for (n = 0, m = 0x40; c & m; n++, m >>= 1)
     758             :                 ;
     759             :         /* n now is number of 10xxxxxx bytes that should follow */
                               /* NOTE(review): the length test uses b->pos + n and leaves out
                                * lc->yycur, unlike the test in scanner_read_more -- confirm intent */
     760       29443 :         if (n == 0 || n >= 6 || (b->pos + n) > b->len) {
     761             :                 /* incorrect UTF-8 sequence */
     762             :                 /* n==0: c == 10xxxxxx */
     763             :                 /* n>=6: c == 1111111x */
     764           0 :                 lc->errstr = SQLSTATE(42000) "invalid start of UTF-8 sequence";
     765           0 :                 goto error;
     766             :         }
     767             : 
     768       29443 :         if (scanner_read_more(lc, (size_t) n) == EOF)
     769             :                 return EOF;
                               /* recompute s after the call to scanner_read_more() */
     770       29443 :         s = (unsigned char *) b->buf + b->pos + lc->yycur;
     771             : 
                               /* fetch the mask now: n is counted down by the loop below */
     772       29443 :         mask = utf8chkmsk[n];
     773       29443 :         c &= ~(0xFFC0 >> n);  /* remove non-x bits */
     774       88249 :         while (--n >= 0) {
     775       58807 :                 c <<= 6;
     776       58807 :                 lc->yycur++;
     777       58807 :                 if (((m = *s++) & 0xC0) != 0x80) {
     778             :                         /* incorrect UTF-8 sequence: byte is not 10xxxxxx */
     779             :                         /* this includes end-of-string (m == 0) */
     780           1 :                         lc->errstr = SQLSTATE(42000) "invalid continuation in UTF-8 sequence";
     781           1 :                         goto error;
     782             :                 }
     783       58806 :                 c |= m & 0x3F;
     784             :         }
     785       29442 :         if ((c & mask) == 0) {
     786             :                 /* incorrect UTF-8 sequence: not shortest possible */
     787           0 :                 lc->errstr = SQLSTATE(42000) "not shortest possible UTF-8 sequence";
     788           0 :                 goto error;
     789             :         }
     790             : 
     791             :         return c;
     792             : 
     793           1 : error:
     794           1 :         if (b->pos + lc->yycur < b->len)    /* skip bogus char */
     795           0 :                 lc->yycur++;
     796             :         return EOF;
     797             : }
     798             : 
     799             : static int
     800    26424563 : scanner_token(struct scanner *lc, int token)
     801             : {
     802    26424563 :         lc->yybak = lc->rs->buf[lc->rs->pos + lc->yycur];
     803    26424563 :         lc->rs->buf[lc->rs->pos + lc->yycur] = 0;
     804    26424563 :         lc->yyval = token;
     805    26424563 :         return lc->yyval;
     806             : }
     807             : 
                       /*
                        * Scan a quoted string or quoted identifier up to its closing `quote`
                        * character, starting at the scanner's current position (presumably
                        * just past the opening quote -- confirm against the caller).
                        *
                        * When `escapes` is true a backslash escapes the character after it.
                        * A doubled quote character does not end the string.  At most 2^11
                        * bytes are scanned in one run for '"' quoted text and 2^30 otherwise.
                        *
                        * Returns STRING (via scanner_token) on success, LEX_ERROR when the
                        * string is too long or contains a NUL byte, the result of
                        * scanner_error() for U+FEFF inside '"' quoted text, and EOF when the
                        * input ends before the closing quote.
                        */
     808             : static int
     809     1961536 : scanner_string(mvc *c, int quote, bool escapes)
     810             : {
     811     1961536 :         struct scanner *lc = &c->scanner;
     812     1961536 :         bstream *rs = lc->rs;
     813     1961536 :         int cur = quote;
                               /* true when the previous byte was an unescaped backslash */
     814     1961536 :         bool escape = false;
     815     1961536 :         const size_t limit = quote == '"' ? 1 << 11 : 1 << 30;
     816             : 
     817     1961536 :         lc->started = 1;
     818     1999144 :         while (cur != EOF) {
     819     1999129 :                 size_t pos = 0;
     820     1999129 :                 const size_t yycur = rs->pos + lc->yycur;
     821             : 
                                       /* fast path: walk over 7-bit bytes until a NUL byte, a byte
                                        * with the high bit set, an unescaped quote, or the limit */
     822    34034720 :                 while (cur != EOF && (quote != '"' || cur != 0xFEFF) && pos < limit &&
     823    32035591 :                        (((cur = rs->buf[yycur + pos++]) & 0x80) == 0) &&
     824    64041728 :                        cur && (cur != quote || escape)) {
     825    30036463 :                         if (escapes && cur == '\\')
     826        6556 :                                 escape = !escape;
     827             :                         else
     828             :                                 escape = false;
     829             :                 }
     830     1999129 :                 if (pos == limit) {
     831           0 :                         (void) sql_error(c, 2, SQLSTATE(42000) "string too long");
     832           0 :                         return LEX_ERROR;
     833             :                 }
     834             :                 /* BOM character not allowed as an identifier */
     835     1999129 :                 if (cur == EOF || (quote == '"' && cur == 0xFEFF))
     836           1 :                         return scanner_error(c, cur);
     837     1999128 :                 lc->yycur += pos;
     838             :                 /* check for quote escaped quote: Obscure SQL Rule */
     839     1999128 :                 if (cur == quote && rs->buf[yycur + pos] == quote) {
     840        8167 :                         lc->yycur++;
     841        8167 :                         continue;
     842             :                 }
     843     1990961 :                 assert(yycur + pos <= rs->len + 1);
     844     1990961 :                 if (cur == quote && !escape) {
     845     1961506 :                         return scanner_token(lc, STRING);
     846             :                 }
     847       29455 :                 lc->yycur--; /* go back to current (possibly invalid) char */
     848             :                 /* long utf8, if correct isn't the quote */
     849       29455 :                 if (!cur) {
                                               /* a NUL byte inside the buffered data is an error; a
                                                * NUL byte at the end of the data means more input is
                                                * needed */
     850          30 :                         if (lc->rs->len >= lc->rs->pos + lc->yycur + 1) {
     851          14 :                                 (void) sql_error(c, 2, SQLSTATE(42000) "NULL byte in string");
     852          14 :                                 return LEX_ERROR;
     853             :                         }
     854          16 :                         cur = scanner_read_more(lc, 1);
     855             :                 } else {
                                               /* decode (and validate) the multi-byte character */
     856       29425 :                         cur = scanner_getc(lc);
     857             :                 }
     858             :         }
     859          15 :         (void) sql_error(c, 2, "%s", lc->errstr ? lc->errstr : SQLSTATE(42000) "Unexpected end of input");
     860          15 :         return EOF;
     861             : }
     862             : 
     863             : /* scan a structure {blah} into a string. We only count the matching {}
     864             :  * unless escaped. We do not consider embeddings in string literals yet
     865             :  */
     866             : 
                       /*
                        * Scan a brace-delimited body whose opening '{' has just been consumed
                        * (asserted below), up to the matching '}'.  Nested braces are counted;
                        * a backslash toggles the escape state.
                        *
                        * Returns X_BODY (via scanner_token) when the matching brace is found,
                        * LEX_ERROR when a NUL byte occurs inside the buffered data, and EOF
                        * when the input ends first.
                        */
     867             : static int
     868         234 : scanner_body(mvc *c)
     869             : {
     870         234 :         struct scanner *lc = &c->scanner;
     871         234 :         bstream *rs = lc->rs;
     872         234 :         int cur = (int) 'x';
                               /* number of currently open braces; the first one is already open */
     873         234 :         int blk = 1;
     874         234 :         bool escape = false;
     875             : 
     876         234 :         lc->started = 1;
     877         234 :         assert(rs->buf[rs->pos + lc->yycur-1] == '{');
     878         290 :         while (cur != EOF) {
     879         290 :                 size_t pos = rs->pos + lc->yycur;
     880             : 
                                       /* walk over 7-bit bytes until a NUL byte, a byte with the
                                        * high bit set, or the brace count drops to zero */
     881       32350 :                 while ((((cur = rs->buf[pos++]) & 0x80) == 0) && cur && (blk || escape)) {
     882       32060 :                         if (cur != '\\')
     883             :                                 escape = false;
     884             :                         else
     885          12 :                                 escape = !escape;
     886       32060 :                         blk += cur =='{';
     887       32060 :                         blk -= cur =='}';
     888             :                 }
     889         290 :                 lc->yycur = pos - rs->pos;
     890         290 :                 assert(pos <= rs->len + 1);
     891         290 :                 if (blk == 0 && !escape){
     892         234 :                         lc->yycur--; /* go back to current (possibly invalid) char */
     893         234 :                         return scanner_token(lc, X_BODY);
     894             :                 }
     895          56 :                 lc->yycur--; /* go back to current (possibly invalid) char */
     896          56 :                 if (!cur) {
                                               /* a NUL byte inside the buffered data is an error; a
                                                * NUL byte at the end of the data means more input is
                                                * needed */
     897          56 :                         if (lc->rs->len >= lc->rs->pos + lc->yycur + 1) {
     898           0 :                                 (void) sql_error(c, 2, SQLSTATE(42000) "NULL byte in string");
     899           0 :                                 return LEX_ERROR;
     900             :                         }
     901          56 :                         cur = scanner_read_more(lc, 1);
     902             :                 } else {
                                               /* decode (and validate) the multi-byte character */
     903           0 :                         cur = scanner_getc(lc);
     904             :                 }
     905             :         }
     906           0 :         (void) sql_error(c, 2, SQLSTATE(42000) "Unexpected end of input");
     907           0 :         return EOF;
     908             : }
     909             : 
     910             : static int
     911    12902338 : keyword_or_ident(mvc * c, int cur)
     912             : {
     913    12902338 :         struct scanner *lc = &c->scanner;
     914    12902338 :         keyword *k = NULL;
     915    12902338 :         size_t s;
     916             : 
     917    12902338 :         lc->started = 1;
     918    12902338 :         utf8_putchar(lc, cur);
     919    12902305 :         s = lc->yycur;
     920    12902305 :         lc->yyval = IDENT;
     921    77515310 :         while ((cur = scanner_getc(lc)) != EOF) {
     922    77515226 :                 if (!iswalnum(cur) && cur != '_') {
     923    12902221 :                         utf8_putchar(lc, cur);
     924    12902233 :                         (void)scanner_token(lc, IDENT);
     925    12902233 :                         if ((k = find_keyword_bs(lc,s)))
     926     7921675 :                                 lc->yyval = k->token;
     927    12902366 :                         return lc->yyval;
     928             :                 }
     929             :         }
     930             :         if (cur < 0)
     931             :                 return cur;
     932             :         (void)scanner_token(lc, IDENT);
     933             :         if ((k = find_keyword_bs(lc,s)))
     934             :                 lc->yyval = k->token;
     935             :         return lc->yyval;
     936             : }
     937             : 
     938             : static int
     939    13269378 : skip_white_space(struct scanner * lc)
     940             : {
     941    16845128 :         int cur;
     942             : 
     943    16845128 :         do {
     944    16845128 :                 lc->yysval = lc->yycur;
     945    16845128 :         } while ((cur = scanner_getc(lc)) != EOF && iswspace(cur));
     946    13269413 :         return cur;
     947             : }
     948             : 
     949             : static int
     950       66810 : skip_c_comment(struct scanner * lc)
     951             : {
     952       66810 :         int cur;
     953       66810 :         int prev = 0;
     954       66810 :         int started = lc->started;
     955       66810 :         int depth = 1;
     956             : 
     957       66810 :         lc->started = 1;
     958     1347788 :         while (depth > 0 && (cur = scanner_getc(lc)) != EOF) {
     959     1280978 :                 if (prev == '*' && cur == '/')
     960       66810 :                         depth--;
     961     1214168 :                 else if (prev == '/' && cur == '*') {
     962             :                         /* block comments can nest */
     963           0 :                         cur = 0; /* prevent slash-star-slash from matching */
     964           0 :                         depth++;
     965             :                 }
     966             :                 prev = cur;
     967             :         }
     968       66810 :         lc->yysval = lc->yycur;
     969       66810 :         lc->started = started;
     970             :         /* a comment is equivalent to a newline */
     971       66810 :         return cur == EOF ? cur : '\n';
     972             : }
     973             : 
     974             : static int
     975        2762 : skip_sql_comment(struct scanner * lc)
     976             : {
     977        2762 :         int cur;
     978        2762 :         int started = lc->started;
     979             : 
     980        2762 :         lc->started = 1;
     981      816068 :         while ((cur = scanner_getc(lc)) != EOF && (cur != '\n'))
     982             :                 ;
     983        2762 :         lc->yysval = lc->yycur;
     984        2762 :         lc->started = started;
     985             :         /* a comment is equivalent to a newline */
     986        2762 :         return cur;
     987             : }
     988             : 
     989             : static int tokenize(mvc * lc, int cur);
     990             : 
     991     5010605 : static inline bool is_valid_decimal_digit(int cur) { return (iswdigit(cur)); }
     992          13 : static inline bool is_valid_binary_digit(int cur) { return (iswdigit(cur) && cur < '2'); }
     993          10 : static inline bool is_valid_octal_digit(int cur) { return (iswdigit(cur) && cur < '8'); }
     994        3688 : static inline bool is_valid_hexadecimal_digit(int cur) { return iswxdigit(cur); }
     995             : 
     996     1664320 : static inline int check_validity_number(mvc* c, int pcur, bool initial_underscore_allowed, int *token, int type) {
     997     1664320 :         struct scanner *lc = &c->scanner;
     998     1664320 :         bool (*is_valid_n_ary_digit)(int);
     999             : 
    1000     1664320 :         if (pcur == '_' && !initial_underscore_allowed)  /* ERROR: initial underscore not allowed */  {
    1001           0 :                 *token = 0;
    1002           0 :                 return '_';
    1003             :         }
    1004             : 
    1005     1664320 :         switch (type) {
    1006             :         case BINARYNUM:
    1007             :                 is_valid_n_ary_digit = &is_valid_binary_digit;
    1008             :                 break;
    1009           3 :         case OCTALNUM:
    1010           3 :                 is_valid_n_ary_digit = &is_valid_octal_digit;
    1011           3 :                 break;
    1012         280 :         case HEXADECIMALNUM:
    1013         280 :                 is_valid_n_ary_digit = &is_valid_hexadecimal_digit;
    1014         280 :                 break;
    1015     1664035 :         default:
    1016     1664035 :                 is_valid_n_ary_digit = &is_valid_decimal_digit;
    1017     1664035 :                 break;
    1018             :         }
    1019             : 
    1020     1664320 :         if ( !(pcur == '_' || is_valid_n_ary_digit(pcur)) ) /* ERROR: first digit is not valid */ {
    1021          17 :                 *token = 0;
    1022          17 :                 return pcur;
    1023             :         }
    1024             : 
    1025     1664187 :         int cur = scanner_getc(lc);
    1026     1664139 :         *token = type;
    1027     3351893 :         while (cur != EOF) {
    1028     3351896 :                 if (cur == '_') {
    1029          25 :                         if (pcur == '_') /* ERROR: multiple consecutive underscores */ {
    1030           2 :                                 *token = 0;
    1031           2 :                                 return '_';
    1032             :                         }
    1033             :                 }
    1034     3351871 :                 else if (!is_valid_n_ary_digit(cur))
    1035             :                         break;
    1036     1687882 :                 pcur = cur;
    1037     1687882 :                 cur = scanner_getc(lc);
    1038             :         }
    1039             : 
    1040     1663810 :         if (pcur == '_')  {
    1041           3 :                 *token = 0;
    1042           3 :                 if (iswalnum(cur))       /* ERROR: not a valid digit */
    1043             :                         return cur;
    1044             :                 else                            /* ERROR: number ends with underscore */
    1045             :                         return '_';
    1046             :         }
    1047             : 
    1048             :         return cur;
    1049             : }
    1050             : 
    1051             : static int
    1052     1651059 : number(mvc * c, int cur)
    1053             : {
    1054     1651059 :         struct scanner *lc = &c->scanner;
    1055     1651059 :         int token = sqlINT;
    1056             : 
    1057             :         /* a number has one of these forms (expressed in regular expressions):
    1058             :          * 0x[0-9A-Fa-f]+                   -- (hexadecimal) INTEGER
    1059             :          * \.[0-9]+                         -- DECIMAL
    1060             :          * [0-9]+\.[0-9]*                   -- DECIMAL
    1061             :          * [0-9]+@0                         -- OID
    1062             :          * [0-9]*\.[0-9]+[eE][-+]?[0-9]+    -- REAL
    1063             :          * [0-9]+(\.[0-9]*)?[eE][-+]?[0-9]+ -- REAL
    1064             :          * [0-9]+                           -- (decimal) INTEGER
    1065             :          */
    1066     1651059 :         lc->started = 1;
    1067     1651059 :         if (cur == '0') {
    1068      229856 :                 switch ((cur = scanner_getc(lc))) {
    1069           2 :                 case 'b':
    1070           2 :                         cur = scanner_getc(lc);
    1071           2 :                         if ((cur = check_validity_number(c, cur, true, &token, BINARYNUM)) == EOF) return cur;
    1072             :                         break;
    1073           3 :                 case 'o':
    1074           3 :                         cur = scanner_getc(lc);
    1075           3 :                         if ((cur = check_validity_number(c,  cur, true, &token, OCTALNUM)) == EOF) return cur;
    1076             :                         break;
    1077         280 :                 case 'x':
    1078         280 :                         cur = scanner_getc(lc);
    1079         280 :                         if ((cur = check_validity_number(c,  cur, true, &token, HEXADECIMALNUM)) == EOF) return cur;
    1080             :                         break;
    1081      229570 :                 default:
    1082      229570 :                         utf8_putchar(lc, cur);
    1083      229570 :                         cur = '0';
    1084             :                 }
    1085             :         }
    1086     1651058 :         if (token == sqlINT) {
    1087     1650757 :                 if ((cur = check_validity_number(c, cur, false, &token, sqlINT)) == EOF) return cur;
    1088     1650281 :                 if (cur == '@') {
    1089           0 :                         if (token == sqlINT) {
    1090           0 :                                 cur = scanner_getc(lc);
    1091           0 :                                 if (cur == EOF)
    1092             :                                         return cur;
    1093           0 :                                 if (cur == '0') {
    1094           0 :                                         cur = scanner_getc(lc);
    1095           0 :                                         if (cur == EOF)
    1096             :                                                 return cur;
    1097           0 :                                         token = OIDNUM;
    1098             :                                 } else {
    1099             :                                         /* number + '@' not followed by 0: show '@' as erroneous */
    1100           0 :                                         utf8_putchar(lc, cur);
    1101           0 :                                         cur = '@';
    1102           0 :                                         token = 0;
    1103             :                                 }
    1104             :                         }
    1105             :                 } else {
    1106     1650281 :                         if (cur == '.') {
    1107       11049 :                                 cur = scanner_getc(lc);
    1108       11049 :                                 if (iswalnum(cur)) /* early exit for numerical forms with final . e.g. 10. */
    1109       11043 :                                 if ((cur = check_validity_number(c, cur, false, &token, INTNUM)) == EOF) return cur;
    1110             :                         }
    1111     1650281 :                         if (token != 0)
    1112     1650285 :                         if (cur == 'e' || cur == 'E') {
    1113        2225 :                                 cur = scanner_getc(lc);
    1114        2225 :                                 if (cur == '+' || cur == '-')
    1115        2111 :                                         cur = scanner_getc(lc);
    1116        2225 :                                 if ((cur = check_validity_number(c, cur, false, &token, APPROXNUM)) == EOF) return cur;
    1117             :                         }
    1118             :                 }
    1119             :         }
    1120             : 
    1121     1648357 :         assert(cur != EOF);
    1122             : 
    1123     1650582 :         if (iswalnum(cur)) /* ERROR: not a valid digit */
    1124           6 :                 token = 0;
    1125             : 
    1126     1650582 :         utf8_putchar(lc, cur);
    1127             : 
    1128     1650520 :         if (token) {
    1129     1650510 :                 return scanner_token(lc, token);
    1130             :         } else {
    1131          10 :                 (void)sql_error( c, 2, SQLSTATE(42000) "Unexpected symbol %lc", (wint_t) cur);
    1132          10 :                 return LEX_ERROR;
    1133             :         }
    1134             : }
    1135             : 
    1136             : static
    1137    11934447 : int scanner_symbol(mvc * c, int cur)
    1138             : {
    1139    11934447 :         struct scanner *lc = &c->scanner;
    1140    11934447 :         int next = 0;
    1141    11934447 :         int started = lc->started;
    1142             : 
    1143    11934447 :         switch (cur) {
    1144       69451 :         case '/':
    1145       69451 :                 lc->started = 1;
    1146       69451 :                 next = scanner_getc(lc);
    1147       69451 :                 if (next < 0)
    1148             :                         return EOF;
    1149       69451 :                 if (next == '*') {
    1150       66810 :                         lc->started = started;
    1151       66810 :                         cur = skip_c_comment(lc);
    1152       66810 :                         if (cur < 0)
    1153             :                                 return EOF;
    1154       66810 :                         return tokenize(c, cur);
    1155             :                 } else {
    1156        2641 :                         utf8_putchar(lc, next);
    1157        2641 :                         return scanner_token(lc, cur);
    1158             :                 }
    1159           0 :         case '0':
    1160             :         case '1':
    1161             :         case '2':
    1162             :         case '3':
    1163             :         case '4':
    1164             :         case '5':
    1165             :         case '6':
    1166             :         case '7':
    1167             :         case '8':
    1168             :         case '9':
    1169           0 :                 return number(c, cur);
    1170           5 :         case '#':
    1171           5 :                 if ((cur = skip_sql_comment(lc)) == EOF)
    1172             :                         return cur;
    1173           5 :                 return tokenize(c, cur);
    1174      686833 :         case '\'':
    1175      686833 :                 if (lc->raw_string_mode || lc->next_string_is_raw)
    1176          46 :                         return scanner_string(c, cur, false);
    1177      686787 :                 return scanner_string(c, cur, true);
    1178     1267703 :         case '"':
    1179     1267703 :                 return scanner_string(c, cur, false);
    1180         500 :         case '{':
    1181             :                 // if previous tokens like LANGUAGE IDENT
    1182             :                 // TODO checking on IDENT only may not be enough
    1183         500 :                 if (lc->yylast == IDENT)
    1184         234 :                         return scanner_body(c);
    1185         266 :                 lc->started = 1;
    1186         266 :                 return scanner_token(lc, cur);
    1187         266 :         case '}':
    1188         266 :                 lc->started = 1;
    1189         266 :                 return scanner_token(lc, cur);
    1190       29353 :         case '-':
    1191       29353 :                 lc->started = 1;
    1192       29353 :                 next = scanner_getc(lc);
    1193       29353 :                 if (next < 0)
    1194             :                         return EOF;
    1195       29352 :                 if (next == '-') {
    1196        2757 :                         lc->started = started;
    1197        2757 :                         if ((cur = skip_sql_comment(lc)) == EOF)
    1198             :                                 return cur;
    1199        2757 :                         return tokenize(c, cur);
    1200             :                 }
    1201       26595 :                 lc->started = 1;
    1202       26595 :                 utf8_putchar(lc, next);
    1203       26595 :                 return scanner_token(lc, cur);
    1204          12 :         case '~': /* binary not */
    1205          12 :                 lc->started = 1;
    1206          12 :                 next = scanner_getc(lc);
    1207          12 :                 if (next < 0)
    1208             :                         return EOF;
    1209          12 :                 if (next == '=')
    1210           5 :                         return scanner_token(lc, GEOM_MBR_EQUAL);
    1211           7 :                 utf8_putchar(lc, next);
    1212           7 :                 return scanner_token(lc, cur);
    1213     6614380 :         case '^': /* binary xor */
    1214             :         case '*':
    1215             :         case '?':
    1216             :         case ':':
    1217             :         case '%':
    1218             :         case '+':
    1219             :         case '(':
    1220             :         case ')':
    1221             :         case ',':
    1222             :         case '=':
    1223             :         case '[':
    1224             :         case ']':
    1225     6614380 :                 lc->started = 1;
    1226     6614380 :                 return scanner_token(lc, cur);
    1227        5749 :         case '&':
    1228        5749 :                 lc->started = 1;
    1229        5749 :                 cur = scanner_getc(lc);
    1230        5749 :                 if (cur < 0)
    1231             :                         return EOF;
    1232        5749 :                 if (cur < 0)
    1233             :                         return EOF;
    1234        5749 :                 if(cur == '<') {
    1235           3 :                         next = scanner_getc(lc);
    1236           3 :                         if (next < 0)
    1237             :                                 return EOF;
    1238           3 :                         if(next == '|') {
    1239           0 :                                 return scanner_token(lc, GEOM_OVERLAP_OR_BELOW);
    1240             :                         } else {
    1241           3 :                                 utf8_putchar(lc, next); //put the char back
    1242           3 :                                 return scanner_token(lc, GEOM_OVERLAP_OR_LEFT);
    1243             :                         }
    1244        5746 :                 } else if(cur == '>')
    1245           3 :                         return scanner_token(lc, GEOM_OVERLAP_OR_RIGHT);
    1246        5743 :                 else if(cur == '&')
    1247           3 :                         return scanner_token(lc, GEOM_OVERLAP);
    1248             :                 else {/* binary and */
    1249        5740 :                         utf8_putchar(lc, cur); //put the char back
    1250        5740 :                         return scanner_token(lc, '&');
    1251             :                 }
    1252          19 :         case '@':
    1253          19 :                 lc->started = 1;
    1254          19 :                 return scanner_token(lc, AT);
    1255      942580 :         case ';':
    1256      942580 :                 lc->started = 0;
    1257      942580 :                 return scanner_token(lc, SCOLON);
    1258          27 :         case '!':
    1259          27 :                 lc->started = 1;
    1260          27 :                 cur = scanner_getc(lc);
    1261          27 :                 if (cur < 0)
    1262             :                         return EOF;
    1263          27 :                 else if (cur == '=') {
    1264          21 :                         lc->rs->buf[lc->rs->pos + lc->yycur - 2] = '<';
    1265          21 :                         lc->rs->buf[lc->rs->pos + lc->yycur - 1] = '>';
    1266          21 :                         return scanner_token( lc, COMPARISON);
    1267             :                 } else {
    1268           6 :                         utf8_putchar(lc, cur); //put the char back
    1269             :                 }
    1270           6 :                 return scanner_token(lc, '!');
    1271       50039 :         case '<':
    1272       50039 :                 lc->started = 1;
    1273       50039 :                 cur = scanner_getc(lc);
    1274       50039 :                 if (cur < 0)
    1275             :                         return EOF;
    1276       50039 :                 if (cur == '=') {
    1277        3123 :                         return scanner_token( lc, COMPARISON);
    1278       46916 :                 } else if (cur == '>') {
    1279       33566 :                         return scanner_token( lc, COMPARISON);
    1280       13350 :                 } else if (cur == '<') {
    1281          44 :                         next = scanner_getc(lc);
    1282          44 :                         if (next < 0)
    1283             :                                 return EOF;
    1284          44 :                         if (next == '=') {
    1285           4 :                                 return scanner_token( lc, LEFT_SHIFT_ASSIGN);
    1286          40 :                         } else if (next == '|') {
    1287           1 :                                 return scanner_token(lc, GEOM_BELOW);
    1288             :                         } else {
    1289          39 :                                 utf8_putchar(lc, next); //put the char back
    1290          39 :                                 return scanner_token( lc, LEFT_SHIFT);
    1291             :                         }
    1292       13306 :                 } else if(cur == '-') {
    1293          19 :                         next = scanner_getc(lc);
    1294          19 :                         if (next < 0)
    1295             :                                 return EOF;
    1296          19 :                         if(next == '>') {
    1297           7 :                                 return scanner_token(lc, GEOM_DIST);
    1298             :                         } else {
    1299             :                                 //put the characters back and fall in the next possible case
    1300          12 :                                 utf8_putchar(lc, next);
    1301          12 :                                 utf8_putchar(lc, cur);
    1302          12 :                                 return scanner_token( lc, COMPARISON);
    1303             :                         }
    1304             :                 } else {
    1305       13287 :                         utf8_putchar(lc, cur);
    1306       13287 :                         return scanner_token( lc, COMPARISON);
    1307             :                 }
    1308       45217 :         case '>':
    1309       45217 :                 lc->started = 1;
    1310       45217 :                 cur = scanner_getc(lc);
    1311       45217 :                 if (cur < 0)
    1312             :                         return EOF;
    1313       45217 :                 if (cur == '>') {
    1314        2409 :                         cur = scanner_getc(lc);
    1315        2409 :                         if (cur < 0)
    1316             :                                 return EOF;
    1317        2409 :                         if (cur == '=')
    1318           3 :                                 return scanner_token( lc, RIGHT_SHIFT_ASSIGN);
    1319        2406 :                         utf8_putchar(lc, cur);
    1320        2406 :                         return scanner_token( lc, RIGHT_SHIFT);
    1321       42808 :                 } else if (cur != '=') {
    1322       40572 :                         utf8_putchar(lc, cur);
    1323       40572 :                         return scanner_token( lc, COMPARISON);
    1324             :                 } else {
    1325        2236 :                         return scanner_token( lc, COMPARISON);
    1326             :                 }
    1327     2043742 :         case '.':
    1328     2043742 :                 lc->started = 1;
    1329     2043742 :                 cur = scanner_getc(lc);
    1330     2043742 :                 if (cur < 0)
    1331             :                         return EOF;
    1332     2043741 :                 if (!iswdigit(cur)) {
    1333     2043728 :                         utf8_putchar(lc, cur);
    1334     2043728 :                         return scanner_token( lc, '.');
    1335             :                 } else {
    1336          13 :                         utf8_putchar(lc, cur);
    1337          13 :                         cur = '.';
    1338          13 :                         return number(c, cur);
    1339             :                 }
    1340      178561 :         case '|': /* binary or or string concat */
    1341      178561 :                 lc->started = 1;
    1342      178561 :                 cur = scanner_getc(lc);
    1343      178561 :                 if (cur < 0)
    1344             :                         return EOF;
    1345      178561 :                 if (cur == '|') {
    1346      178536 :                         return scanner_token(lc, CONCATSTRING);
    1347          25 :                 } else if (cur == '&') {
    1348           0 :                         next = scanner_getc(lc);
    1349           0 :                         if (next < 0)
    1350             :                                 return EOF;
    1351           0 :                         if(next == '>') {
    1352           0 :                                 return scanner_token(lc, GEOM_OVERLAP_OR_ABOVE);
    1353             :                         } else {
    1354           0 :                                 utf8_putchar(lc, next); //put the char back
    1355           0 :                                 utf8_putchar(lc, cur); //put the char back
    1356           0 :                                 return scanner_token(lc, '|');
    1357             :                         }
    1358          25 :                 } else if (cur == '>') {
    1359           1 :                         next = scanner_getc(lc);
    1360           1 :                         if (next < 0)
    1361             :                                 return EOF;
    1362           1 :                         if(next == '>') {
    1363           1 :                                 return scanner_token(lc, GEOM_ABOVE);
    1364             :                         } else {
    1365           0 :                                 utf8_putchar(lc, next); //put the char back
    1366           0 :                                 utf8_putchar(lc, cur); //put the char back
    1367           0 :                                 return scanner_token(lc, '|');
    1368             :                         }
    1369             :                 } else {
    1370          24 :                         utf8_putchar(lc, cur);
    1371          24 :                         return scanner_token(lc, '|');
    1372             :                 }
    1373             :         }
    1374          10 :         (void)sql_error( c, 3, SQLSTATE(42000) "Unexpected symbol (%lc)", (wint_t) cur);
    1375          10 :         return LEX_ERROR;
    1376             : }
    1377             : 
    1378             : static int
    1379    26511914 : tokenize(mvc * c, int cur)
    1380             : {
    1381    26511914 :         struct scanner *lc = &c->scanner;
    1382    53008454 :         while (1) {
    1383    39760184 :                 if (cur == 0xFEFF) {
    1384             :                         /* on Linux at least, iswpunct returns TRUE
    1385             :                          * for U+FEFF, but we don't want that, we just
    1386             :                          * want to go to the scanner_error case
    1387             :                          * below */
    1388             :                         ;
    1389    39760215 :                 } else if (iswspace(cur)) {
    1390    13265588 :                         if ((cur = skip_white_space(lc)) == EOF)
    1391             :                                 return cur;
    1392    13248270 :                         continue;  /* try again */
    1393    26494627 :                 } else if (iswdigit(cur)) {
    1394     1651046 :                         return number(c, cur);
    1395    24843581 :                 } else if (iswalpha(cur) || cur == '_') {
    1396    12878600 :                         switch (cur) {
    1397      644264 :                         case 'e': /* string with escapes */
    1398             :                         case 'E':
    1399      644264 :                                 if (scanner_read_more(lc, 1) != EOF &&
    1400      644264 :                                     lc->rs->buf[lc->rs->pos + lc->yycur] == '\'') {
    1401        3707 :                                         return scanner_string(c, scanner_getc(lc), true);
    1402             :                                 }
    1403             :                                 break;
    1404      412610 :                         case 'x': /* blob */
    1405             :                         case 'X':
    1406             :                         case 'r': /* raw string */
    1407             :                         case 'R':
    1408      412610 :                                 if (scanner_read_more(lc, 1) != EOF &&
    1409      412610 :                                     lc->rs->buf[lc->rs->pos + lc->yycur] == '\'') {
    1410        3276 :                                         return scanner_string(c, scanner_getc(lc), false);
    1411             :                                 }
    1412             :                                 break;
    1413      151433 :                         case 'u': /* unicode string */
    1414             :                         case 'U':
    1415      151433 :                                 if (scanner_read_more(lc, 1) != EOF &&
    1416      151450 :                                     lc->rs->buf[lc->rs->pos + lc->yycur] == '&' &&
    1417          17 :                                     scanner_read_more(lc, 2) != EOF &&
    1418          17 :                                     (lc->rs->buf[lc->rs->pos + lc->yycur + 1] == '\'' ||
    1419             :                                      lc->rs->buf[lc->rs->pos + lc->yycur + 1] == '"')) {
    1420          17 :                                         cur = scanner_getc(lc); /* '&' */
    1421          17 :                                         return scanner_string(c, scanner_getc(lc), false);
    1422             :                                 }
    1423             :                                 break;
    1424             :                         default:
    1425             :                                 break;
    1426             :                         }
    1427    12902340 :                         return keyword_or_ident(c, cur);
    1428    11934241 :                 } else if (iswpunct(cur)) {
    1429    11934178 :                         return scanner_symbol(c, cur);
    1430             :                 }
    1431          32 :                 if (cur == EOF) {
    1432           0 :                         if (lc->mode == LINE_1 || !lc->started )
    1433             :                                 return cur;
    1434           0 :                         return scanner_error(c, cur);
    1435             :                 }
    1436             :                 /* none of the above: error */
    1437          32 :                 return scanner_error(c, cur);
    1438             :         }
    1439             : }
    1440             : 
    1441             : /* SQL 'quoted' idents consist of a set of any character of
    1442             :  * the source language character set other than a 'quote'
    1443             :  *
    1444             :  * MonetDB has 3 restrictions:
    1445             :  *      1 we disallow '%' as the first character.
    1446             :  *      2 the length is limited to 1024 characters
    1447             :  *      3 the identifier 'TID%' is not allowed
    1448             :  */
    1449             : static bool
    1450     1267692 : valid_ident(const char *restrict s, char *restrict dst)
    1451             : {
    1452     1267692 :         int p = 0;
    1453             : 
    1454     1267692 :         if (*s == '%')
    1455             :                 return false;
    1456             : 
    1457     9384008 :         while (*s) {
    1458     8116316 :                 if ((dst[p++] = *s++) == '"' && *s == '"')
    1459          64 :                         s++;
    1460     8116316 :                 if (p >= 1024)
    1461             :                         return false;
    1462             :         }
    1463     1267692 :         dst[p] = '\0';
    1464     1267692 :         if (strcmp(dst, TID + 1) == 0) /* an index named 'TID%' could interfere with '%TID%' */
    1465             :                 return false;
    1466             :         return true;
    1467             : }
    1468             : 
                       /*
                        * Fetch one token from the scanner embedded in the mvc passed as parm
                        * and store its textual value in yylval->sval.
                        *
                        * Order of work:
                        *  - return EOF when the read buffer is missing;
                        *  - hand out a token that was pushed back in lc->yynext (scanner()
                        *    stores its look-ahead there);
                        *  - put back the byte saved in lc->yybak (it is written back at the
                        *    current scan offset; presumably it was overwritten to terminate
                        *    the previous token - the code that sets yybak is not in view);
                        *  - read one character and let tokenize() classify the input;
                        *  - post-process the token text:
                        *      IDENT, COMPARISON, RANK, aTYPE, MARGFUNC: sval becomes a copy
                        *        of the token text allocated from c->sa;
                        *      STRING: the literal is unquoted/unescaped into a fresh buffer
                        *        from c->sa, depending on its first character (see the
                        *        switch), and the token may be changed to IDENT, UIDENT,
                        *        USTRING or XSTRING;
                        *      anything else: sval keeps pointing into the read buffer.
                        *
                        * lc->next_string_is_raw is cleared by every token that does not come
                        * from tokenize() as STRING and is set by the U& and X forms.
                        *
                        * Returns the token code, EOF at end of input (or missing buffer), or
                        * LEX_ERROR after reporting a problem through sql_error().
                        *
                        * NOTE(review): the three LEX_ERROR returns inside the STRING branch
                        * leave the temporary NUL in the read buffer; only the normal path
                        * restores the closing quote. Confirm that nothing rescans or prints
                        * that part of the buffer after an error.
                        * NOTE(review): the results of sa_strndup() and sa_alloc() are used
                        * without a NULL check - confirm the allocator cannot return NULL here.
                        */
    1469             : static inline int
    1470    26615521 : sql_get_next_token(YYSTYPE *yylval, void *parm)
    1471             : {
    1472    26615521 :         mvc *c = (mvc*)parm;
    1473    26615521 :         struct scanner *lc = &c->scanner;
    1474    26615521 :         int token = 0, cur = 0;
    1475             : 
    1476    26615521 :         if (lc->rs->buf == NULL) /* malloc failure */
    1477             :                 return EOF;
    1478             : 
    1479    26615521 :         if (lc->yynext) { /* a look-ahead token is pending: deliver it first */
    1480       60349 :                 int next = lc->yynext;
    1481             : 
    1482       60349 :                 lc->yynext = 0;
    1483       60349 :                 return(next);
    1484             :         }
    1485             : 
    1486    26555172 :         if (lc->yybak) { /* undo the saved-byte patch before scanning on */
    1487    25575441 :                 lc->rs->buf[lc->rs->pos + lc->yycur] = lc->yybak;
    1488    25575441 :                 lc->yybak = 0;
    1489             :         }
    1490             : 
    1491    26555172 :         lc->yysval = lc->yycur; /* the new token starts at the current scan offset */
    1492    26555172 :         lc->yylast = lc->yyval; /* keep the previous value of yyval; scanner() compares yylast with SCOLON */
    1493    26555172 :         cur = scanner_getc(lc);
    1494    26554096 :         if (cur < 0)
    1495             :                 return EOF;
    1496    26442789 :         token = tokenize(c, cur);
    1497             : 
                       /* default value: the token text in place, inside the read buffer */
    1498    26443309 :         yylval->sval = (lc->rs->buf + lc->rs->pos + lc->yysval);
    1499             : 
    1500    26443309 :         if (token == KW_TYPE) /* KW_TYPE is reported as aTYPE */
    1501       49328 :                 token = aTYPE;
    1502             : 
    1503    26443309 :         if (token == IDENT || token == COMPARISON ||
    1504    21369801 :             token == RANK || token == aTYPE || token == MARGFUNC) {
                               /* these tokens get their own copy of the yycur - yysval bytes of text */
    1505     5132410 :                 yylval->sval = sa_strndup(c->sa, yylval->sval, lc->yycur-lc->yysval);
    1506     5132420 :                 lc->next_string_is_raw = false;
    1507    21310899 :         } else if (token == STRING) {
    1508     1961506 :                 char quote = *yylval->sval; /* first character of the literal selects the decoding below */
    1509     1961506 :                 char *str = sa_alloc( c->sa, (lc->yycur-lc->yysval-2)*2 + 1 ); /* output buffer for the decoded text */
    1510     1961506 :                 char *dst;
    1511             : 
    1512     1961506 :                 assert(quote == '"' || quote == '\'' || quote == 'E' || quote == 'e' || quote == 'U' || quote == 'u' || quote == 'X' || quote == 'x' || quote == 'R' || quote == 'r');
    1513             : 
                               /* overwrite the last byte of the token (the closing quote) with NUL so the text can be handled as a C string; it is restored after the switch */
    1514     1961506 :                 lc->rs->buf[lc->rs->pos + lc->yycur - 1] = 0;
    1515     1961506 :                 switch (quote) {
    1516     1267692 :                 case '"': /* double-quoted text: becomes IDENT when valid_ident() accepts it */
    1517     1267692 :                         if (valid_ident(yylval->sval+1,str)) {
    1518             :                                 token = IDENT;
    1519             :                         } else {
    1520           0 :                                 sql_error(c, 1, SQLSTATE(42000) "Invalid identifier '%s'", yylval->sval+1);
    1521           0 :                                 return LEX_ERROR;
    1522             :                         }
    1523             :                         break;
    1524        3706 :                 case 'e': /* E'...': text after the two prefix bytes is decoded by GDKstrFromStr() (presumably backslash escapes) */
    1525             :                 case 'E':
    1526        3706 :                         assert(yylval->sval[1] == '\'');
    1527        3706 :                         if (GDKstrFromStr((unsigned char *) str,
    1528             :                                                           (unsigned char *) yylval->sval + 2,
    1529        3706 :                                                           lc->yycur-lc->yysval - 2, '\'') < 0) {
    1530           1 :                                 char *err = GDKerrbuf;
                                               /* drop a leading GDKERROR prefix, or else a leading '!', from the message */
    1531           1 :                                 if (strncmp(err, GDKERROR, strlen(GDKERROR)) == 0)
    1532           1 :                                         err += strlen(GDKERROR);
    1533           0 :                                 else if (*err == '!')
    1534           0 :                                         err++;
    1535           1 :                                 sql_error(c, 1, SQLSTATE(42000) "%s", err);
    1536           1 :                                 return LEX_ERROR;
    1537             :                         }
    1538             :                         quote = '\''; /* the byte to restore is the closing single quote, not the prefix letter */
    1539             :                         break;
    1540          17 :                 case 'u': /* U&'...' or U&"...": text after the 3-byte prefix is copied unchanged */
    1541             :                 case 'U':
    1542          17 :                         assert(yylval->sval[1] == '&');
    1543          17 :                         assert(yylval->sval[2] == '\'' || yylval->sval[2] == '"');
    1544          17 :                         strcpy(str, yylval->sval + 3);
    1545          17 :                         token = yylval->sval[2] == '\'' ? USTRING : UIDENT;
    1546          17 :                         quote = yylval->sval[2];
    1547          17 :                         lc->next_string_is_raw = true; /* the default case below then copies the next plain literal without GDKstrFromStr() */
    1548          17 :                         break;
    1549           1 :                 case 'x': /* X'...': copied with doubled single quotes collapsed; reported as XSTRING */
    1550             :                 case 'X':
    1551           1 :                         assert(yylval->sval[1] == '\'');
    1552           1 :                         dst = str;
    1553           5 :                         for (char *src = yylval->sval + 2; *src; dst++)
    1554           4 :                                 if ((*dst = *src++) == '\'' && *src == '\'')
    1555           0 :                                         src++;
    1556           1 :                         *dst = 0;
    1557           1 :                         quote = '\'';
    1558           1 :                         token = XSTRING;
    1559           1 :                         lc->next_string_is_raw = true;
    1560           1 :                         break;
    1561        3268 :                 case 'r': /* R'...': copied with doubled single quotes collapsed; the token stays STRING */
    1562             :                 case 'R':
    1563        3268 :                         assert(yylval->sval[1] == '\'');
    1564        3268 :                         dst = str;
    1565      449799 :                         for (char *src = yylval->sval + 2; *src; dst++)
    1566      446531 :                                 if ((*dst = *src++) == '\'' && *src == '\'')
    1567        2732 :                                         src++;
    1568        3268 :                         quote = '\'';
    1569        3268 :                         *dst = 0;
    1570        3268 :                         break;
    1571      686822 :                 default: /* literal without prefix letter (per the assert above: starts with a single quote) */
    1572      686822 :                         if (lc->raw_string_mode || lc->next_string_is_raw) {
                                               /* raw handling: only doubled single quotes are collapsed */
    1573          46 :                                 dst = str;
    1574         436 :                                 for (char *src = yylval->sval + 1; *src; dst++)
    1575         390 :                                         if ((*dst = *src++) == '\'' && *src == '\'')
    1576           1 :                                                 src++;
    1577          46 :                                 *dst = 0;
    1578             :                         } else {
    1579      686776 :                                 if (GDKstrFromStr((unsigned char *)str,
    1580      686776 :                                                                   (unsigned char *)yylval->sval + 1,
    1581      686776 :                                                                   lc->yycur - lc->yysval - 1,
    1582             :                                                                   '\'') < 0) {
                                                       /* unlike the E'...' case, GDKerrbuf is reported without stripping any prefix */
    1583           1 :                                         sql_error(c, 1, SQLSTATE(42000) "%s", GDKerrbuf);
    1584           1 :                                         return LEX_ERROR;
    1585             :                                 }
    1586             :                         }
    1587             :                         break;
    1588             :                 }
    1589     1961504 :                 yylval->sval = str; /* hand the decoded copy to the parser */
    1590             : 
    1591             :                 /* reset original */
    1592     1961504 :                 lc->rs->buf[lc->rs->pos+lc->yycur- 1] = quote;
    1593             :         } else {
    1594    19349393 :                 lc->next_string_is_raw = false;
    1595             :         }
    1596             : 
    1597             :         return(token);
    1598             : }
    1599             : 
    1600             : static int scanner( YYSTYPE *yylval, void *m, bool log);
    1601             : 
                       /*
                        * Token filter on top of sql_get_next_token().
                        *
                        * yylval - receives the value of the most recently fetched token.
                        * parm   - the mvc (passed as void *) whose embedded scanner is used.
                        * log    - when true and lc->log is set, the buffer bytes consumed by
                        *          this call are written to lc->log.
                        *
                        * Adjustments made to the raw token stream:
                        *  - NOT: the following token is fetched as look-ahead.
                        *      NOT NOT           -> both are dropped and the token after them
                        *                           is returned directly;
                        *      NOT EXISTS, NOT BETWEEN, NOT IN, NOT LIKE, NOT ILIKE
                        *                        -> merged into NOT_EXISTS, NOT_BETWEEN,
                        *                           NOT_IN, NOT_LIKE, NOT_ILIKE;
                        *      anything else     -> the look-ahead is parked in lc->yynext
                        *                           and NOT is returned. yylval was already
                        *                           overwritten by the look-ahead fetch.
                        *  - SCOLON directly after a SCOLON: further tokens are fetched until
                        *    one is not SCOLON, and the read window (rs->pos / yycur) is
                        *    moved past the skipped semicolons.
                        *
                        * Every call that reaches the end of the function adds 1 to
                        * lc->started unless the token is EOF. The NOT NOT path returns early,
                        * so for it the log write and the counter update of this invocation
                        * are skipped (the nested call does its own counter update).
                        *
                        * Returns the resulting token code.
                        */
    1602             : static int
    1603    26483797 : scanner(YYSTYPE * yylval, void *parm, bool log)
    1604             : {
    1605    26483797 :         int token;
    1606    26483797 :         mvc *c = (mvc *) parm;
    1607    26483797 :         struct scanner *lc = &c->scanner;
    1608    26483797 :         size_t pos;
    1609             : 
    1610             :         /* store position for when view's query ends */
    1611    26483797 :         pos = lc->rs->pos + lc->yycur;
    1612             : 
    1613    26483797 :         token = sql_get_next_token(yylval, parm);
    1614             : 
    1615    26482398 :         if (token == NOT) {
    1616       73568 :                 int next = scanner(yylval, parm, false); /* look-ahead; not logged by the nested call */
    1617             : 
    1618       73568 :                 if (next == NOT) {
    1619           2 :                         return scanner(yylval, parm, false); /* NOT NOT: return whatever follows */
    1620             :                 } else if (next == EXISTS) {
    1621             :                         token = NOT_EXISTS;
    1622             :                 } else if (next == BETWEEN) {
    1623             :                         token = NOT_BETWEEN;
    1624             :                 } else if (next == sqlIN) {
    1625             :                         token = NOT_IN;
    1626             :                 } else if (next == LIKE) {
    1627             :                         token = NOT_LIKE;
    1628             :                 } else if (next == ILIKE) {
    1629             :                         token = NOT_ILIKE;
    1630             :                 } else {
    1631       60349 :                         lc->yynext = next; /* delivered by the next sql_get_next_token() call */
    1632             :                 }
    1633    26408830 :         } else if (token == SCOLON) {
    1634             :                 /* ignore semi-colon(s) following a semi-colon */
    1635      942556 :                 if (lc->yylast == SCOLON) {
    1636      131999 :                         size_t prev = lc->yycur; /* scan offset just after the last skipped semicolon */
    1637      132000 :                         while ((token = sql_get_next_token(yylval, parm)) == SCOLON)
    1638           1 :                                 prev = lc->yycur;
    1639             : 
    1640             :                         /* skip the skipped stuff also in the buffer */
    1641      131999 :                         lc->rs->pos += prev;
    1642      131999 :                         lc->yycur -= prev;
    1643             :                 }
    1644             :         }
    1645             : 
                       /* write the bytes from the saved start position up to the current scan position */
    1646    26482396 :         if (lc->log && log)
    1647           0 :                 mnstr_write(lc->log, lc->rs->buf+pos, lc->rs->pos + lc->yycur - pos, 1);
    1648             : 
    1649    26482396 :         lc->started += (token != EOF); /* adds 1 for every token except EOF */
    1650    26482396 :         return token;
    1651             : }
    1652             : 
    1653             : /* also see sql_parser.y
                        *
                        * sqllex() is the externally visible lexer entry point (presumably the
                        * function the generated parser calls for each token - confirm in
                        * sql_parser.y). It forwards to scanner() with logging enabled.
                        *
                        * yylval - receives the token value.
                        * parm   - the mvc, passed as void *.
                        * Returns the token code produced by scanner().
                        */
    1654             : extern int sqllex(YYSTYPE * yylval, void *parm);
    1655             : 
    1656             : int
    1657    26410633 : sqllex(YYSTYPE * yylval, void *parm)
    1658             : {
    1659    26410633 :         return scanner(yylval, parm, true);
    1660             : }

Generated by: LCOV version 1.14