LCOV - code coverage report
Current view: top level - sql/server - sql_scan.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 1147 1222 93.9 %
Date: 2025-03-24 21:28:01 Functions: 27 27 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024, 2025 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include <wctype.h>
      15             : #include "sql_mem.h"
      16             : #include "sql_scan.h"
      17             : #include "sql_types.h"
      18             : #include "sql_symbol.h"
      19             : #include "sql_mvc.h"
      20             : #include "sql_parser.tab.h"
      21             : #include "sql_semantic.h"
      22             : #include "sql_parser.h"               /* for sql_error() */
      23             : 
      24             : #include "stream.h"
      25             : #include "mapi_prompt.h"
      26             : #include <unistd.h>
      27             : #include <string.h>
      28             : #include <ctype.h>
      29             : #include "sql_keyword.h"
      30             : 
      /* Return the value (0-15) of the hexadecimal digit ch, or -1 when ch is
       * not a hexadecimal digit (this includes the terminating NUL). */
      static int
      uescape_hexval(char ch)
      {
              if ('0' <= ch && ch <= '9')
                      return ch - '0';
              if ('a' <= ch && ch <= 'f')
                      return ch - 'a' + 10;
              if ('A' <= ch && ch <= 'F')
                      return ch - 'A' + 10;
              return -1;
      }

      /* Store the UTF-8 encoding of code point cp at dst and return the number
       * of bytes written (1 to 4).  The caller has already checked that cp is
       * in the range 1..0x10FFFF and is not a surrogate. */
      static size_t
      uescape_put_utf8(char *dst, int cp)
      {
              size_t n = 0;

              if (cp < 0x80) {
                      dst[n++] = (char) cp;
              } else if (cp < 0x800) {
                      dst[n++] = (char) (0xC0 | (cp >> 6));
                      dst[n++] = (char) (0x80 | (cp & 0x3F));
              } else if (cp < 0x10000) {
                      dst[n++] = (char) (0xE0 | (cp >> 12));
                      dst[n++] = (char) (0x80 | ((cp >> 6) & 0x3F));
                      dst[n++] = (char) (0x80 | (cp & 0x3F));
              } else {
                      dst[n++] = (char) (0xF0 | (cp >> 18));
                      dst[n++] = (char) (0x80 | ((cp >> 12) & 0x3F));
                      dst[n++] = (char) (0x80 | ((cp >> 6) & 0x3F));
                      dst[n++] = (char) (0x80 | (cp & 0x3F));
              }
              return n;
      }

      /*
       * Expand Unicode escape sequences in the NUL-terminated string s, in place.
       *
       * The escape character is the first character of esc.  Recognized forms:
       *   <esc><esc>      a literal escape character
       *   <esc>XXXX       code point given by exactly 4 hexadecimal digits
       *   <esc>+XXXXXX    code point given by exactly 6 hexadecimal digits
       * Every code point is replaced by its UTF-8 encoding.  An escape sequence
       * is always at least as long as the bytes that replace it, so the write
       * position j never passes the read position i.
       *
       * Returns s on success.  Returns NULL when a digit is missing or is not
       * hexadecimal, or when the code point is 0, larger than 0x10FFFF, or a
       * surrogate (0xD800..0xDFFF); in that case s is left partially rewritten.
       */
      static char *
      uescape_xform(char *restrict s, const char *restrict esc)
      {
              size_t i, j;

              for (i = j = 0; s[i]; i++) {
                      if (s[i] != *esc) {
                              /* ordinary character: copy through */
                              s[j++] = s[i];
                              continue;
                      }
                      if (s[i + 1] == *esc) {
                              /* doubled escape character stands for itself */
                              s[j++] = *esc;
                              i++;
                              continue;
                      }

                      int ndigits = 4;
                      if (s[i + 1] == '+') {
                              ndigits = 6;
                              i++;
                      }

                      int cp = 0;
                      while (ndigits-- > 0) {
                              /* hitting the terminating NUL yields -1, so
                               * we never read past the end of s */
                              int digit = uescape_hexval(s[++i]);
                              if (digit < 0)
                                      return NULL;
                              cp = (cp << 4) | digit;
                      }
                      if (cp == 0 || cp > 0x10FFFF || (cp & 0xFFF800) == 0xD800)
                              return NULL;
                      j += uescape_put_utf8(s + j, cp);
              }
              s[j] = 0;
              return s;
      }
      88             : 
      89             : /**
      90             :  * Removes all comments before the query. In query comments are kept.
      91             :  */
      92             : char *
      93      441454 : query_cleaned(allocator *sa, const char *query)
      94             : {
      95      441454 :         char *q, *r, *c = NULL;
      96      441454 :         int lines = 0;
      97      441454 :         int quote = 0;          /* inside quotes ('..', "..", {..}) */
      98      441454 :         bool bs = false;                /* seen a backslash in a quoted string */
      99      441454 :         bool incomment1 = false;        /* inside traditional C style comment */
     100      441454 :         bool incomment2 = false;        /* inside comment starting with --  */
     101      441454 :         bool inline_comment = false;
     102             : 
     103      441454 :         r = SA_NEW_ARRAY(sa, char, strlen(query) + 1);
     104      441582 :         if(!r)
     105             :                 return NULL;
     106             : 
     107    70208981 :         for (q = r; *query; query++) {
     108    69767399 :                 if (incomment1) {
     109       16396 :                         if (*query == '/' && query[-1] == '*') {
     110         237 :                                 incomment1 = false;
     111         237 :                                 if (c == r && lines > 0) {
     112         229 :                                         q = r; // reset to beginning
     113         229 :                                         lines = 0;
     114         229 :                                         continue;
     115             :                                 }
     116             :                         }
     117       16167 :                         if (*query == '\n') lines++;
     118       16167 :                         *q++ = *query;
     119    69751003 :                 } else if (incomment2) {
     120      831313 :                         if (*query == '\n') {
     121        2914 :                                 incomment2 = false;
     122        2914 :                                 inline_comment = false;
     123             :                                 /* add newline only if comment doesn't
     124             :                                  * occupy whole line */
     125        2914 :                                 if (q > r && q[-1] != '\n'){
     126         998 :                                         *q++ = '\n';
     127         998 :                                         lines++;
     128             :                                 }
     129      828399 :                         } else if (inline_comment){
     130       23850 :                                 *q++ = *query; // preserve in line query comments
     131             :                         }
     132    68919690 :                 } else if (quote) {
     133    22407157 :                         if (bs) {
     134             :                                 bs = false;
     135    22403845 :                         } else if (*query == '\\') {
     136             :                                 bs = true;
     137    22400533 :                         } else if (*query == quote) {
     138      691562 :                                 quote = 0;
     139             :                         }
     140    22407157 :                         *q++ = *query;
     141    46512533 :                 } else if (*query == '"' || *query == '\'') {
     142      691094 :                         quote = *query;
     143      691094 :                         *q++ = *query;
     144    45821439 :                 } else if (*query == '{') {
     145         513 :                         quote = '}';
     146         513 :                         *q++ = *query;
     147    45820926 :                 } else if (*query == '-' && query[1] == '-') {
     148        2914 :                         if (q > r && q[-1] != '\n') {
     149         998 :                                 inline_comment = true;
     150         998 :                                 *q++ = *query; // preserve in line query comments
     151             :                         }
     152             :                         incomment2 = true;
     153    45818012 :                 } else if (*query == '/' && query[1] == '*') {
     154         237 :                         incomment1 = true;
     155         237 :                         c = q;
     156         237 :                         *q++ = *query;
     157    45817775 :                 } else if (*query == '\n') {
     158             :                         /* collapse newlines */
     159      910081 :                         if (q > r && q[-1] != '\n') {
     160      868197 :                                 *q++ = '\n';
     161      868197 :                                 lines++;
     162             :                         }
     163    44907694 :                 } else if (*query == ' ' || *query == '\t') {
     164             :                         /* collapse white space */
     165     7222435 :                         if (q > r && q[-1] != ' ')
     166     5737086 :                                 *q++ = ' ';
     167             :                 } else {
     168    37685259 :                         *q++ = *query;
     169             :                 }
     170             :         }
     171      441582 :         *q = 0;
     172      441582 :         return r;
     173             : }
     174             : 
     175             : int
     176         358 : scanner_init_keywords(void)
     177             : {
     178         358 :         int failed = 0;
     179             : 
     180         358 :         failed += keywords_insert("false", BOOL_FALSE);
     181         358 :         failed += keywords_insert("true", BOOL_TRUE);
     182         358 :         failed += keywords_insert("bool", sqlBOOL);
     183             : 
     184         358 :         failed += keywords_insert("ALTER", ALTER);
     185         358 :         failed += keywords_insert("ADD", ADD);
     186         358 :         failed += keywords_insert("AND", AND);
     187             : 
     188         358 :         failed += keywords_insert("RANK", RANK);
     189         358 :         failed += keywords_insert("DENSE_RANK", RANK);
     190         358 :         failed += keywords_insert("PERCENT_RANK", RANK);
     191         358 :         failed += keywords_insert("CUME_DIST", RANK);
     192         358 :         failed += keywords_insert("ROW_NUMBER", RANK);
     193         358 :         failed += keywords_insert("NTILE", RANK);
     194         358 :         failed += keywords_insert("LAG", RANK);
     195         358 :         failed += keywords_insert("LEAD", RANK);
     196         358 :         failed += keywords_insert("FETCH", FETCH);
     197         358 :         failed += keywords_insert("FIRST_VALUE", RANK);
     198         358 :         failed += keywords_insert("LAST_VALUE", RANK);
     199         358 :         failed += keywords_insert("NTH_VALUE", RANK);
     200             : 
     201         358 :         failed += keywords_insert("BEST", BEST);
     202         358 :         failed += keywords_insert("EFFORT", EFFORT);
     203             : 
     204         358 :         failed += keywords_insert("AS", AS);
     205         358 :         failed += keywords_insert("ASC", ASC);
     206         358 :         failed += keywords_insert("AUTHORIZATION", AUTHORIZATION);
     207         358 :         failed += keywords_insert("BETWEEN", BETWEEN);
     208         358 :         failed += keywords_insert("SYMMETRIC", SYMMETRIC);
     209         358 :         failed += keywords_insert("ASYMMETRIC", ASYMMETRIC);
     210         358 :         failed += keywords_insert("BY", BY);
     211         358 :         failed += keywords_insert("CAST", CAST);
     212         358 :         failed += keywords_insert("CONVERT", CONVERT);
     213         358 :         failed += keywords_insert("CHARACTER", CHARACTER);
     214         358 :         failed += keywords_insert("CHAR", CHARACTER);
     215         358 :         failed += keywords_insert("VARYING", VARYING);
     216         358 :         failed += keywords_insert("VARCHAR", VARCHAR);
     217         358 :         failed += keywords_insert("BINARY", BINARY);
     218         358 :         failed += keywords_insert("LARGE", LARGE);
     219         358 :         failed += keywords_insert("OBJECT", OBJECT);
     220         358 :         failed += keywords_insert("CLOB", CLOB);
     221         358 :         failed += keywords_insert("BLOB", sqlBLOB);
     222         358 :         failed += keywords_insert("TEXT", sqlTEXT);
     223         358 :         failed += keywords_insert("TINYTEXT", sqlTEXT);
     224         358 :         failed += keywords_insert("STRING", CLOB);    /* ? */
     225         358 :         failed += keywords_insert("CHECK", CHECK);
     226         358 :         failed += keywords_insert("CLIENT", CLIENT);
     227         358 :         failed += keywords_insert("SERVER", SERVER);
     228         358 :         failed += keywords_insert("COMMENT", COMMENT);
     229         358 :         failed += keywords_insert("CONSTRAINT", CONSTRAINT);
     230         358 :         failed += keywords_insert("CREATE", CREATE);
     231         358 :         failed += keywords_insert("CROSS", CROSS);
     232         358 :         failed += keywords_insert("COPY", COPY);
     233         358 :         failed += keywords_insert("RECORDS", RECORDS);
     234         358 :         failed += keywords_insert("DELIMITERS", DELIMITERS);
     235         358 :         failed += keywords_insert("STDIN", STDIN);
     236         358 :         failed += keywords_insert("STDOUT", STDOUT);
     237             : 
     238         358 :         failed += keywords_insert("TINYINT", TINYINT);
     239         358 :         failed += keywords_insert("SMALLINT", SMALLINT);
     240         358 :         failed += keywords_insert("INTEGER", sqlINTEGER);
     241         358 :         failed += keywords_insert("INT", sqlINTEGER);
     242         358 :         failed += keywords_insert("MEDIUMINT", sqlINTEGER);
     243         358 :         failed += keywords_insert("BIGINT", BIGINT);
     244             : #ifdef HAVE_HGE
     245         358 :         failed += keywords_insert("HUGEINT", HUGEINT);
     246             : #endif
     247         358 :         failed += keywords_insert("DEC", sqlDECIMAL);
     248         358 :         failed += keywords_insert("DECIMAL", sqlDECIMAL);
     249         358 :         failed += keywords_insert("NUMERIC", sqlDECIMAL);
     250         358 :         failed += keywords_insert("DECLARE", DECLARE);
     251         358 :         failed += keywords_insert("DEFAULT", DEFAULT);
     252         358 :         failed += keywords_insert("DESC", DESC);
     253         358 :         failed += keywords_insert("DISTINCT", DISTINCT);
     254         358 :         failed += keywords_insert("DOUBLE", sqlDOUBLE);
     255         358 :         failed += keywords_insert("REAL", sqlREAL);
     256         358 :         failed += keywords_insert("DROP", DROP);
     257         358 :         failed += keywords_insert("ESCAPE", ESCAPE);
     258         358 :         failed += keywords_insert("EXISTS", EXISTS);
     259         358 :         failed += keywords_insert("UESCAPE", UESCAPE);
     260         358 :         failed += keywords_insert("EXTRACT", EXTRACT);
     261         358 :         failed += keywords_insert("FLOAT", sqlFLOAT);
     262         358 :         failed += keywords_insert("FOR", FOR);
     263         358 :         failed += keywords_insert("FOREIGN", FOREIGN);
     264         358 :         failed += keywords_insert("FROM", FROM);
     265         358 :         failed += keywords_insert("FWF", FWF);
     266             : 
     267         358 :         failed += keywords_insert("BIG", BIG);
     268         358 :         failed += keywords_insert("LITTLE", LITTLE);
     269         358 :         failed += keywords_insert("NATIVE", NATIVE);
     270         358 :         failed += keywords_insert("ENDIAN", ENDIAN);
     271             : 
     272         358 :         failed += keywords_insert("REFERENCES", REFERENCES);
     273             : 
     274         358 :         failed += keywords_insert("MATCH", MATCH);
     275         358 :         failed += keywords_insert("FULL", FULL);
     276         358 :         failed += keywords_insert("PARTIAL", PARTIAL);
     277         358 :         failed += keywords_insert("SIMPLE", SIMPLE);
     278             : 
     279         358 :         failed += keywords_insert("INSERT", INSERT);
     280         358 :         failed += keywords_insert("UPDATE", UPDATE);
     281         358 :         failed += keywords_insert("DELETE", sqlDELETE);
     282         358 :         failed += keywords_insert("TRUNCATE", TRUNCATE);
     283         358 :         failed += keywords_insert("MATCHED", MATCHED);
     284             : 
     285         358 :         failed += keywords_insert("ACTION", ACTION);
     286         358 :         failed += keywords_insert("CASCADE", CASCADE);
     287         358 :         failed += keywords_insert("RESTRICT", RESTRICT);
     288         358 :         failed += keywords_insert("FIRST", FIRST);
     289         358 :         failed += keywords_insert("GLOBAL", GLOBAL);
     290         358 :         failed += keywords_insert("GROUP", sqlGROUP);
     291         358 :         failed += keywords_insert("GROUPING", GROUPING);
     292         358 :         failed += keywords_insert("ROLLUP", ROLLUP);
     293         358 :         failed += keywords_insert("CUBE", CUBE);
     294         358 :         failed += keywords_insert("HAVING", HAVING);
     295         358 :         failed += keywords_insert("ILIKE", ILIKE);
     296         358 :         failed += keywords_insert("IMPRINTS", IMPRINTS);
     297         358 :         failed += keywords_insert("IN", sqlIN);
     298         358 :         failed += keywords_insert("INNER", INNER);
     299         358 :         failed += keywords_insert("INTO", INTO);
     300         358 :         failed += keywords_insert("IS", IS);
     301         358 :         failed += keywords_insert("JOIN", JOIN);
     302         358 :         failed += keywords_insert("KEY", KEY);
     303         358 :         failed += keywords_insert("LATERAL", LATERAL);
     304         358 :         failed += keywords_insert("LEFT", LEFT);
     305         358 :         failed += keywords_insert("LIKE", LIKE);
     306         358 :         failed += keywords_insert("LIMIT", LIMIT);
     307         358 :         failed += keywords_insert("SAMPLE", SAMPLE);
     308         358 :         failed += keywords_insert("SEED", SEED);
     309         358 :         failed += keywords_insert("LAST", LAST);
     310         358 :         failed += keywords_insert("LOCAL", LOCAL);
     311         358 :         failed += keywords_insert("NATURAL", NATURAL);
     312         358 :         failed += keywords_insert("NOT", NOT);
     313         358 :         failed += keywords_insert("NULL", sqlNULL);
     314         358 :         failed += keywords_insert("NULLS", NULLS);
     315         358 :         failed += keywords_insert("OFFSET", OFFSET);
     316         358 :         failed += keywords_insert("ON", ON);
     317         358 :         failed += keywords_insert("OPTIONS", OPTIONS);
     318         358 :         failed += keywords_insert("OPTION", OPTION);
     319         358 :         failed += keywords_insert("OR", OR);
     320         358 :         failed += keywords_insert("ORDER", ORDER);
     321         358 :         failed += keywords_insert("ORDERED", ORDERED);
     322         358 :         failed += keywords_insert("OUTER", OUTER);
     323         358 :         failed += keywords_insert("OVER", OVER);
     324         358 :         failed += keywords_insert("PARTITION", PARTITION);
     325         358 :         failed += keywords_insert("PATH", PATH);
     326         358 :         failed += keywords_insert("PRECISION", PRECISION);
     327         358 :         failed += keywords_insert("PRIMARY", PRIMARY);
     328             : 
     329         358 :         failed += keywords_insert("USER", USER);
     330         358 :         failed += keywords_insert("RENAME", RENAME);
     331         358 :         failed += keywords_insert("UNENCRYPTED", UNENCRYPTED);
     332         358 :         failed += keywords_insert("ENCRYPTED", ENCRYPTED);
     333         358 :         failed += keywords_insert("PASSWORD", PASSWORD);
     334         358 :         failed += keywords_insert("GRANT", GRANT);
     335         358 :         failed += keywords_insert("REVOKE", REVOKE);
     336         358 :         failed += keywords_insert("ROLE", ROLE);
     337         358 :         failed += keywords_insert("ADMIN", ADMIN);
     338         358 :         failed += keywords_insert("PRIVILEGES", PRIVILEGES);
     339         358 :         failed += keywords_insert("PUBLIC", PUBLIC);
     340         358 :         failed += keywords_insert("CURRENT_USER", CURRENT_USER);
     341         358 :         failed += keywords_insert("CURRENT_ROLE", CURRENT_ROLE);
     342         358 :         failed += keywords_insert("SESSION_USER", SESSION_USER);
     343         358 :         failed += keywords_insert("CURRENT_SCHEMA", CURRENT_SCHEMA);
     344         358 :         failed += keywords_insert("SESSION", sqlSESSION);
     345         358 :         failed += keywords_insert("MAX_MEMORY", MAX_MEMORY);
     346         358 :         failed += keywords_insert("MAX_WORKERS", MAX_WORKERS);
     347         358 :         failed += keywords_insert("OPTIMIZER", OPTIMIZER);
     348             : 
     349         358 :         failed += keywords_insert("RIGHT", RIGHT);
     350         358 :         failed += keywords_insert("SCHEMA", SCHEMA);
     351         358 :         failed += keywords_insert("SELECT", SELECT);
     352         358 :         failed += keywords_insert("SET", SET);
     353         358 :         failed += keywords_insert("SETS", SETS);
     354         358 :         failed += keywords_insert("AUTO_COMMIT", AUTO_COMMIT);
     355             : 
     356         358 :         failed += keywords_insert("ALL", ALL);
     357         358 :         failed += keywords_insert("ANY", ANY);
     358         358 :         failed += keywords_insert("SOME", SOME);
     359         358 :         failed += keywords_insert("EVERY", ANY);
     360             :         /*
     361             :            failed += keywords_insert("SQLCODE", SQLCODE );
     362             :          */
     363         358 :         failed += keywords_insert("COLUMN", COLUMN);
     364         358 :         failed += keywords_insert("TABLE", TABLE);
     365         358 :         failed += keywords_insert("TEMPORARY", TEMPORARY);
     366         358 :         failed += keywords_insert("TEMP", TEMP);
     367         358 :         failed += keywords_insert("REMOTE", REMOTE);
     368         358 :         failed += keywords_insert("MERGE", MERGE);
     369         358 :         failed += keywords_insert("REPLICA", REPLICA);
     370         358 :         failed += keywords_insert("UNLOGGED", UNLOGGED);
     371         358 :         failed += keywords_insert("TO", TO);
     372         358 :         failed += keywords_insert("UNION", UNION);
     373         358 :         failed += keywords_insert("EXCEPT", EXCEPT);
     374         358 :         failed += keywords_insert("INTERSECT", INTERSECT);
     375         358 :         failed += keywords_insert("CORRESPONDING", CORRESPONDING);
     376         358 :         failed += keywords_insert("UNIQUE", UNIQUE);
     377         358 :         failed += keywords_insert("USING", USING);
     378         358 :         failed += keywords_insert("VALUES", VALUES);
     379         358 :         failed += keywords_insert("VIEW", VIEW);
     380         358 :         failed += keywords_insert("WHERE", WHERE);
     381         358 :         failed += keywords_insert("WITH", WITH);
     382         358 :         failed += keywords_insert("WITHIN", WITHIN);
     383         358 :         failed += keywords_insert("WITHOUT", WITHOUT);
     384         358 :         failed += keywords_insert("DATA", DATA);
     385             : 
     386         358 :         failed += keywords_insert("DATE", sqlDATE);
     387         358 :         failed += keywords_insert("TIME", TIME);
     388         358 :         failed += keywords_insert("TIMESTAMP", TIMESTAMP);
     389         358 :         failed += keywords_insert("INTERVAL", INTERVAL);
     390         358 :         failed += keywords_insert("CURRENT_DATE", CURRENT_DATE);
     391         358 :         failed += keywords_insert("CURRENT_TIME", CURRENT_TIME);
     392         358 :         failed += keywords_insert("CURRENT_TIMESTAMP", CURRENT_TIMESTAMP);
     393         358 :         failed += keywords_insert("CURRENT_TIMEZONE", CURRENT_TIMEZONE);
     394         358 :         failed += keywords_insert("NOW", CURRENT_TIMESTAMP);
     395         358 :         failed += keywords_insert("LOCALTIME", LOCALTIME);
     396         358 :         failed += keywords_insert("LOCALTIMESTAMP", LOCALTIMESTAMP);
     397         358 :         failed += keywords_insert("ZONE", ZONE);
     398             : 
     399         358 :         failed += keywords_insert("CENTURY", CENTURY);
     400         358 :         failed += keywords_insert("DECADE", DECADE);
     401         358 :         failed += keywords_insert("YEAR", YEAR);
     402         358 :         failed += keywords_insert("QUARTER", QUARTER);
     403         358 :         failed += keywords_insert("MONTH", MONTH);
     404         358 :         failed += keywords_insert("WEEK", WEEK);
     405         358 :         failed += keywords_insert("DOW", DOW);
     406         358 :         failed += keywords_insert("DOY", DOY);
     407         358 :         failed += keywords_insert("DAY", DAY);
     408         358 :         failed += keywords_insert("HOUR", HOUR);
     409         358 :         failed += keywords_insert("MINUTE", MINUTE);
     410         358 :         failed += keywords_insert("SECOND", SECOND);
     411         358 :         failed += keywords_insert("EPOCH", EPOCH);
     412             : 
     413         358 :         failed += keywords_insert("POSITION", POSITION);
     414         358 :         failed += keywords_insert("SUBSTRING", SUBSTRING);
     415         358 :         failed += keywords_insert("SPLIT_PART", SPLIT_PART);
     416         358 :         failed += keywords_insert("TRIM", TRIM);
     417         358 :         failed += keywords_insert("LEADING", LEADING);
     418         358 :         failed += keywords_insert("TRAILING", TRAILING);
     419         358 :         failed += keywords_insert("BOTH", BOTH);
     420             : 
     421         358 :         failed += keywords_insert("CASE", CASE);
     422         358 :         failed += keywords_insert("WHEN", WHEN);
     423         358 :         failed += keywords_insert("THEN", THEN);
     424         358 :         failed += keywords_insert("ELSE", ELSE);
     425         358 :         failed += keywords_insert("END", END);
     426         358 :         failed += keywords_insert("NULLIF", NULLIF);
     427         358 :         failed += keywords_insert("COALESCE", COALESCE);
     428         358 :         failed += keywords_insert("ELSEIF", ELSEIF);
     429         358 :         failed += keywords_insert("IF", IF);
     430         358 :         failed += keywords_insert("WHILE", WHILE);
     431         358 :         failed += keywords_insert("DO", DO);
     432             : 
     433         358 :         failed += keywords_insert("COMMIT", COMMIT);
     434         358 :         failed += keywords_insert("ROLLBACK", ROLLBACK);
     435         358 :         failed += keywords_insert("SAVEPOINT", SAVEPOINT);
     436         358 :         failed += keywords_insert("RELEASE", RELEASE);
     437         358 :         failed += keywords_insert("WORK", WORK);
     438         358 :         failed += keywords_insert("CHAIN", CHAIN);
     439         358 :         failed += keywords_insert("PRESERVE", PRESERVE);
     440         358 :         failed += keywords_insert("ROWS", ROWS);
     441         358 :         failed += keywords_insert("NO", NO);
     442         358 :         failed += keywords_insert("START", START);
     443         358 :         failed += keywords_insert("TRANSACTION", TRANSACTION);
     444         358 :         failed += keywords_insert("READ", READ);
     445         358 :         failed += keywords_insert("WRITE", WRITE);
     446         358 :         failed += keywords_insert("ONLY", ONLY);
     447         358 :         failed += keywords_insert("ISOLATION", ISOLATION);
     448         358 :         failed += keywords_insert("LEVEL", LEVEL);
     449         358 :         failed += keywords_insert("UNCOMMITTED", UNCOMMITTED);
     450         358 :         failed += keywords_insert("COMMITTED", COMMITTED);
     451         358 :         failed += keywords_insert("REPEATABLE", sqlREPEATABLE);
     452         358 :         failed += keywords_insert("SNAPSHOT", SNAPSHOT);
     453         358 :         failed += keywords_insert("SERIALIZABLE", SERIALIZABLE);
     454         358 :         failed += keywords_insert("DIAGNOSTICS", DIAGNOSTICS);
     455         358 :         failed += keywords_insert("SIZE", sqlSIZE);
     456         358 :         failed += keywords_insert("STORAGE", STORAGE);
     457             : 
     458         358 :         failed += keywords_insert("TYPE", TYPE);
     459         358 :         failed += keywords_insert("PROCEDURE", PROCEDURE);
     460         358 :         failed += keywords_insert("FUNCTION", FUNCTION);
     461         358 :         failed += keywords_insert("LOADER", sqlLOADER);
     462         358 :         failed += keywords_insert("REPLACE", REPLACE);
     463             : 
     464             :         //failed += keywords_insert("FIELD", FIELD);
     465         358 :         failed += keywords_insert("FILTER", FILTER);
     466         358 :         failed += keywords_insert("AGGREGATE", AGGREGATE);
     467         358 :         failed += keywords_insert("RETURNS", RETURNS);
     468         358 :         failed += keywords_insert("EXTERNAL", EXTERNAL);
     469         358 :         failed += keywords_insert("NAME", sqlNAME);
     470         358 :         failed += keywords_insert("RETURN", RETURN);
     471         358 :         failed += keywords_insert("CALL", CALL);
     472         358 :         failed += keywords_insert("LANGUAGE", LANGUAGE);
     473             : 
     474         358 :         failed += keywords_insert("ANALYZE", ANALYZE);
     475         358 :         failed += keywords_insert("EXPLAIN", SQL_EXPLAIN);
     476         358 :         failed += keywords_insert("PLAN", SQL_PLAN);
     477         358 :         failed += keywords_insert("TRACE", SQL_TRACE);
     478         358 :         failed += keywords_insert("PREPARE", PREPARE);
     479         358 :         failed += keywords_insert("PREP", PREP);
     480         358 :         failed += keywords_insert("EXECUTE", EXECUTE);
     481         358 :         failed += keywords_insert("EXEC", EXEC);
     482         358 :         failed += keywords_insert("DEALLOCATE", DEALLOCATE);
     483             : 
     484         358 :         failed += keywords_insert("INDEX", INDEX);
     485             : 
     486         358 :         failed += keywords_insert("SEQUENCE", SEQUENCE);
     487         358 :         failed += keywords_insert("RESTART", RESTART);
     488         358 :         failed += keywords_insert("INCREMENT", INCREMENT);
     489         358 :         failed += keywords_insert("MAXVALUE", MAXVALUE);
     490         358 :         failed += keywords_insert("MINVALUE", MINVALUE);
     491         358 :         failed += keywords_insert("CYCLE", CYCLE);
     492         358 :         failed += keywords_insert("CACHE", CACHE);
     493         358 :         failed += keywords_insert("NEXT", NEXT);
     494         358 :         failed += keywords_insert("VALUE", VALUE);
     495         358 :         failed += keywords_insert("GENERATED", GENERATED);
     496         358 :         failed += keywords_insert("ALWAYS", ALWAYS);
     497         358 :         failed += keywords_insert("IDENTITY", IDENTITY);
     498         358 :         failed += keywords_insert("SERIAL", SERIAL);
     499         358 :         failed += keywords_insert("BIGSERIAL", BIGSERIAL);
     500         358 :         failed += keywords_insert("AUTO_INCREMENT", AUTO_INCREMENT);
     501         358 :         failed += keywords_insert("CONTINUE", CONTINUE);
     502             : 
     503         358 :         failed += keywords_insert("TRIGGER", TRIGGER);
     504         358 :         failed += keywords_insert("ATOMIC", ATOMIC);
     505         358 :         failed += keywords_insert("BEGIN", BEGIN);
     506         358 :         failed += keywords_insert("OF", OF);
     507         358 :         failed += keywords_insert("BEFORE", BEFORE);
     508         358 :         failed += keywords_insert("AFTER", AFTER);
     509         358 :         failed += keywords_insert("ROW", ROW);
     510         358 :         failed += keywords_insert("STATEMENT", STATEMENT);
     511         358 :         failed += keywords_insert("NEW", sqlNEW);
     512         358 :         failed += keywords_insert("OLD", OLD);
     513         358 :         failed += keywords_insert("EACH", EACH);
     514         358 :         failed += keywords_insert("REFERENCING", REFERENCING);
     515             : 
     516         358 :         failed += keywords_insert("RANGE", RANGE);
     517         358 :         failed += keywords_insert("UNBOUNDED", UNBOUNDED);
     518         358 :         failed += keywords_insert("PRECEDING", PRECEDING);
     519         358 :         failed += keywords_insert("FOLLOWING", FOLLOWING);
     520         358 :         failed += keywords_insert("CURRENT", CURRENT);
     521         358 :         failed += keywords_insert("EXCLUDE", EXCLUDE);
     522         358 :         failed += keywords_insert("OTHERS", OTHERS);
     523         358 :         failed += keywords_insert("TIES", TIES);
     524         358 :         failed += keywords_insert("GROUPS", GROUPS);
     525         358 :         failed += keywords_insert("WINDOW", WINDOW);
     526         358 :         failed += keywords_insert("QUALIFY", QUALIFY);
     527             : 
     528             :         /* special SQL/XML keywords */
     529         358 :         failed += keywords_insert("XMLCOMMENT", XMLCOMMENT);
     530         358 :         failed += keywords_insert("XMLCONCAT", XMLCONCAT);
     531         358 :         failed += keywords_insert("XMLDOCUMENT", XMLDOCUMENT);
     532         358 :         failed += keywords_insert("XMLELEMENT", XMLELEMENT);
     533         358 :         failed += keywords_insert("XMLATTRIBUTES", XMLATTRIBUTES);
     534         358 :         failed += keywords_insert("XMLFOREST", XMLFOREST);
     535         358 :         failed += keywords_insert("XMLPARSE", XMLPARSE);
     536         358 :         failed += keywords_insert("STRIP", STRIP);
     537         358 :         failed += keywords_insert("WHITESPACE", WHITESPACE);
     538         358 :         failed += keywords_insert("XMLPI", XMLPI);
     539         358 :         failed += keywords_insert("XMLQUERY", XMLQUERY);
     540         358 :         failed += keywords_insert("PASSING", PASSING);
     541         358 :         failed += keywords_insert("XMLTEXT", XMLTEXT);
     542         358 :         failed += keywords_insert("NIL", NIL);
     543         358 :         failed += keywords_insert("REF", REF);
     544         358 :         failed += keywords_insert("ABSENT", ABSENT);
     545         358 :         failed += keywords_insert("DOCUMENT", DOCUMENT);
     546         358 :         failed += keywords_insert("ELEMENT", ELEMENT);
     547         358 :         failed += keywords_insert("CONTENT", CONTENT);
     548         358 :         failed += keywords_insert("XMLNAMESPACES", XMLNAMESPACES);
     549         358 :         failed += keywords_insert("NAMESPACE", NAMESPACE);
     550         358 :         failed += keywords_insert("XMLVALIDATE", XMLVALIDATE);
     551         358 :         failed += keywords_insert("RETURNING", RETURNING);
     552         358 :         failed += keywords_insert("RECURSIVE", RECURSIVE);
     553         358 :         failed += keywords_insert("LOCATION", LOCATION);
     554         358 :         failed += keywords_insert("ID", ID);
     555         358 :         failed += keywords_insert("ACCORDING", ACCORDING);
     556         358 :         failed += keywords_insert("XMLSCHEMA", XMLSCHEMA);
     557         358 :         failed += keywords_insert("URI", URI);
     558         358 :         failed += keywords_insert("XMLAGG", XMLAGG);
     559             : 
     560             :         /* keywords for opengis */
     561         358 :         failed += keywords_insert("GEOMETRY", GEOMETRY);
     562             : 
     563         358 :         failed += keywords_insert("POINT", GEOMETRYSUBTYPE);
     564         358 :         failed += keywords_insert("LINESTRING", GEOMETRYSUBTYPE);
     565         358 :         failed += keywords_insert("POLYGON", GEOMETRYSUBTYPE);
     566         358 :         failed += keywords_insert("MULTIPOINT", GEOMETRYSUBTYPE);
     567         358 :         failed += keywords_insert("MULTILINESTRING", GEOMETRYSUBTYPE);
     568         358 :         failed += keywords_insert("MULTIPOLYGON", GEOMETRYSUBTYPE);
     569         358 :         failed += keywords_insert("GEOMETRYCOLLECTION", GEOMETRYSUBTYPE);
     570             : 
     571         358 :         failed += keywords_insert("POINTZ", GEOMETRYSUBTYPE);
     572         358 :         failed += keywords_insert("LINESTRINGZ", GEOMETRYSUBTYPE);
     573         358 :         failed += keywords_insert("POLYGONZ", GEOMETRYSUBTYPE);
     574         358 :         failed += keywords_insert("MULTIPOINTZ", GEOMETRYSUBTYPE);
     575         358 :         failed += keywords_insert("MULTILINESTRINGZ", GEOMETRYSUBTYPE);
     576         358 :         failed += keywords_insert("MULTIPOLYGONZ", GEOMETRYSUBTYPE);
     577         358 :         failed += keywords_insert("GEOMETRYCOLLECTIONZ", GEOMETRYSUBTYPE);
     578             : 
     579         358 :         failed += keywords_insert("POINTM", GEOMETRYSUBTYPE);
     580         358 :         failed += keywords_insert("LINESTRINGM", GEOMETRYSUBTYPE);
     581         358 :         failed += keywords_insert("POLYGONM", GEOMETRYSUBTYPE);
     582         358 :         failed += keywords_insert("MULTIPOINTM", GEOMETRYSUBTYPE);
     583         358 :         failed += keywords_insert("MULTILINESTRINGM", GEOMETRYSUBTYPE);
     584         358 :         failed += keywords_insert("MULTIPOLYGONM", GEOMETRYSUBTYPE);
     585         358 :         failed += keywords_insert("GEOMETRYCOLLECTIONM", GEOMETRYSUBTYPE);
     586             : 
     587         358 :         failed += keywords_insert("POINTZM", GEOMETRYSUBTYPE);
     588         358 :         failed += keywords_insert("LINESTRINGZM", GEOMETRYSUBTYPE);
     589         358 :         failed += keywords_insert("POLYGONZM", GEOMETRYSUBTYPE);
     590         358 :         failed += keywords_insert("MULTIPOINTZM", GEOMETRYSUBTYPE);
     591         358 :         failed += keywords_insert("MULTILINESTRINGZM", GEOMETRYSUBTYPE);
     592         358 :         failed += keywords_insert("MULTIPOLYGONZM", GEOMETRYSUBTYPE);
     593         358 :         failed += keywords_insert("GEOMETRYCOLLECTIONZM", GEOMETRYSUBTYPE);
     594         358 :         failed += keywords_insert("LOGIN", LOGIN);
     595             :         // odbc keywords
     596         358 :         failed += keywords_insert("d", ODBC_DATE_ESCAPE_PREFIX);
     597         358 :         failed += keywords_insert("t", ODBC_TIME_ESCAPE_PREFIX);
     598         358 :         failed += keywords_insert("ts", ODBC_TIMESTAMP_ESCAPE_PREFIX);
     599         358 :         failed += keywords_insert("guid", ODBC_GUID_ESCAPE_PREFIX);
     600         358 :         failed += keywords_insert("fn", ODBC_FUNC_ESCAPE_PREFIX);
     601         358 :         failed += keywords_insert("oj", ODBC_OJ_ESCAPE_PREFIX);
     602         358 :         failed += keywords_insert("DAYNAME", DAYNAME);
     603         358 :         failed += keywords_insert("IFNULL", IFNULL);
     604         358 :         failed += keywords_insert("MONTHNAME", MONTHNAME);
     605         358 :         failed += keywords_insert("TIMESTAMPADD", TIMESTAMPADD);
     606         358 :         failed += keywords_insert("TIMESTAMPDIFF", TIMESTAMPDIFF);
     607         358 :         failed += keywords_insert("SQL_BIGINT", SQL_BIGINT);
     608         358 :         failed += keywords_insert("SQL_BINARY", SQL_BINARY);
     609         358 :         failed += keywords_insert("SQL_BIT", SQL_BIT);
     610         358 :         failed += keywords_insert("SQL_CHAR", SQL_CHAR);
     611         358 :         failed += keywords_insert("SQL_DATE", SQL_DATE);
     612         358 :         failed += keywords_insert("SQL_DECIMAL", SQL_DECIMAL);
     613         358 :         failed += keywords_insert("SQL_DOUBLE", SQL_DOUBLE);
     614         358 :         failed += keywords_insert("SQL_FLOAT", SQL_FLOAT);
     615         358 :         failed += keywords_insert("SQL_GUID", SQL_GUID);
     616         358 :         failed += keywords_insert("SQL_HUGEINT", SQL_HUGEINT);
     617         358 :         failed += keywords_insert("SQL_INTEGER", SQL_INTEGER);
     618         358 :         failed += keywords_insert("SQL_INTERVAL_DAY", SQL_INTERVAL_DAY);
     619         358 :         failed += keywords_insert("SQL_INTERVAL_DAY_TO_HOUR", SQL_INTERVAL_DAY_TO_HOUR);
     620         358 :         failed += keywords_insert("SQL_INTERVAL_DAY_TO_MINUTE", SQL_INTERVAL_DAY_TO_MINUTE);
     621         358 :         failed += keywords_insert("SQL_INTERVAL_DAY_TO_SECOND", SQL_INTERVAL_DAY_TO_SECOND);
     622         358 :         failed += keywords_insert("SQL_INTERVAL_HOUR", SQL_INTERVAL_HOUR);
     623         358 :         failed += keywords_insert("SQL_INTERVAL_HOUR_TO_MINUTE", SQL_INTERVAL_HOUR_TO_MINUTE);
     624         358 :         failed += keywords_insert("SQL_INTERVAL_HOUR_TO_SECOND", SQL_INTERVAL_HOUR_TO_SECOND);
     625         358 :         failed += keywords_insert("SQL_INTERVAL_MINUTE", SQL_INTERVAL_MINUTE);
     626         358 :         failed += keywords_insert("SQL_INTERVAL_MINUTE_TO_SECOND", SQL_INTERVAL_MINUTE_TO_SECOND);
     627         358 :         failed += keywords_insert("SQL_INTERVAL_MONTH", SQL_INTERVAL_MONTH);
     628         358 :         failed += keywords_insert("SQL_INTERVAL_SECOND", SQL_INTERVAL_SECOND);
     629         358 :         failed += keywords_insert("SQL_INTERVAL_YEAR", SQL_INTERVAL_YEAR);
     630         358 :         failed += keywords_insert("SQL_INTERVAL_YEAR_TO_MONTH", SQL_INTERVAL_YEAR_TO_MONTH);
     631         358 :         failed += keywords_insert("SQL_LONGVARBINARY", SQL_LONGVARBINARY);
     632         358 :         failed += keywords_insert("SQL_LONGVARCHAR", SQL_LONGVARCHAR);
     633         358 :         failed += keywords_insert("SQL_NUMERIC", SQL_NUMERIC);
     634         358 :         failed += keywords_insert("SQL_REAL", SQL_REAL);
     635         358 :         failed += keywords_insert("SQL_SMALLINT", SQL_SMALLINT);
     636         358 :         failed += keywords_insert("SQL_TIME", SQL_TIME);
     637         358 :         failed += keywords_insert("SQL_TIMESTAMP", SQL_TIMESTAMP);
     638         358 :         failed += keywords_insert("SQL_TINYINT", SQL_TINYINT);
     639         358 :         failed += keywords_insert("SQL_VARBINARY", SQL_VARBINARY);
     640         358 :         failed += keywords_insert("SQL_VARCHAR", SQL_VARCHAR);
     641         358 :         failed += keywords_insert("SQL_WCHAR", SQL_WCHAR);
     642         358 :         failed += keywords_insert("SQL_WLONGVARCHAR", SQL_WLONGVARCHAR);
     643         358 :         failed += keywords_insert("SQL_WVARCHAR", SQL_WVARCHAR);
     644         358 :         failed += keywords_insert("SQL_TSI_FRAC_SECOND", SQL_TSI_FRAC_SECOND);
     645         358 :         failed += keywords_insert("SQL_TSI_SECOND", SQL_TSI_SECOND);
     646         358 :         failed += keywords_insert("SQL_TSI_MINUTE", SQL_TSI_MINUTE);
     647         358 :         failed += keywords_insert("SQL_TSI_HOUR", SQL_TSI_HOUR);
     648         358 :         failed += keywords_insert("SQL_TSI_DAY", SQL_TSI_DAY);
     649         358 :         failed += keywords_insert("SQL_TSI_WEEK", SQL_TSI_WEEK);
     650         358 :         failed += keywords_insert("SQL_TSI_MONTH", SQL_TSI_MONTH);
     651         358 :         failed += keywords_insert("SQL_TSI_QUARTER", SQL_TSI_QUARTER);
     652         358 :         failed += keywords_insert("SQL_TSI_YEAR", SQL_TSI_YEAR);
     653             : 
     654         358 :         failed += keywords_insert("LEAST", MARGFUNC);
     655         358 :         failed += keywords_insert("GREATEST", MARGFUNC);
     656             : 
     657         358 :         failed += keywords_insert("SETOF", SETOF);
     658         358 :         failed += keywords_insert("ARRAY", ARRAY);
     659         358 :         return failed;
     660             : }
     661             : 
     /*
      * find_keyword_bs(lc, s): call find_keyword() on the text that starts s
      * bytes past the current read position (rs->pos) of lc's input buffer.
      * NOTE(review): lc and s are expanded without parentheses, so only
      * simple expressions are safe as arguments.
      */
     662             : #define find_keyword_bs(lc, s) find_keyword(lc->rs->buf+lc->rs->pos+s)
     663             : 
     /*
      * Overwrite *s with a fresh scanner state.
      *
      * s  - scanner to initialise (its previous contents are discarded)
      * rs - buffered input stream the scanner reads from
      * ws - output stream (used elsewhere in this file to send prompts)
      *
      * The mode starts as LINE_N, raw_string_mode is taken from the
      * "raw_strings" setting via GDKgetenv_istrue(), and aborted is false.
      * Every member not named in the compound literal is zero-initialised.
      */
     664             : void
     665      252745 : scanner_init(struct scanner *s, bstream *rs, stream *ws)
     666             : {
     667      505490 :         *s = (struct scanner) {
     668             :                 .rs = rs,
     669             :                 .ws = ws,
     670             :                 .mode = LINE_N,
     671      252745 :                 .raw_string_mode = GDKgetenv_istrue("raw_strings"),
     672             :                 .aborted = false,
     673             :         };
     674      252745 : }
     675             : 
     /*
      * Finish the current query and prepare the scanner for the next one.
      *
      * - If yybak holds a saved byte (scanner_token() replaces the byte after
      *   a token with NUL and keeps the original in yybak), put it back.
      * - Advance the stream position by the yycur bytes consumed, then skip
      *   any white space that follows.
      * - Reset the per-query state: yycur, started, as, schema and brackets.
      *
      * NOTE(review): the yybak branch dereferences s->rs before the
      * `if (s->rs)` test below; confirm that s->rs can never be NULL while
      * yybak is set, or that the NULL test is merely defensive.
      */
     676             : void
     677     1403563 : scanner_query_processed(struct scanner *s)
     678             : {
     679     1403563 :         int cur;
     680             : 
     681     1403563 :         if (s->yybak) {
     682      521021 :                 s->rs->buf[s->rs->pos + s->yycur] = s->yybak;
     683      521021 :                 s->yybak = 0;
     684             :         }
     685     1403563 :         if (s->rs) {
     686     1403563 :                 s->rs->pos += s->yycur;
     687             :                 /* completely eat the query including white space after the ; */
     688     2570023 :                 while (s->rs->pos < s->rs->len &&
     689     2161054 :                            (cur = s->rs->buf[s->rs->pos], iswspace(cur))) {
     690     1166460 :                         s->rs->pos++;
     691             :                 }
     692             :         }
     693             :         /*assert(s->rs->pos <= s->rs->len);*/
     694     1403563 :         s->yycur = 0;
     695     1403563 :         s->started = 0;
     696     1403563 :         s->as = 0;
     697     1403563 :         s->schema = NULL;
     698     1403563 :         s->brackets = 0;
     699     1403563 : }
     700             : 
     /*
      * Report an unexpected input character through sql_error().
      *
      * lc  - SQL context that receives the error
      * cur - offending character (a code point) or EOF
      *
      * Returns EOF when cur is EOF ("Unexpected end of input"); otherwise
      * reports the code point as U+XXXX, labelling it a control character
      * when iswcntrl() says so (U+FEFF excepted), and returns LEX_ERROR.
      */
     701             : static int
     702          33 : scanner_error(mvc *lc, int cur)
     703             : {
     704          33 :         switch (cur) {
     705           0 :         case EOF:
     706           0 :                 (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected end of input");
     707           0 :                 return EOF;
     708          33 :         default:
     709             :                 /* on Windows at least, iswcntrl returns TRUE for
     710             :                  * U+FEFF, but we just want consistent error
     711             :                  * messages */
     712          33 :                 (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected%s character (U+%04X)", iswcntrl(cur) && cur != 0xFEFF ? " control" : "", (unsigned) cur);
     713             :         }
     714          33 :         return LEX_ERROR;
     715             : }
     716             : 
     717             : 
     718             : /*
     719             :    UTF-8 encoding is as follows:
     720             : U-00000000 - U-0000007F: 0xxxxxxx
     721             : U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
     722             : U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
     723             : U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
     724             : U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
     725             : U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
     726             : */
     727             : /* To be correctly coded UTF-8, the sequence should be the shortest
     728             :    possible encoding of the value being encoded.  This means that for
     729             :    an encoding of length n+1 (1 <= n <= 5), at least one of the bits in
     730             :    utf8chkmsk[n] should be non-zero (else the encoding could be
     731             :    shorter).
     732             : */
     733             : static const int utf8chkmsk[] = {
     734             :         0x0000007f,     /* [0] 1-byte form; scanner_getc rejects n == 0, so it never reads this entry */
     735             :         0x00000780,     /* [1] 2-byte form: needs a bit in 7..10 */
     736             :         0x0000f800,     /* [2] 3-byte form: needs a bit in 11..15 */
     737             :         0x001f0000,     /* [3] 4-byte form: needs a bit in 16..20 */
     738             :         0x03e00000,     /* [4] 5-byte form: needs a bit in 21..25 */
     739             :         0x7c000000      /* [5] 6-byte form: needs a bit in 26..30 */
     740             : };
     741             : 
     /*
      * Push character ch back onto the input.
      *
      * Despite the name nothing is written: the function only moves
      * lc->yycur backwards by the number of bytes ch occupies in UTF-8
      * (1 below 0x80, 2 below 0x800, 3 below 0x10000, otherwise 4).
      */
     742             : static void
     743    32844368 : utf8_putchar(struct scanner *lc, int ch)
     744             : {
     745    32844368 :         if ((ch) < 0x80) {
     746    32844363 :                 lc->yycur--;
     747           5 :         } else if ((ch) < 0x800) {
     748           0 :                 lc->yycur -= 2;
     749           5 :         } else if ((ch) < 0x10000) {
     750           5 :                 lc->yycur -= 3;
     751             :         } else {
     752           0 :                 lc->yycur -= 4;
     753             :         }
     754    32844368 : }
     755             : 
     /*
      * Make sure at least n more bytes are buffered beyond the current scan
      * position (b->pos + lc->yycur), fetching more input if necessary.
      *
      * Returns 1 when the bytes are available and EOF otherwise.  EOF is
      * returned when:
      *  - the scanner was already aborted;
      *  - the mode is LINE_1 or no token has been started (lc->started == 0),
      *    in which case no further input is requested;
      *  - bstream_getoob() reports out-of-band data (lc->aborted is set);
      *  - bstream_next() fails (on MNSTR_INTERRUPT the scanner is marked
      *    aborted, errstr is set and the stream error is cleared);
      *  - a prompt was sent but the stream hit eof again without supplying
      *    enough data;
      *  - after a prompt the only new input is the two bytes '\200' '\n',
      *    which are removed from the buffer and treated as "Query aborted"
      *    (presumably an abort marker sent by the client; confirm).
      */
     756             : static inline int
     757   142615254 : scanner_read_more(struct scanner *lc, size_t n)
     758             : {
     759   142615254 :         bstream *b = lc->rs;
     760   142615254 :         bool more = false;
     761             : 
     762             : 
     763   142615254 :         if (lc->aborted)
     764             :                 return EOF;
     765   142618847 :         while (b->len < b->pos + lc->yycur + n) {
     766             : 
     767      126291 :                 if (lc->mode == LINE_1 || !lc->started)
     768             :                         return EOF;
     769             : 
     770             :                 /* query is not finished ask for more */
     771           0 :                 if (b->eof || !isa_block_stream(b->s)) {
     772           0 :                         if (bstream_getoob(b)) {
     773           0 :                                 lc->aborted = true;
     774           0 :                                 return EOF;
     775             :                         }
                                               /* send the continuation prompt; flush only if it was written */
     776        1800 :                         if (mnstr_write(lc->ws, PROMPT2, sizeof(PROMPT2) - 1, 1) == 1)
     777        1800 :                                 mnstr_flush(lc->ws, MNSTR_FLUSH_DATA);
     778        1800 :                         b->eof = false;
     779        1800 :                         more = true;
     780             :                 }
     781             :                 /* we need more query text */
     782        3600 :                 if (bstream_next(b) < 0) {
     783           0 :                         if (mnstr_errnr(b->s) == MNSTR_INTERRUPT) {
     784             :                                 // now what?
     785           0 :                                 lc->errstr = "Query aborted";
     786           0 :                                 lc->aborted = true;
     787           0 :                                 mnstr_clearerr(b->s);
     788             :                         }
     789           0 :                         return EOF;
     790        3600 :                 } else if (/* we asked for more data but didn't get any */
     791        1800 :                            (more && b->eof && b->len < b->pos + lc->yycur + n))
     792             :                         return EOF;
                                       /* exactly two new bytes, '\200' '\n', after a prompt: drop them and stop */
     793        3593 :                 if (more && b->pos + lc->yycur + 2 == b->len && b->buf[b->pos + lc->yycur] == '\200' && b->buf[b->pos + lc->yycur + 1] == '\n') {
     794           0 :                         lc->errstr = "Query aborted";
     795           0 :                         b->len -= 2;
     796           0 :                         b->buf[b->len] = 0;
     797           0 :                         return EOF;
     798             :                 }
     799             :         }
     800             :         return 1;
     801             : }
     802             : 
     /*
      * Read the next character from the input and return it as a code point,
      * advancing lc->yycur past every byte consumed.
      *
      * A byte below 0x80 is returned unchanged.  Any other byte is treated as
      * a UTF-8 lead byte: the number of continuation bytes is derived from
      * its leading 1 bits, the continuation bytes are fetched and checked for
      * the 10xxxxxx form, and the result is rejected when a shorter encoding
      * would have sufficed (checked with utf8chkmsk[]).
      *
      * Returns EOF when scanner_read_more() cannot supply the input, or when
      * the sequence is malformed; in the latter case lc->errstr holds the
      * reason.  lc->errstr is reset to NULL once the first byte is available.
      */
     803             : static inline int
     804   141326244 : scanner_getc(struct scanner *lc)
     805             : {
     806   141326244 :         bstream *b = lc->rs;
     807   141326244 :         unsigned char *s = NULL;
     808   141326244 :         int c, m, n, mask;
     809             : 
     810   141326244 :         if (scanner_read_more(lc, 1) == EOF) {
     811             :                 //lc->errstr = SQLSTATE(42000) "end of input stream";
     812             :                 return EOF;
     813             :         }
     814   141213127 :         lc->errstr = NULL;
     815             : 
     816   141213127 :         s = (unsigned char *) b->buf + b->pos + lc->yycur++;
     817   141213127 :         if (((c = *s) & 0x80) == 0) {
     818             :                 /* 7-bit char */
     819             :                 return c;
     820             :         }
                               /* count the consecutive 1 bits that follow the top bit of the lead byte */
     821       88250 :         for (n = 0, m = 0x40; c & m; n++, m >>= 1)
     822             :                 ;
     823             :         /* n now is number of 10xxxxxx bytes that should follow */
                               /* NOTE(review): the length test below ignores lc->yycur; the
                                * scanner_read_more() call further down is what actually makes
                                * the n bytes available.  Confirm the test is intended. */
     824       29443 :         if (n == 0 || n >= 6 || (b->pos + n) > b->len) {
     825             :                 /* incorrect UTF-8 sequence */
     826             :                 /* n==0: c == 10xxxxxx */
     827             :                 /* n>=6: c == 1111111x */
     828           0 :                 lc->errstr = SQLSTATE(42000) "invalid start of UTF-8 sequence";
     829           0 :                 goto error;
     830             :         }
     831             : 
     832       29443 :         if (scanner_read_more(lc, (size_t) n) == EOF)
     833             :                 return EOF;
                               /* re-derive s from b->buf after the scanner_read_more() call */
     834       29443 :         s = (unsigned char *) b->buf + b->pos + lc->yycur;
     835             : 
     836       29443 :         mask = utf8chkmsk[n];
     837       29443 :         c &= ~(0xFFC0 >> n);  /* remove non-x bits */
     838       88249 :         while (--n >= 0) {
     839       58807 :                 c <<= 6;
     840       58807 :                 lc->yycur++;
     841       58807 :                 if (((m = *s++) & 0xC0) != 0x80) {
     842             :                         /* incorrect UTF-8 sequence: byte is not 10xxxxxx */
     843             :                         /* this includes end-of-string (m == 0) */
     844           1 :                         lc->errstr = SQLSTATE(42000) "invalid continuation in UTF-8 sequence";
     845           1 :                         goto error;
     846             :                 }
     847       58806 :                 c |= m & 0x3F;
     848             :         }
     849       29442 :         if ((c & mask) == 0) {
     850             :                 /* incorrect UTF-8 sequence: not shortest possible */
     851           0 :                 lc->errstr = SQLSTATE(42000) "not shortest possible UTF-8 sequence";
     852           0 :                 goto error;
     853             :         }
     854             : 
     855             :         return c;
     856             : 
     857           1 : error:
     858           1 :         if (b->pos + lc->yycur < b->len)    /* skip bogus char */
     859           0 :                 lc->yycur++;
     860             :         return EOF;
     861             : }
     862             : 
     /*
      * Close off the token that ends at the current scan position.
      *
      * The byte at buf[pos + yycur] is saved in lc->yybak and replaced by
      * NUL, so the token text is NUL-terminated in place inside the input
      * buffer.  The token code is stored in lc->yyval and returned.
      * scanner_query_processed() restores the saved byte later.
      */
     863             : static int
     864    29324200 : scanner_token(struct scanner *lc, int token)
     865             : {
     866    29324200 :         lc->yybak = lc->rs->buf[lc->rs->pos + lc->yycur];
     867    29324200 :         lc->rs->buf[lc->rs->pos + lc->yycur] = 0;
     868    29324200 :         lc->yyval = token;
     869    29324200 :         return lc->yyval;
     870             : }
     871             : 
     /*
      * Scan a quoted token up to and including its closing quote.
      *
      * c       - SQL context; its scanner (c->scanner) supplies the input
      * quote   - delimiter character; scanning starts at the current scan
      *           position (presumably just after the opening quote; confirm
      *           against the caller)
      * escapes - when true, a backslash escapes the following character, so
      *           an escaped quote does not end the token
      *
      * ASCII bytes are consumed straight from the buffer in the inner loop.
      * That loop stops at a byte with the high bit set, a NUL byte, an
      * unescaped quote, or the length limit.  A doubled quote is skipped and
      * scanning continues.  A byte with the high bit set is re-read through
      * scanner_getc() so the UTF-8 sequence is validated.  A NUL byte at the
      * end of the buffered data triggers scanner_read_more().
      *
      * Returns STRING (via scanner_token()) on the closing quote; LEX_ERROR
      * for "string too long" or a NUL byte inside the buffered data; the
      * result of scanner_error() when U+FEFF appears in a '"'-quoted token;
      * and EOF, after reporting lc->errstr or "Unexpected end of input",
      * when the input ends first.
      *
      * NOTE(review): pos restarts at 0 on every pass of the outer loop, so
      * the limit (1 << 11 for '"', otherwise 1 << 30) bounds each run
      * scanned by the inner loop rather than the whole token.
      */
     872             : static int
     873     2162976 : scanner_string(mvc *c, int quote, bool escapes)
     874             : {
     875     2162976 :         struct scanner *lc = &c->scanner;
     876     2162976 :         bstream *rs = lc->rs;
     877     2162976 :         int cur = quote;
     878     2162976 :         bool escape = false;
     879     2162976 :         const size_t limit = quote == '"' ? 1 << 11 : 1 << 30;
     880             : 
     881     2162976 :         lc->started = 1;
     882     2201125 :         while (cur != EOF) {
     883     2201110 :                 size_t pos = 0;
     884     2201110 :                 const size_t yycur = rs->pos + lc->yycur;
     885             : 
     886    35769415 :                 while (cur != EOF && (quote != '"' || cur != 0xFEFF) && pos < limit &&
     887    33568305 :                        (((cur = rs->buf[yycur + pos++]) & 0x80) == 0) &&
     888    67107156 :                        cur && (cur != quote || escape)) {
     889    31367196 :                         if (escapes && cur == '\\')
     890        6840 :                                 escape = !escape;
     891             :                         else
     892             :                                 escape = false;
     893             :                 }
     894     2201110 :                 if (pos == limit) {
     895           0 :                         (void) sql_error(c, 2, SQLSTATE(42000) "string too long");
     896           0 :                         return LEX_ERROR;
     897             :                 }
     898             :                 /* BOM character not allowed as an identifier */
     899     2201110 :                 if (cur == EOF || (quote == '"' && cur == 0xFEFF))
     900           1 :                         return scanner_error(c, cur);
     901     2201109 :                 lc->yycur += pos;
     902             :                 /* check for quote escaped quote: Obscure SQL Rule */
     903     2201109 :                 if (cur == quote && rs->buf[yycur + pos] == quote) {
     904        8708 :                         lc->yycur++;
     905        8708 :                         continue;
     906             :                 }
     907     2192401 :                 assert(yycur + pos <= rs->len + 1);
     908     2192401 :                 if (cur == quote && !escape) {
     909     2162946 :                         return scanner_token(lc, STRING);
     910             :                 }
     911       29455 :                 lc->yycur--; /* go back to current (possibly invalid) char */
     912             :                 /* long utf8, if correct isn't the quote */
     913       29455 :                 if (!cur) {
                                               /* a NUL inside the buffered data is an error; at its end, fetch more */
     914          30 :                         if (lc->rs->len >= lc->rs->pos + lc->yycur + 1) {
     915          14 :                                 (void) sql_error(c, 2, SQLSTATE(42000) "NULL byte in string");
     916          14 :                                 return LEX_ERROR;
     917             :                         }
     918          16 :                         cur = scanner_read_more(lc, 1);
     919             :                 } else {
     920       29425 :                         cur = scanner_getc(lc);
     921             :                 }
     922             :         }
     923          15 :         (void) sql_error(c, 2, "%s", lc->errstr ? lc->errstr : SQLSTATE(42000) "Unexpected end of input");
     924          15 :         return EOF;
     925             : }
     926             : 
     927             : /* scan a structure {blah} into a string. We only count the matching {}
     928             :  * unless escaped. We do not consider embeddings in string literals yet
     929             :  */
     930             : 
     /*
      * Scan a brace-delimited body whose opening '{' is the byte just before
      * the current scan position (asserted below).
      *
      * blk holds the brace nesting depth and starts at 1.  Every '{' raises
      * it and every '}' lowers it.  When it reaches 0 the text up to and
      * including the closing '}' is returned as an X_BODY token via
      * scanner_token().  A byte with the high bit set is re-read through
      * scanner_getc().  A NUL byte at the end of the buffered data triggers
      * scanner_read_more().
      *
      * Returns X_BODY on success; LEX_ERROR for a NUL byte inside the
      * buffered data; EOF, after reporting "Unexpected end of input", when
      * the input ends before the braces balance.
      *
      * NOTE(review): escape is cleared for the brace character itself before
      * the counters are updated, and the loop condition only consults escape
      * when blk is already 0.  A backslash-escaped brace therefore still
      * appears to be counted; confirm whether that is intended.
      */
     931             : static int
     932         234 : scanner_body(mvc *c)
     933             : {
     934         234 :         struct scanner *lc = &c->scanner;
     935         234 :         bstream *rs = lc->rs;
     936         234 :         int cur = (int) 'x';
     937         234 :         int blk = 1;
     938         234 :         bool escape = false;
     939             : 
     940         234 :         lc->started = 1;
     941         234 :         assert(rs->buf[rs->pos + lc->yycur-1] == '{');
     942         290 :         while (cur != EOF) {
     943         290 :                 size_t pos = rs->pos + lc->yycur;
     944             : 
     945       32350 :                 while ((((cur = rs->buf[pos++]) & 0x80) == 0) && cur && (blk || escape)) {
     946       32060 :                         if (cur != '\\')
     947             :                                 escape = false;
     948             :                         else
     949          12 :                                 escape = !escape;
     950       32060 :                         blk += cur =='{';
     951       32060 :                         blk -= cur =='}';
     952             :                 }
     953         290 :                 lc->yycur = pos - rs->pos;
     954         290 :                 assert(pos <= rs->len + 1);
     955         290 :                 if (blk == 0 && !escape){
     956         234 :                         lc->yycur--; /* go back to current (possibly invalid) char */
     957         234 :                         return scanner_token(lc, X_BODY);
     958             :                 }
     959          56 :                 lc->yycur--; /* go back to current (possibly invalid) char */
     960          56 :                 if (!cur) {
     961          56 :                         if (lc->rs->len >= lc->rs->pos + lc->yycur + 1) {
     962           0 :                                 (void) sql_error(c, 2, SQLSTATE(42000) "NULL byte in string");
     963           0 :                                 return LEX_ERROR;
     964             :                         }
     965          56 :                         cur = scanner_read_more(lc, 1);
     966             :                 } else {
     967           0 :                         cur = scanner_getc(lc);
     968             :                 }
     969             :         }
     970           0 :         (void) sql_error(c, 2, SQLSTATE(42000) "Unexpected end of input");
     971           0 :         return EOF;
     972             : }
     973             : /* Lex an identifier or keyword whose first character `cur` has already
                     :  * been read.  Characters are consumed while they are alphanumeric
                     :  * (iswalnum) or '_'.  The first other character is put back, the token
                     :  * is recorded as IDENT and, when find_keyword_bs() finds a match, the
                     :  * keyword's own token replaces IDENT.  Returns the resulting token, or
                     :  * EOF when the input ends before a terminating character is seen.
                     :  */
     974             : static int
     975    14069388 : keyword_or_ident(mvc * c, int cur)
     976             : {
     977    14069388 :         struct scanner *lc = &c->scanner;
     978    14069388 :         keyword *k = NULL;
     979    14069388 :         size_t s;
     980             : 
     981    14069388 :         lc->started = 1;
     982    14069388 :         utf8_putchar(lc, cur);
     983    14069386 :         s = lc->yycur;  /* scanner position after the put-back; passed to find_keyword_bs() below */
     984    14069386 :         lc->yyval = IDENT;
     985    84476417 :         while ((cur = scanner_getc(lc)) != EOF) {
     986    84476155 :                 if (!iswalnum(cur) && cur != '_') {
     987    14069124 :                         utf8_putchar(lc, cur);  /* cur ends the identifier: put it back */
     988    14069131 :                         (void)scanner_token(lc, IDENT);
     989    14069131 :                         if ((k = find_keyword_bs(lc,s)))
     990     8620159 :                                 lc->yyval = k->token;
     991    14069582 :                         return lc->yyval;
     992             :                 }
     993             :         }
     994             :         if (cur < 0)  /* the loop only exits with cur == EOF, which is negative, so this always returns */
     995             :                 return cur;
     996             :         (void)scanner_token(lc, IDENT);  /* NOTE(review): looks unreachable; the listing shows no hit counts from here on */
     997             :         if ((k = find_keyword_bs(lc,s)))
     998             :                 lc->yyval = k->token;
     999             :         return lc->yyval;
    1000             : }
    1001             : /* Skip whitespace.  Before every read lc->yysval is moved up to the
                     :  * current position (lc->yycur), then one character is read; this
                     :  * repeats while the character read satisfies iswspace().  Returns the
                     :  * first character that is not whitespace, or EOF.
                     :  */
    1002             : static int
    1003    14658214 : skip_white_space(struct scanner * lc)
    1004             : {
    1005    18287317 :         int cur;
    1006             : 
    1007    18287317 :         do {
    1008    18287317 :                 lc->yysval = lc->yycur;
    1009    18287317 :         } while ((cur = scanner_getc(lc)) != EOF && iswspace(cur));
    1010    14656962 :         return cur;
    1011             : }
    1012             : /* Skip a block comment; the opening slash-star has been consumed by the
                     :  * caller.  Block comments nest: each further slash-star raises `depth`,
                     :  * each star-slash lowers it, and scanning stops once depth is 0 or the
                     :  * input ends.  lc->started is set to 1 while scanning and restored to
                     :  * its previous value afterwards; lc->yysval is moved to the position
                     :  * after the comment.  Returns EOF if the input ended inside the
                     :  * comment, otherwise '\n'.
                     :  */
    1013             : static int
    1014       71266 : skip_c_comment(struct scanner * lc)
    1015             : {
    1016       71266 :         int cur;
    1017       71266 :         int prev = 0;  /* previous character, used to recognise the two-character delimiters */
    1018       71266 :         int started = lc->started;
    1019       71266 :         int depth = 1;  /* the comment opened by the caller */
    1020             : 
    1021       71266 :         lc->started = 1;
    1022     1437108 :         while (depth > 0 && (cur = scanner_getc(lc)) != EOF) {
    1023     1365842 :                 if (prev == '*' && cur == '/')
    1024       71266 :                         depth--;
    1025     1294576 :                 else if (prev == '/' && cur == '*') {
    1026             :                         /* block comments can nest */
    1027           0 :                         cur = 0; /* prevent slash-star-slash from matching */
    1028           0 :                         depth++;
    1029             :                 }
    1030             :                 prev = cur;
    1031             :         }
    1032       71266 :         lc->yysval = lc->yycur;
    1033       71266 :         lc->started = started;
    1034             :         /* a comment is equivalent to a newline */
    1035       71266 :         return cur == EOF ? cur : '\n';
    1036             : }
    1037             : /* Skip a line comment: read characters until a '\n' has been consumed or
                     :  * the input ends.  lc->started is set to 1 while scanning and restored
                     :  * to its previous value afterwards; lc->yysval is moved to the position
                     :  * after the comment.  Returns the last character read, i.e. '\n' or
                     :  * EOF.
                     :  */
    1038             : static int
    1039        3273 : skip_sql_comment(struct scanner * lc)
    1040             : {
    1041        3273 :         int cur;
    1042        3273 :         int started = lc->started;
    1043             : 
    1044        3273 :         lc->started = 1;
    1045      833837 :         while ((cur = scanner_getc(lc)) != EOF && (cur != '\n'))
    1046             :                 ;
    1047        3273 :         lc->yysval = lc->yycur;
    1048        3273 :         lc->started = started;
    1049             :         /* a comment is equivalent to a newline */
    1050        3273 :         return cur;
    1051             : }
    1052             : 
    1053             : static int tokenize(mvc * lc, int cur);  /* forward declaration: scanner_symbol() calls it after skipping a comment */
    1054             : /* Digit classifiers, one per number base; check_validity_number() picks
                     :  * one of them according to the token type it is given.  Each returns
                     :  * true when `cur` is a digit of that base.  The binary and octal
                     :  * variants accept characters that pass iswdigit() and compare below
                     :  * '2' and '8' respectively.
                     :  */
    1055     5990658 : static inline bool is_valid_decimal_digit(int cur) { return (iswdigit(cur)); }
    1056          13 : static inline bool is_valid_binary_digit(int cur) { return (iswdigit(cur) && cur < '2'); }
    1057          10 : static inline bool is_valid_octal_digit(int cur) { return (iswdigit(cur) && cur < '8'); }
    1058        3688 : static inline bool is_valid_hexadecimal_digit(int cur) { return iswxdigit(cur); }
    1059             : /* Consume a run of digits in which single '_' characters may separate
                     :  * digits.
                     :  *
                     :  * pcur   first character of the run, already read by the caller
                     :  * initial_underscore_allowed
                     :  *        whether the run may begin with '_'
                     :  * token  out: set to `type` once the first character is accepted, and
                     :  *        to 0 when the run is malformed
                     :  * type   BINARYNUM, OCTALNUM or HEXADECIMALNUM select that digit class;
                     :  *        any other value selects decimal digits
                     :  *
                     :  * Returns the character that ended the run (read but not put back; it
                     :  * may be EOF).  When *token is set to 0 the return value is the
                     :  * offending character instead: '_' for a misplaced underscore, or the
                     :  * character that is not a valid digit.
                     :  */
    1060     1999177 : static inline int check_validity_number(mvc* c, int pcur, bool initial_underscore_allowed, int *token, int type) {
    1061     1999177 :         struct scanner *lc = &c->scanner;
    1062     1999177 :         bool (*is_valid_n_ary_digit)(int);
    1063             : 
    1064     1999177 :         if (pcur == '_' && !initial_underscore_allowed)  /* ERROR: initial underscore not allowed */  {
    1065           0 :                 *token = 0;
    1066           0 :                 return '_';
    1067             :         }
    1068             : 
    1069     1999177 :         switch (type) {
    1070             :         case BINARYNUM:
    1071             :                 is_valid_n_ary_digit = &is_valid_binary_digit;
    1072             :                 break;
    1073           3 :         case OCTALNUM:
    1074           3 :                 is_valid_n_ary_digit = &is_valid_octal_digit;
    1075           3 :                 break;
    1076         280 :         case HEXADECIMALNUM:
    1077         280 :                 is_valid_n_ary_digit = &is_valid_hexadecimal_digit;
    1078         280 :                 break;
    1079     1998892 :         default:
    1080     1998892 :                 is_valid_n_ary_digit = &is_valid_decimal_digit;
    1081     1998892 :                 break;
    1082             :         }
    1083             : 
    1084     1999177 :         if ( !(pcur == '_' || is_valid_n_ary_digit(pcur)) ) /* ERROR: first digit is not valid */ {
    1085          18 :                 *token = 0;
    1086          18 :                 return pcur;
    1087             :         }
    1088             : 
    1089     1999297 :         int cur = scanner_getc(lc);
    1090     1999472 :         *token = type;
    1091     4008388 :         while (cur != EOF) {  /* pcur always holds the previously accepted character */
    1092     4008190 :                 if (cur == '_') {
    1093          25 :                         if (pcur == '_') /* ERROR: multiple consecutive underscores */ {
    1094           2 :                                 *token = 0;
    1095           2 :                                 return '_';
    1096             :                         }
    1097             :                 }
    1098     4008165 :                 else if (!is_valid_n_ary_digit(cur))
    1099             :                         break;
    1100     2009261 :                 pcur = cur;
    1101     2009261 :                 cur = scanner_getc(lc);
    1102             :         }
    1103             : 
    1104     1998806 :         if (pcur == '_')  {  /* the last accepted character was an underscore */
    1105           3 :                 *token = 0;
    1106           3 :                 if (iswalnum(cur))       /* ERROR: not a valid digit */
    1107             :                         return cur;
    1108             :                 else                            /* ERROR: number ends with underscore */
    1109             :                         return '_';
    1110             :         }
    1111             : 
    1112             :         return cur;
    1113             : }
    1114             : /* Lex a numeric literal.  `cur` is its first character: a digit, or '.'
                     :  * when scanner_symbol() has seen a '.' followed by a digit.  The token
                     :  * type starts as sqlINT and is changed by check_validity_number() to
                     :  * BINARYNUM / OCTALNUM / HEXADECIMALNUM (after a 0b / 0o / 0x prefix),
                     :  * INTNUM (digits after a '.') or APPROXNUM (exponent), or set here to
                     :  * OIDNUM (digits followed by "@0"); 0 marks an invalid literal.
                     :  *
                     :  * Returns scanner_token(lc, token) for a valid literal, LEX_ERROR after
                     :  * reporting "Unexpected symbol" for an invalid one, or EOF.
                     :  */
    1115             : static int
    1116     1985410 : number(mvc * c, int cur)
    1117             : {
    1118     1985410 :         struct scanner *lc = &c->scanner;
    1119     1985410 :         int token = sqlINT;
    1120             : 
    1121             :         /* a number has one of these forms (expressed in regular expressions):
    1122             :          * 0x[0-9A-Fa-f]+                   -- (hexadecimal) INTEGER
    1123             :          * \.[0-9]+                         -- DECIMAL
    1124             :          * [0-9]+\.[0-9]*                   -- DECIMAL
    1125             :          * [0-9]+@0                         -- OID
    1126             :          * [0-9]*\.[0-9]+[eE][-+]?[0-9]+    -- REAL
    1127             :          * [0-9]+(\.[0-9]*)?[eE][-+]?[0-9]+ -- REAL
    1128             :          * [0-9]+                           -- (decimal) INTEGER
                     :          * The code below additionally handles 0b and 0o prefixes and single
                     :          * '_' separators between digits (see check_validity_number()).
    1129             :          */
    1130     1985410 :         lc->started = 1;
    1131     1985410 :         if (cur == '0') {
    1132      349174 :                 switch ((cur = scanner_getc(lc))) {
    1133           2 :                 case 'b':
    1134           2 :                         cur = scanner_getc(lc);
    1135           2 :                         if ((cur = check_validity_number(c, cur, true, &token, BINARYNUM)) == EOF) return cur;
    1136             :                         break;
    1137           3 :                 case 'o':
    1138           3 :                         cur = scanner_getc(lc);
    1139           3 :                         if ((cur = check_validity_number(c,  cur, true, &token, OCTALNUM)) == EOF) return cur;
    1140             :                         break;
    1141         280 :                 case 'x':
    1142         280 :                         cur = scanner_getc(lc);
    1143         280 :                         if ((cur = check_validity_number(c,  cur, true, &token, HEXADECIMALNUM)) == EOF) return cur;
    1144             :                         break;
    1145      348894 :                 default:
    1146      348894 :                         utf8_putchar(lc, cur);  /* no base prefix: put the character back ... */
    1147      348894 :                         cur = '0';  /* ... and continue with the leading '0' as a decimal number */
    1148             :                 }
    1149             :         }
    1150     1985413 :         if (token == sqlINT) {  /* still sqlINT, i.e. no base-prefixed literal was recognised above */
    1151     1985309 :                 if ((cur = check_validity_number(c, cur, false, &token, sqlINT)) == EOF) return cur;
    1152     1985125 :                 if (cur == '@') {
    1153           0 :                         if (token == sqlINT) {
    1154           0 :                                 cur = scanner_getc(lc);
    1155           0 :                                 if (cur == EOF)
    1156             :                                         return cur;
    1157           0 :                                 if (cur == '0') {
    1158           0 :                                         cur = scanner_getc(lc);
    1159           0 :                                         if (cur == EOF)
    1160             :                                                 return cur;
    1161           0 :                                         token = OIDNUM;
    1162             :                                 } else {
    1163             :                                         /* number + '@' not followed by 0: show '@' as erroneous */
    1164           0 :                                         utf8_putchar(lc, cur);
    1165           0 :                                         cur = '@';
    1166           0 :                                         token = 0;
    1167             :                                 }
    1168             :                         }
    1169             :                 } else {
    1170     1985125 :                         if (cur == '.') {
    1171       11186 :                                 cur = scanner_getc(lc);  /* NOTE(review): if this is EOF nothing returns before the assert(cur != EOF) below; the '@' branch checks for EOF -- confirm */
    1172       11186 :                                 if (iswalnum(cur)) /* early exit for numerical forms with final . e.g. 10. */
    1173       11179 :                                 if ((cur = check_validity_number(c, cur, false, &token, INTNUM)) == EOF) return cur;
    1174             :                         }
    1175     1985125 :                         if (token != 0)
    1176     1984785 :                         if (cur == 'e' || cur == 'E') {
    1177        2231 :                                 cur = scanner_getc(lc);
    1178        2231 :                                 if (cur == '+' || cur == '-')  /* optional exponent sign */
    1179        2111 :                                         cur = scanner_getc(lc);
    1180        2231 :                                 if ((cur = check_validity_number(c, cur, false, &token, APPROXNUM)) == EOF) return cur;
    1181             :                         }
    1182             :                 }
    1183             :         }
    1184             : 
    1185     1982998 :         assert(cur != EOF);
    1186             : 
    1187     1985229 :         if (iswalnum(cur)) /* ERROR: not a valid digit */
    1188           6 :                 token = 0;
    1189             : 
    1190     1985229 :         utf8_putchar(lc, cur);  /* cur is the lookahead character after the literal: put it back */
    1191             : 
    1192     1985207 :         if (token) {
    1193     1985197 :                 return scanner_token(lc, token);
    1194             :         } else {
    1195          10 :                 (void)sql_error( c, 2, SQLSTATE(42000) "Unexpected symbol %lc", (wint_t) cur);
    1196          10 :                 return LEX_ERROR;
    1197             :         }
    1198             : }
    1199             : /* Lex a token that starts with the character `cur`, dispatching on it:
                     :  * operators (including the multi-character comparison, shift,
                     :  * concatenation and GEOM_* ones), single- and double-quoted strings
                     :  * (scanner_string), "{...}" bodies (scanner_body), numbers, and the
                     :  * comment starters (slash-star, "--", '#').  After a comment has been
                     :  * skipped, scanning continues through tokenize().  A lookahead
                     :  * character that turns out not to belong to the token is put back with
                     :  * utf8_putchar().
                     :  *
                     :  * Returns the token from scanner_token() or from the helper that was
                     :  * called, EOF when the input ends while looking ahead, and LEX_ERROR
                     :  * after reporting "Unexpected symbol" for a character no case handles.
                     :  */
    1200             : static
    1201    13337237 : int scanner_symbol(mvc * c, int cur)
    1202             : {
    1203    13337237 :         struct scanner *lc = &c->scanner;
    1204    13337237 :         int next = 0;
    1205    13337237 :         int started = lc->started;  /* written back to lc->started before a comment is skipped */
    1206             : 
    1207    13337237 :         switch (cur) {
    1208       73932 :         case '/':
    1209       73932 :                 lc->started = 1;
    1210       73932 :                 next = scanner_getc(lc);
    1211       73932 :                 if (next < 0)
    1212             :                         return EOF;
    1213       73932 :                 if (next == '*') {
    1214       71266 :                         lc->started = started;
    1215       71266 :                         cur = skip_c_comment(lc);
    1216       71266 :                         if (cur < 0)
    1217             :                                 return EOF;
    1218       71266 :                         return tokenize(c, cur);
    1219             :                 } else {
    1220        2666 :                         utf8_putchar(lc, next);
    1221        2666 :                         return scanner_token(lc, cur);
    1222             :                 }
    1223           0 :         case '0':  /* 0 hits in this listing; tokenize() sends characters that pass iswdigit() to number() itself */
    1224             :         case '1':
    1225             :         case '2':
    1226             :         case '3':
    1227             :         case '4':
    1228             :         case '5':
    1229             :         case '6':
    1230             :         case '7':
    1231             :         case '8':
    1232             :         case '9':
    1233           0 :                 return number(c, cur);
    1234           8 :         case '#':
    1235           8 :                 if ((cur = skip_sql_comment(lc)) == EOF)
    1236             :                         return cur;
    1237           8 :                 return tokenize(c, cur);
    1238      825110 :         case '\'':
    1239      825110 :                 if (lc->raw_string_mode || lc->next_string_is_raw)
    1240          50 :                         return scanner_string(c, cur, false);
    1241      825060 :                 return scanner_string(c, cur, true);
    1242     1330639 :         case '"':
    1243     1330639 :                 return scanner_string(c, cur, false);
    1244         500 :         case '{':
    1245             :                 // if previous tokens like LANGUAGE IDENT
    1246             :                 // TODO checking on IDENT only may not be enough
    1247         500 :                 if (lc->yylast == IDENT)
    1248         234 :                         return scanner_body(c);
    1249         266 :                 lc->started = 1;
    1250         266 :                 return scanner_token(lc, cur);
    1251         266 :         case '}':
    1252         266 :                 lc->started = 1;
    1253         266 :                 return scanner_token(lc, cur);
    1254       30595 :         case '-':
    1255       30595 :                 lc->started = 1;
    1256       30595 :                 next = scanner_getc(lc);
    1257       30595 :                 if (next < 0)
    1258             :                         return EOF;
    1259       30594 :                 if (next == '-') {
    1260        3265 :                         lc->started = started;
    1261        3265 :                         if ((cur = skip_sql_comment(lc)) == EOF)
    1262             :                                 return cur;
    1263        3265 :                         return tokenize(c, cur);
    1264             :                 }
    1265       27329 :                 lc->started = 1;
    1266       27329 :                 utf8_putchar(lc, next);
    1267       27329 :                 return scanner_token(lc, cur);
    1268          12 :         case '~': /* binary not */
    1269          12 :                 lc->started = 1;
    1270          12 :                 next = scanner_getc(lc);
    1271          12 :                 if (next < 0)
    1272             :                         return EOF;
    1273          12 :                 if (next == '=')
    1274           5 :                         return scanner_token(lc, GEOM_MBR_EQUAL);
    1275           7 :                 utf8_putchar(lc, next);
    1276           7 :                 return scanner_token(lc, cur);
    1277     7450536 :         case '^': /* binary xor */
    1278             :         case '*':
    1279             :         case ':':
    1280             :         case '%':
    1281             :         case '+':
    1282             :         case '(':
    1283             :         case ')':
    1284             :         case ',':
    1285             :         case '=':
    1286             :         case '[':
    1287             :         case ']':
    1288     7450536 :                 lc->started = 1;
    1289     7450536 :                 return scanner_token(lc, cur);
    1290        1615 :         case '?':
    1291        1615 :                 lc->started = 1;
    1292        1615 :                 return scanner_token(lc, PARAM);
    1293        6397 :         case '&':
    1294        6397 :                 lc->started = 1;
    1295        6397 :                 cur = scanner_getc(lc);
    1296        6397 :                 if (cur < 0)
    1297             :                         return EOF;
    1298        6397 :                 if (cur < 0)  /* NOTE(review): repeats the check two lines up; looks redundant */
    1299             :                         return EOF;
    1300        6397 :                 if(cur == '<') {
    1301           3 :                         next = scanner_getc(lc);
    1302           3 :                         if (next < 0)
    1303             :                                 return EOF;
    1304           3 :                         if(next == '|') {
    1305           0 :                                 return scanner_token(lc, GEOM_OVERLAP_OR_BELOW);
    1306             :                         } else {
    1307           3 :                                 utf8_putchar(lc, next); //put the char back
    1308           3 :                                 return scanner_token(lc, GEOM_OVERLAP_OR_LEFT);
    1309             :                         }
    1310        6394 :                 } else if(cur == '>')
    1311           3 :                         return scanner_token(lc, GEOM_OVERLAP_OR_RIGHT);
    1312        6391 :                 else if(cur == '&')
    1313           3 :                         return scanner_token(lc, GEOM_OVERLAP);
    1314             :                 else {/* binary and */
    1315        6388 :                         utf8_putchar(lc, cur); //put the char back
    1316        6388 :                         return scanner_token(lc, '&');
    1317             :                 }
    1318          19 :         case '@':
    1319          19 :                 lc->started = 1;
    1320          19 :                 return scanner_token(lc, AT);
    1321     1046546 :         case ';':
    1322     1046546 :                 lc->started = 0;  /* the only case in this switch that sets lc->started to 0 */
    1323     1046546 :                 return scanner_token(lc, SCOLON);
    1324          35 :         case '!':
    1325          35 :                 lc->started = 1;
    1326          35 :                 cur = scanner_getc(lc);
    1327          35 :                 if (cur < 0)
    1328             :                         return EOF;
    1329          35 :                 else if (cur == '=') {
    1330          29 :                         lc->rs->buf[lc->rs->pos + lc->yycur - 2] = '<';  /* overwrite the two bytes just read ("!=") ... */
    1331          29 :                         lc->rs->buf[lc->rs->pos + lc->yycur - 1] = '>';  /* ... with "<>" in the input buffer */
    1332          29 :                         return scanner_token( lc, COMPARISON);
    1333             :                 } else {
    1334           6 :                         utf8_putchar(lc, cur); //put the char back
    1335             :                 }
    1336           6 :                 return scanner_token(lc, '!');
    1337       52814 :         case '<':
    1338       52814 :                 lc->started = 1;
    1339       52814 :                 cur = scanner_getc(lc);
    1340       52814 :                 if (cur < 0)
    1341             :                         return EOF;
    1342       52814 :                 if (cur == '=') {
    1343        3140 :                         return scanner_token( lc, COMPARISON);
    1344       49674 :                 } else if (cur == '>') {
    1345       35951 :                         return scanner_token( lc, COMPARISON);
    1346       13723 :                 } else if (cur == '<') {
    1347          44 :                         next = scanner_getc(lc);
    1348          44 :                         if (next < 0)
    1349             :                                 return EOF;
    1350          44 :                         if (next == '=') {
    1351           4 :                                 return scanner_token( lc, LEFT_SHIFT_ASSIGN);
    1352          40 :                         } else if (next == '|') {
    1353           1 :                                 return scanner_token(lc, GEOM_BELOW);
    1354             :                         } else {
    1355          39 :                                 utf8_putchar(lc, next); //put the char back
    1356          39 :                                 return scanner_token( lc, LEFT_SHIFT);
    1357             :                         }
    1358       13679 :                 } else if(cur == '-') {
    1359          19 :                         next = scanner_getc(lc);
    1360          19 :                         if (next < 0)
    1361             :                                 return EOF;
    1362          19 :                         if(next == '>') {
    1363           7 :                                 return scanner_token(lc, GEOM_DIST);
    1364             :                         } else {
    1365             :                                 //put the characters back and fall in the next possible case
    1366          12 :                                 utf8_putchar(lc, next);
    1367          12 :                                 utf8_putchar(lc, cur);
    1368          12 :                                 return scanner_token( lc, COMPARISON);
    1369             :                         }
    1370             :                 } else {
    1371       13660 :                         utf8_putchar(lc, cur);
    1372       13660 :                         return scanner_token( lc, COMPARISON);
    1373             :                 }
    1374       47904 :         case '>':
    1375       47904 :                 lc->started = 1;
    1376       47904 :                 cur = scanner_getc(lc);
    1377       47904 :                 if (cur < 0)
    1378             :                         return EOF;
    1379       47904 :                 if (cur == '>') {
    1380        2713 :                         cur = scanner_getc(lc);
    1381        2713 :                         if (cur < 0)
    1382             :                                 return EOF;
    1383        2713 :                         if (cur == '=')
    1384           3 :                                 return scanner_token( lc, RIGHT_SHIFT_ASSIGN);
    1385        2710 :                         utf8_putchar(lc, cur);
    1386        2710 :                         return scanner_token( lc, RIGHT_SHIFT);
    1387       45191 :                 } else if (cur != '=') {
    1388       42921 :                         utf8_putchar(lc, cur);
    1389       42921 :                         return scanner_token( lc, COMPARISON);
    1390             :                 } else {
    1391        2270 :                         return scanner_token( lc, COMPARISON);
    1392             :                 }
    1393     2281360 :         case '.':
    1394     2281360 :                 lc->started = 1;
    1395     2281360 :                 cur = scanner_getc(lc);
    1396     2281360 :                 if (cur < 0)
    1397             :                         return EOF;
    1398     2281359 :                 if (!iswdigit(cur)) {
    1399     2281345 :                         utf8_putchar(lc, cur);
    1400     2281348 :                         return scanner_token( lc, '.');
    1401             :                 } else {
    1402          14 :                         utf8_putchar(lc, cur);  /* put the digit back and let number() start from the '.' */
    1403          14 :                         cur = '.';
    1404          14 :                         return number(c, cur);
    1405             :                 }
    1406      188939 :         case '|': /* binary or or string concat */
    1407      188939 :                 lc->started = 1;
    1408      188939 :                 cur = scanner_getc(lc);
    1409      188939 :                 if (cur < 0)
    1410             :                         return EOF;
    1411      188939 :                 if (cur == '|') {
    1412      188914 :                         return scanner_token(lc, CONCATSTRING);
    1413          25 :                 } else if (cur == '&') {
    1414           0 :                         next = scanner_getc(lc);
    1415           0 :                         if (next < 0)
    1416             :                                 return EOF;
    1417           0 :                         if(next == '>') {
    1418           0 :                                 return scanner_token(lc, GEOM_OVERLAP_OR_ABOVE);
    1419             :                         } else {
    1420           0 :                                 utf8_putchar(lc, next); //put the char back
    1421           0 :                                 utf8_putchar(lc, cur); //put the char back
    1422           0 :                                 return scanner_token(lc, '|');
    1423             :                         }
    1424          25 :                 } else if (cur == '>') {
    1425           1 :                         next = scanner_getc(lc);
    1426           1 :                         if (next < 0)
    1427             :                                 return EOF;
    1428           1 :                         if(next == '>') {
    1429           1 :                                 return scanner_token(lc, GEOM_ABOVE);
    1430             :                         } else {
    1431           0 :                                 utf8_putchar(lc, next); //put the char back
    1432           0 :                                 utf8_putchar(lc, cur); //put the char back
    1433           0 :                                 return scanner_token(lc, '|');
    1434             :                         }
    1435             :                 } else {
    1436          24 :                         utf8_putchar(lc, cur);
    1437          24 :                         return scanner_token(lc, '|');
    1438             :                 }
    1439             :         }
    1440          10 :         (void)sql_error( c, 3, SQLSTATE(42000) "Unexpected symbol (%lc)", (wint_t) cur);  /* reached when no case above matched cur */
    1441          10 :         return LEX_ERROR;
    1442             : }
    1443             : /* tokenize: classify the character cur and dispatch to the matching
                     :  * sub-scanner.
                     :  *
                     :  * c is the SQL context whose embedded scanner state is used; cur is
                     :  * the character most recently read from the input.
                     :  *
                     :  * White space is skipped with skip_white_space() and classification
                     :  * restarts with the character it returns.  Digits go to number(),
                     :  * letters and '_' to keyword_or_ident(), punctuation to
                     :  * scanner_symbol().  The letters e/E, x/X, r/R directly followed by
                     :  * a single quote, and u/U directly followed by &' or &", start a
                     :  * prefixed literal that is handed to scanner_string().
                     :  *
                     :  * Returns whatever the selected sub-scanner returns, EOF when the
                     :  * input ends where that is acceptable (see the end of the loop), or
                     :  * the result of scanner_error() for anything else, including U+FEFF.
                     :  */
    1444             : static int
    1445    29415909 : tokenize(mvc * c, int cur)
    1446             : {
    1447    29415909 :         struct scanner *lc = &c->scanner;
    1448    58687271 :         while (1) {
    1449    44051590 :                 if (cur == 0xFEFF) {
    1450             :                         /* on Linux at least, iswpunct returns TRUE
    1451             :                          * for U+FEFF, but we don't want that, we just
    1452             :                          * want to go to the scanner_error case
    1453             :                          * below */
    1454             :                         ;
    1455    44052010 :                 } else if (iswspace(cur)) {
    1456    14652729 :                         if ((cur = skip_white_space(lc)) == EOF)
    1457             :                                 return cur;
    1458    14635681 :                         continue;  /* try again */
    1459    29399281 :                 } else if (iswdigit(cur)) {
    1460     1985883 :                         return number(c, cur);
    1461    27413398 :                 } else if (iswalpha(cur) || cur == '_') {
    1462    14039541 :                         switch (cur) { /* a prefix letter only counts when the literal's quote follows immediately */
    1463      668188 :                         case 'e': /* string with escapes */
    1464             :                         case 'E':
    1465      668188 :                                 if (scanner_read_more(lc, 1) != EOF && /* peek at the byte scanner_getc() would return next */
    1466      668188 :                                     lc->rs->buf[lc->rs->pos + lc->yycur] == '\'') {
    1467        3915 :                                         return scanner_string(c, scanner_getc(lc), true); /* consume the quote; NOTE(review): the true flag presumably enables escape processing -- confirm in scanner_string */
    1468             :                                 }
    1469             :                                 break;
    1470      428340 :                         case 'x': /* blob */
    1471             :                         case 'X':
    1472             :                         case 'r': /* raw string */
    1473             :                         case 'R':
    1474      428340 :                                 if (scanner_read_more(lc, 1) != EOF &&
    1475      428340 :                                     lc->rs->buf[lc->rs->pos + lc->yycur] == '\'') {
    1476        3295 :                                         return scanner_string(c, scanner_getc(lc), false);
    1477             :                                 }
    1478             :                                 break;
    1479      162207 :                         case 'u': /* unicode string */
    1480             :                         case 'U':
    1481      162207 :                                 if (scanner_read_more(lc, 1) != EOF && /* needs two bytes of lookahead: '&' and then a quote */
    1482      162224 :                                     lc->rs->buf[lc->rs->pos + lc->yycur] == '&' &&
    1483          17 :                                     scanner_read_more(lc, 2) != EOF &&
    1484          17 :                                     (lc->rs->buf[lc->rs->pos + lc->yycur + 1] == '\'' ||
    1485             :                                      lc->rs->buf[lc->rs->pos + lc->yycur + 1] == '"')) {
    1486          17 :                                         cur = scanner_getc(lc); /* '&' */
    1487          17 :                                         return scanner_string(c, scanner_getc(lc), false);
    1488             :                                 }
    1489             :                                 break;
    1490             :                         default:
    1491             :                                 break;
    1492             :                         }
    1493    14069395 :                         return keyword_or_ident(c, cur); /* no literal prefix matched: ordinary keyword or identifier */
    1494    13336776 :                 } else if (iswpunct(cur)) {
    1495    13336324 :                         return scanner_symbol(c, cur);
    1496             :                 }
    1497          32 :                 if (cur == EOF) { /* EOF is passed through in LINE_1 mode or while lc->started is still zero; otherwise it is reported as an error */
    1498           0 :                         if (lc->mode == LINE_1 || !lc->started )
    1499             :                                 return cur;
    1500           0 :                         return scanner_error(c, cur);
    1501             :                 }
    1502             :                 /* none of the above: error */
    1503          32 :                 return scanner_error(c, cur);
    1504             :         }
    1505             : }
    1506             : 
    1507             : /* valid_ident: check a "quoted" identifier and copy it into dst.
    1508             :  *
    1509             :  * SQL 'quoted' idents consist of a set of any character of
    1510             :  * the source language character set other than a 'quote'.
    1511             :  * While copying, a doubled double-quote ("") in s is stored in dst
    1512             :  * as a single double-quote character.
    1513             :  *
    1514             :  * MonetDB has 3 restrictions:
                     :  *      1 we disallow '%' as the first character (not enforced when
                     :  *        admin is true)
                     :  *      2 the copied identifier must be shorter than 1024 bytes, i.e.
                     :  *        at most 1023 bytes are accepted
                     :  *      3 the identifier equal to TID + 1 is not allowed ('TID%',
                     :  *        going by the note at the comparison below)
                     :  *
                     :  * Returns true when s passes all checks, false otherwise.  At most
                     :  * 1024 bytes are written to dst; dst is NUL-terminated only when
                     :  * the whole of s was copied, so after a failure on restriction 1 or
                     :  * 2 its contents must not be used as a string.
                     :  */
    1515             : static bool
    1516     1330628 : valid_ident(bool admin, const char *restrict s, char *restrict dst)
    1517             : {
    1518     1330628 :         int p = 0; /* number of bytes stored in dst so far */
    1519             : 
    1520     1330628 :         if (!admin && *s == '%')
    1521             :                 return false;
    1522             : 
    1523     9848093 :         while (*s) {
    1524     8517465 :                 if ((dst[p++] = *s++) == '"' && *s == '"') /* copy one byte; of a "" pair only the first is kept */
    1525          66 :                         s++;
    1526     8517465 :                 if (p >= 1024) /* checked after the store, so the 1024th byte is what triggers the rejection */
    1527             :                         return false;
    1528             :         }
    1529     1330628 :         dst[p] = '\0';
    1530     1330628 :         if (strcmp(dst, TID + 1) == 0) /* an index named 'TID%' could interfere with '%TID%' */
    1531             :                 return false;
    1532             :         return true;
    1533             : }
    1534             : /* sql_get_next_token: produce one raw token and its value.
                     :  *
                     :  * parm is the mvc context; yylval->sval receives the token text.
                     :  *
                     :  * A token stored in lc->yynext by an earlier lookahead is returned
                     :  * first, without touching yylval.  Otherwise one character is read
                     :  * and tokenize() decides the token.  KW_TYPE is reported as aTYPE and
                     :  * KW_OPERATORS as OPERATORS.
                     :  *
                     :  * For IDENT, COMPARISON, RANK, aTYPE, MARGFUNC and OPERATORS the
                     :  * token text is duplicated in c->sa.  For STRING the literal is
                     :  * decoded into a fresh c->sa buffer according to its first
                     :  * character:
                     :  *   "      checked and copied by valid_ident(), returned as IDENT
                     :  *   e/E    escapes decoded by GDKstrFromStr()
                     :  *   u/U    copied verbatim, returned as -STRING or -IDENT so that
                     :  *          the caller can finish the Unicode escape handling
                     :  *   x/X    doubled quotes collapsed, returned as XSTRING
                     :  *   r/R    doubled quotes collapsed
                     :  *   other  doubled quotes collapsed when raw_string_mode or
                     :  *          next_string_is_raw is set, else GDKstrFromStr()
                     :  * For all other tokens yylval->sval keeps pointing into the read
                     :  * buffer.
                     :  *
                     :  * Returns the token, EOF when the buffer is missing or no character
                     :  * could be read, or LEX_ERROR (after sql_error()) for an invalid
                     :  * identifier or a literal that GDKstrFromStr() rejects.
                     :  */
    1535             : static inline int
    1536    29805488 : sql_get_next_token(YYSTYPE *yylval, void *parm)
    1537             : {
    1538    29805488 :         mvc *c = (mvc*)parm;
    1539    29805488 :         struct scanner *lc = &c->scanner;
    1540    29805488 :         int token = 0, cur = 0;
    1541             : 
    1542    29805488 :         if (lc->rs->buf == NULL) /* malloc failure */
    1543             :                 return EOF;
    1544             : 
    1545    29805488 :         if (lc->yynext) { /* hand out the token a previous lookahead stored */
    1546      358570 :                 int next = lc->yynext;
    1547             : 
    1548      358570 :                 lc->yynext = 0;
    1549      358570 :                 return(next);
    1550             :         }
    1551             : 
    1552    29446918 :         if (lc->yybak) { /* write the saved byte back at the read position; NOTE(review): it is saved outside this view, presumably when a token was terminated in place */
    1553    28369692 :                 lc->rs->buf[lc->rs->pos + lc->yycur] = lc->yybak;
    1554    28369692 :                 lc->yybak = 0;
    1555             :         }
    1556             : 
    1557    29446918 :         lc->yysval = lc->yycur; /* offset at which this token's text starts */
    1558    29446918 :         lc->yylast = lc->yyval;
    1559    29446918 :         cur = scanner_getc(lc);
    1560    29451656 :         if (cur < 0)
    1561             :                 return EOF;
    1562    29340845 :         token = tokenize(c, cur);
    1563             : 
    1564    29333307 :         yylval->sval = (lc->rs->buf + lc->rs->pos + lc->yysval); /* points into the read buffer, not a copy */
    1565             : 
    1566    29333307 :         if (token == KW_TYPE)
    1567             :                 token = aTYPE;
    1568    29280694 :         if (token == KW_OPERATORS)
    1569       11249 :                 token = OPERATORS;
    1570             : 
    1571    29333307 :         if (token == IDENT || token == COMPARISON ||
    1572    23785891 :             token == RANK || token == aTYPE || token == MARGFUNC || token == OPERATORS) {
    1573     5618886 :                 yylval->sval = sa_strndup(c->sa, yylval->sval, lc->yycur-lc->yysval); /* private copy of exactly the token's bytes */
    1574     5619231 :                 lc->next_string_is_raw = false;
    1575    23714421 :         } else if (token == STRING) {
    1576     2162946 :                 char quote = *yylval->sval; /* first character of the literal: a quote or a prefix letter */
    1577     2162946 :                 char *str = sa_alloc( c->sa, (lc->yycur-lc->yysval-2)*2 + 1 ); /* NOTE(review): the result is used without a NULL check -- confirm sa_alloc cannot fail here */
    1578     2162946 :                 char *dst;
    1579             : 
    1580     2162946 :                 assert(quote == '"' || quote == '\'' || quote == 'E' || quote == 'e' || quote == 'U' || quote == 'u' || quote == 'X' || quote == 'x' || quote == 'R' || quote == 'r');
    1581             : 
    1582     2162946 :                 lc->rs->buf[lc->rs->pos + lc->yycur - 1] = 0; /* terminate the literal in place over its last byte; written back after the switch */
    1583     2162946 :                 switch (quote) {
    1584     1330628 :                 case '"':
    1585     1330628 :                         if (valid_ident(c->user_id == USER_MONETDB || c->user_id == ROLE_SYSADMIN, yylval->sval+1,str)) {
    1586             :                                 token = IDENT;
    1587             :                         } else {
    1588           0 :                                 sql_error(c, 1, SQLSTATE(42000) "Invalid identifier '%s'", yylval->sval+1);
    1589           0 :                                 return LEX_ERROR; /* NOTE(review): this and the other LEX_ERROR returns below leave the overwritten byte unrestored -- confirm that is harmless */
    1590             :                         }
    1591             :                         break;
    1592        3914 :                 case 'e':
    1593             :                 case 'E':
    1594        3914 :                         assert(yylval->sval[1] == '\'');
    1595        3914 :                         if (GDKstrFromStr((unsigned char *) str,
    1596             :                                                           (unsigned char *) yylval->sval + 2,
    1597        3914 :                                                           lc->yycur-lc->yysval - 2, '\'') < 0) {
    1598           1 :                                 char *err = GDKerrbuf;
    1599           1 :                                 if (strncmp(err, GDKERROR, strlen(GDKERROR)) == 0) /* report the message without its GDKERROR prefix or leading '!' */
    1600           1 :                                         err += strlen(GDKERROR);
    1601           0 :                                 else if (*err == '!')
    1602           0 :                                         err++;
    1603           1 :                                 sql_error(c, 1, SQLSTATE(42000) "%s", err);
    1604           1 :                                 return LEX_ERROR;
    1605             :                         }
    1606             :                         quote = '\''; /* the byte to write back is the closing quote, not the prefix letter */
    1607             :                         break;
    1608          17 :                 case 'u':
    1609             :                 case 'U':
    1610          17 :                         assert(yylval->sval[1] == '&');
    1611          17 :                         assert(yylval->sval[2] == '\'' || yylval->sval[2] == '"');
    1612          17 :                         strcpy(str, yylval->sval + 3); /* skip the prefix letter, '&' and the opening quote; copied undecoded */
    1613          17 :                         token = yylval->sval[2] == '\'' ? -STRING : -IDENT; /* Passing unicode string/ident as - numbers, handled
    1614             :                                                                                                                                    later in scanner */
    1615          17 :                         quote = yylval->sval[2];
    1616          17 :                         lc->next_string_is_raw = true; /* a STRING token that follows directly takes the raw branch of the default case */
    1617          17 :                         break;
    1618           1 :                 case 'x':
    1619             :                 case 'X':
    1620           1 :                         assert(yylval->sval[1] == '\'');
    1621           1 :                         dst = str;
    1622           5 :                         for (char *src = yylval->sval + 2; *src; dst++) /* copy, keeping one quote of every '' pair */
    1623           4 :                                 if ((*dst = *src++) == '\'' && *src == '\'')
    1624           0 :                                         src++;
    1625           1 :                         *dst = 0;
    1626           1 :                         quote = '\'';
    1627           1 :                         token = XSTRING;
    1628           1 :                         lc->next_string_is_raw = true;
    1629           1 :                         break;
    1630        3287 :                 case 'r':
    1631             :                 case 'R':
    1632        3287 :                         assert(yylval->sval[1] == '\'');
    1633        3287 :                         dst = str;
    1634      450623 :                         for (char *src = yylval->sval + 2; *src; dst++) /* copy, keeping one quote of every '' pair */
    1635      447336 :                                 if ((*dst = *src++) == '\'' && *src == '\'')
    1636        2780 :                                         src++;
    1637        3287 :                         quote = '\'';
    1638        3287 :                         *dst = 0;
    1639        3287 :                         break;
    1640      825099 :                 default: /* plain '...' literal: the assert above leaves only the single quote here */
    1641      825099 :                         if (lc->raw_string_mode || lc->next_string_is_raw) {
    1642          50 :                                 dst = str;
    1643         479 :                                 for (char *src = yylval->sval + 1; *src; dst++)
    1644         429 :                                         if ((*dst = *src++) == '\'' && *src == '\'')
    1645           3 :                                                 src++;
    1646          50 :                                 *dst = 0;
    1647             :                         } else {
    1648      825049 :                                 if (GDKstrFromStr((unsigned char *)str,
    1649      825049 :                                                                   (unsigned char *)yylval->sval + 1,
    1650      825049 :                                                                   lc->yycur - lc->yysval - 1,
    1651             :                                                                   '\'') < 0) {
    1652           1 :                                         sql_error(c, 1, SQLSTATE(42000) "%s", GDKerrbuf); /* NOTE(review): unlike the e'...' case the GDKERROR prefix is not stripped -- confirm intended */
    1653           1 :                                         return LEX_ERROR;
    1654             :                                 }
    1655             :                         }
    1656             :                         break;
    1657             :                 }
    1658     2162944 :                 yylval->sval = str;
    1659             : 
    1660             :                 /* reset original */
    1661     2162944 :                 lc->rs->buf[lc->rs->pos+lc->yycur- 1] = quote;
    1662             :         } else {
    1663    21551475 :                 lc->next_string_is_raw = false;
    1664             :         }
    1665             : 
    1666             :         return(token);
    1667             : }
    1668             : 
    1669             : static int scanner( YYSTYPE *yylval, void *m, bool log);
    1670             : /* scanner: return the next token for the parser, merging multi-token
                     :  * constructs with one token of lookahead.
                     :  *
                     :  * yylval receives the token value, parm is the mvc context, and log
                     :  * selects whether the consumed input is written to lc->log (only
                     :  * when that stream is set).
                     :  *
                     :  * Tokens come from sql_get_next_token().  A lookahead token that is
                     :  * not merged is stored in lc->yynext, which sql_get_next_token()
                     :  * hands out on its next call.  Rewrites done here:
                     :  *   -STRING, -IDENT   Unicode literal: directly following STRING
                     :  *                     tokens are appended (strings only), an
                     :  *                     optional UESCAPE 'c' is read, uescape_xform()
                     :  *                     is applied, and STRING or IDENT is returned
                     :  *   WITH, then TIME   WITH_LA; TIME is still delivered afterwards
                     :  *   INTO, then STRING, BIG, LITTLE, NATIVE, BINARY or STDOUT
                     :  *                     INTO_LA; the lookahead is still delivered
                     :  *   TIMESTAMPADD or TIMESTAMPDIFF when last is
                     :  *   ODBC_FUNC_ESCAPE_PREFIX
                     :  *                     ODBC_TIMESTAMPADD or ODBC_TIMESTAMPDIFF
                     :  *   '+' or '-' when last is INTERVAL, then STRING
                     :  *                     the sign is folded into the string value
                     :  *   OUTER, then UNION OUTER_UNION
                     :  *   TO, then YEAR, MONTH, DAY, HOUR, MINUTE or SECOND
                     :  *                     TO_LA; the lookahead is still delivered
                     :  *   NOT, then EXISTS, BETWEEN, sqlIN, LIKE or ILIKE
                     :  *                     NOT_EXISTS, NOT_BETWEEN, NOT_IN, NOT_LIKE,
                     :  *                     NOT_ILIKE; NOT NOT cancels out
                     :  *   ':', then IDENT   PARAM, only while lc->brackets is zero
                     :  *   SCOLON when lc->yylast is SCOLON
                     :  *                     further semicolons are skipped
                     :  *
                     :  * Returns the resulting token, or LEX_ERROR after reporting a bad
                     :  * UESCAPE clause or a bad Unicode string.  lc->started is incremented
                     :  * for every token other than EOF that leaves through the common
                     :  * exit at the bottom.
                     :  */
    1671             : static int
    1672    29300928 : scanner(YYSTYPE * yylval, void *parm, bool log)
    1673             : {
    1674    29300928 :         int token;
    1675    29300928 :         mvc *c = (mvc *) parm;
    1676    29300928 :         struct scanner *lc = &c->scanner;
    1677    29300928 :         size_t pos;
    1678    29300928 :         int last = lc->yyval; /* NOTE(review): presumably the previously returned token; yyval is assigned outside this view */
    1679             : 
    1680             :         /* store position for when view's query ends */
    1681    29300928 :         pos = lc->rs->pos + lc->yycur;
    1682             : 
    1683    29300928 :         token = sql_get_next_token(yylval, parm);
    1684    29295399 :         if (token == '[') /* bracket depth decides below whether ':' may start a PARAM */
    1685         292 :                 lc->brackets++;
    1686    29295399 :         if (token == ']')
    1687         295 :                 lc->brackets--;
    1688             : 
    1689             :         /* TODO make hash out of the possible complex tokens and add with current tokens hash */
    1690    29295399 :         if (token == -IDENT || token == -STRING) {
    1691          17 :                 char *sval = yylval->sval;
    1692          17 :                 int next = sql_get_next_token(yylval, parm);
    1693             : 
    1694          17 :                 if (token == -STRING && next == STRING) { /* adjacent string literals continue the Unicode string */
    1695           2 :                         sval = sa_strconcat(c->sa, sval, yylval->sval);
    1696           6 :                         while((next = sql_get_next_token(yylval, parm)) == STRING)
    1697           4 :                                 sval = sa_strconcat(c->sa, sval, yylval->sval);
    1698             :                 }
    1699             : 
    1700          17 :                 char *uescape = "\\"; /* escape character used when no UESCAPE clause follows */
    1701          17 :                 if (next == UESCAPE) {
    1702          14 :                         int nxt = sql_get_next_token(yylval, parm);
    1703          14 :                         if (nxt == STRING) {
    1704          14 :                                 next = 0; /* UESCAPE and its string are consumed: nothing to hand back */
    1705          14 :                                 uescape = yylval->sval;
    1706          14 :                 if (strlen(uescape) != 1 || strchr("\"'0123456789abcdefABCDEF+ \t\n\r\f", *uescape) != NULL) { /* exactly one character, and none of the listed quote, hex digit, '+' or white space characters */
    1707           0 :                     sqlformaterror(c, SQLSTATE(22019) "%s", "UESCAPE must be one character");
    1708           0 :                                         return LEX_ERROR;
    1709             :                 }
    1710             :                         } else {
    1711           0 :                 sqlformaterror(c, SQLSTATE(22019) "%s", "UESCAPE character missing");
    1712           0 :                                 return LEX_ERROR;
    1713             :                         }
    1714             :                 }
    1715          17 :                 yylval->sval = uescape_xform(sval, uescape);
    1716          17 :                 if (yylval->sval == NULL && token == -STRING) { /* NOTE(review): a NULL result is rejected for strings only; for -IDENT it is returned as the IDENT value -- confirm intended */
    1717           0 :                         sqlformaterror(c, SQLSTATE(22019) "%s", "Bad Unicode string");
    1718           0 :                         return LEX_ERROR;
    1719             :                 }
    1720             : 
    1721          17 :                 if (next) /* hand the unconsumed lookahead token to the next call */
    1722           3 :                         lc->yynext = next;
    1723          17 :                 return (token == -IDENT)?IDENT:STRING;
    1724    29295382 :         } else if (token == WITH) { /* check for TIME WITH ... */
    1725       15572 :                 int next = sql_get_next_token(yylval, parm);
    1726       15572 :                 if (next == TIME)
    1727        5416 :                         token = WITH_LA;
    1728       15572 :                 lc->yynext = next; /* the lookahead is delivered on the next call in either case */
    1729    29279810 :         } else if (token == INTO) { /* check for INTO followed by STRING / (BIG/LITTLE/NATIVE) for copy into file vs copy select into var */
    1730      140215 :                 int next = sql_get_next_token(yylval, parm);
    1731      140156 :                 if (next == STRING || next == BIG || next == LITTLE || next == NATIVE ||
    1732             :                         next == BINARY || next == STDOUT)
    1733          77 :                         token = INTO_LA;
    1734      140156 :                 lc->yynext = next;
    1735    29139595 :         } else if (last == ODBC_FUNC_ESCAPE_PREFIX && token == TIMESTAMPADD) {
    1736             :                 token = ODBC_TIMESTAMPADD;
    1737    29139860 :         } else if (last == ODBC_FUNC_ESCAPE_PREFIX && token == TIMESTAMPDIFF) {
    1738             :                 token = ODBC_TIMESTAMPDIFF;
    1739    29139975 :         } else if (last == INTERVAL && (token == '-' || token == '+')) { /* backward compatibility: INTERVAL +- 'string' -> interval '+-string'*/
    1740          12 :                 int next = sql_get_next_token(yylval, parm);
    1741          12 :                 if (next == STRING) {
    1742          12 :                         if (token != '+') { /* a leading '+' changes nothing; only '-' has to be folded into the string */
    1743           8 :                                 char *sval = yylval->sval;
    1744           8 :                                 if (sval[0] == '+')
    1745           0 :                                         sval[0] = '-';
    1746           8 :                                 else if (sval[0] == '-')
    1747           1 :                                                 yylval->sval++; /* minus applied to a '-...' string: drop the sign */
    1748             :                                 else
    1749           7 :                                         yylval->sval = sa_strconcat(c->sa, token=='-'?"-":"+", sval); /* token can only be '-' in this branch */
    1750             :                         }
    1751             :                         token = next;
    1752             :                         next = 0;
    1753             :                 }
    1754          12 :                 lc->yynext = next;
    1755    29139963 :         } else if (token == OUTER) { /* check for OUTER UNION */
    1756       18457 :                 int next = sql_get_next_token(yylval, parm);
    1757       18457 :                 if (next == UNION)
    1758             :                         token = OUTER_UNION;
    1759             :                 else
    1760       18448 :                         lc->yynext = next;
    1761    29121506 :         } else if (token == TO) { /* check for end_field (of interval spec) TO (MONTH etc) */
    1762      121026 :                 int next = sql_get_next_token(yylval, parm);
    1763      121026 :                 if (next == YEAR || next == MONTH || next == DAY || next == HOUR || next == MINUTE || next == SECOND)
    1764         273 :                         token = TO_LA;
    1765      121026 :                 lc->yynext = next;
    1766    29000480 :         } else if (token == NOT) {
    1767       78228 :                 int next = sql_get_next_token(yylval, parm);
    1768             : 
    1769       78228 :                 if (next == NOT) {
    1770           2 :                         return scanner(yylval, parm, false); /* NOT NOT: drop both and return what follows; this path writes nothing to lc->log */
    1771             :                 } else if (next == EXISTS) {
    1772             :                         token = NOT_EXISTS;
    1773             :                 } else if (next == BETWEEN) {
    1774             :                         token = NOT_BETWEEN;
    1775             :                 } else if (next == sqlIN) {
    1776             :                         token = NOT_IN;
    1777             :                 } else if (next == LIKE) {
    1778             :                         token = NOT_LIKE;
    1779             :                 } else if (next == ILIKE) {
    1780             :                         token = NOT_ILIKE;
    1781             :                 } else {
    1782       63281 :                         lc->yynext = next;
    1783             :                 }
    1784    28922252 :         } else if (token == ':' && !lc->brackets) {
    1785          80 :                 int next = sql_get_next_token(yylval, parm);
    1786          80 :                 if (next == IDENT) /* yylval already holds the identifier read as lookahead */
    1787             :                         token = PARAM;
    1788             :                 else
    1789          66 :                         lc->yynext = next;
    1790    28922172 :         } else if (token == SCOLON) {
    1791             :                 /* ignore semi-colon(s) following a semi-colon */
    1792     1046562 :                 if (lc->yylast == SCOLON) {
    1793      131730 :                         size_t prev = lc->yycur; /* read offset just after the last semicolon seen */
    1794      131731 :                         while ((token = sql_get_next_token(yylval, parm)) == SCOLON)
    1795           1 :                                 prev = lc->yycur;
    1796             : 
    1797             :                         /* skip the skipped stuff also in the buffer */
    1798      131736 :                         lc->rs->pos += prev;
    1799      131736 :                         lc->yycur -= prev;
    1800             :                 }
    1801             :         }
    1802             : 
    1803    29295327 :         if (lc->log && log) /* write the input consumed since pos, the position saved at entry */
    1804           0 :                 mnstr_write(lc->log, lc->rs->buf+pos, lc->rs->pos + lc->yycur - pos, 1);
    1805             : 
    1806    29295327 :         lc->started += (token != EOF);
    1807    29295327 :         return token;
    1808             : }
    1809             : 
    1810             : /* sqllex: lexer entry point with external linkage; it forwards to
                     :  * scanner() with logging enabled and returns its token.
                     :  * yylval receives the token value and parm is passed through
                     :  * unchanged (scanner() casts it to mvc *).
                     :  * also see sql_parser.y */
    1811             : extern int sqllex(YYSTYPE * yylval, void *parm);
    1812             : 
    1813             : int
    1814    29301800 : sqllex(YYSTYPE * yylval, void *parm)
    1815             : {
    1816    29301800 :         return scanner(yylval, parm, true);
    1817             : }

Generated by: LCOV version 1.14