LCOV - code coverage report
Current view: top level - monetdb5/modules/mal - pcre.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 781 988 79.0 %
Date: 2025-03-25 20:06:35 Functions: 35 39 89.7 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024, 2025 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : /*
      14             :  * N. Nes
      15             :  * PCRE library interface
      16             :  * The  PCRE library is a set of functions that implement regular
      17             :  * expression pattern matching using the same syntax  and  semantics  as  Perl,
      18             :  * with  just  a  few  differences.  The  current  implementation of PCRE
      19             :  * (release 4.x) corresponds approximately with Perl 5.8, including  support
      20             :  * for  UTF-8  encoded  strings.   However,  this support has to be
      21             :  * explicitly enabled; it is not the default.
      22             :  *
      23             :  * ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre
      24             :  */
      25             : #include "monetdb_config.h"
      26             : #include <string.h>
      27             : 
      28             : #include "mal.h"
      29             : #include "mal_client.h"
      30             : #include "mal_interpreter.h"
      31             : #include "mal_exception.h"
      32             : 
      33             : #include <wchar.h>
      34             : #include <wctype.h>
      35             : 
      36             : #ifdef HAVE_LIBPCRE
      37             : #include <pcre.h>
      38             : #ifndef PCRE_STUDY_JIT_COMPILE
      39             : /* old library version on e.g. EPEL 6 */
      40             : #define pcre_free_study(x)              pcre_free(x)
      41             : #define PCRE_STUDY_JIT_COMPILE  0
      42             : #endif
      43             : #define JIT_COMPILE_MIN 1024    /* when to try JIT compilation of patterns */
      44             : 
      45             : #else
      46             : 
      47             : #include <regex.h>
      48             : 
      49             : typedef regex_t pcre;
      50             : #endif
      51             : 
      52             : /* current implementation assumes simple %keyword% [keyw%]* */
      53             : struct RE {
      54             :         char *k;
      55             :         bool search:1, atend:1, case_ignore:1;
      56             :         size_t skip;                    /* number of codepoints to skip before matching */
      57             :         size_t len;                             /* number of bytes in string */
      58             :         size_t ulen;                    /* number of codepoints in string */
      59             :         struct RE *n;
      60             : };
      61             : 
      62             : /* We cannot use strcasecmp and strncasecmp since they work byte for
      63             :  * byte and don't deal with multibyte encodings (such as UTF-8). */
      64             : 
      65             : static inline bool
      66        6851 : mnre_is_pattern_properly_escaped(const char *pat, unsigned char esc)
      67             : {
      68        6851 :         bool escaped = false;
      69             : 
      70        6851 :         if (pat == 0)
      71             :                 return true;
      72       52401 :         while (*pat) {
      73       45550 :                 if (escaped) {
      74             :                         escaped = false;
      75       45398 :                 } else if ((unsigned char) *pat == esc) {
      76       45550 :                         escaped = true;
      77             :                 }
      78       45550 :                 pat++;
      79             :         }
      80        6851 :         return escaped ? false : true;
      81             : }
      82             : 
      83             : /* returns true if the pattern does not contain wildcard
      84             :  * characters ('%' or '_') and no character is escaped
      85             :  */
      86             : static inline bool
      87        6825 : is_strcmpable(const char *pat, const char *esc)
      88             : {
      89        6825 :         if (pat[strcspn(pat, "%_")])
      90             :                 return false;
      91        1826 :         return strlen(esc) == 0 || strNil(esc) || strstr(pat, esc) == NULL;
      92             : }
      93             : 
      94             : /* Match regular expression by comparing bytes.
      95             :  */
      96             : static inline bool
      97      408691 : mnre_match(const char *restrict s, const struct RE *restrict pattern)
      98             : {
      99      408691 :         const struct RE *r;
     100             : 
     101      484805 :         for (r = pattern; r; r = r->n) {
     102      436494 :                 for (size_t i = 0; i < r->skip; s++) {
     103       25792 :                         if (*s == 0)
     104             :                                 return false;
     105       26300 :                         i += (*s & 0xC0) != 0x80;
     106             :                 }
     107      410702 :                 if (r->search) {
     108      191828 :                         if (r->atend) {
     109             :                                 /* we're searching for a string at the end, so just skip
     110             :                                  * over everything and just compare with the tail of the
     111             :                                  * haystack */
     112       21623 :                                 size_t slen = strlen(s);
     113       21623 :                                 if (slen < r->ulen) {
     114             :                                         /* remaining string too short: each codepoint
     115             :                                          * requires at least one byte */
     116             :                                         return false;
     117             :                                 }
     118       21611 :                                 const char *e = s + slen;
     119       21611 :                                 if (!r->case_ignore) {
     120       21531 :                                         if (slen < r->len) {
     121             :                                                 /* remaining string is too short to match */
     122             :                                                 return false;
     123             :                                         }
     124       21528 :                                         e -= r->len;
     125       21528 :                                         if ((*e & 0xC0) == 0x80) {
     126             :                                                 /* not at start of a Unicode character, so
     127             :                                                  * cannot match (this test not strictly
     128             :                                                  * required: the strcmp should also return
     129             :                                                  * unequal) */
     130             :                                                 return false;
     131             :                                         }
     132       21537 :                                         return strcmp(e, r->k) == 0;
     133             :                                 }
     134             :                                 size_t ulen = r->ulen;
     135         353 :                                 while (e > s && ulen != 0) {
     136         273 :                                         ulen -= (*--e & 0xC0) != 0x80;
     137             :                                 }
     138             :                                 /* ulen != 0 means remaining string is too short */
     139         141 :                                 return ulen == 0 && GDKstrcasecmp(e, r->k) == 0;
     140             :                         }
     141             :                         /* in case we have a pattern consisting of % followed by _,
     142             :                          * we need to backtrack, so use recursion; here we know we
     143             :                          * have the %, look for an _ in the rest of the pattern
     144             :                          * (note %_ and _% are equivalent and is taken care of by
     145             :                          * the pattern construction in mnre_create) */
     146      182972 :                         for (const struct RE *p = r->n; p; p = p->n) {
     147       14907 :                                 if (p->skip != 0) {
     148        2140 :                                         struct RE pat = *r;
     149        2140 :                                         pat.search = false;
     150        2140 :                                         pat.skip = 0;
     151      160145 :                                         do {
     152      160145 :                                                 if (mnre_match(s, &pat))
     153             :                                                         return true;
     154      159951 :                                                 do
     155      159951 :                                                         s++;
     156      159952 :                                                 while (*s && (*s & 0xC0) == 0x80);
     157      159952 :                                         } while (*s != 0);
     158             :                                         return false;
     159             :                                 }
     160             :                         }
     161             :                 }
     162      386939 :                 if (r->k[0] == 0 && (r->search || *s == 0))
     163             :                         return true;
     164      386897 :                 if (r->case_ignore) {
     165       11407 :                         for (;;) {
     166       11407 :                                 if (r->search && (s = GDKstrcasestr(s, r->k)) == NULL)
     167             :                                         return false;
     168        3763 :                                 if (*s == '\0')
     169             :                                         return false;
     170             :                                 /* in "atend" comparison, compare whole string, else
     171             :                                  * only part */
     172        3822 :                                 if ((!r->search || r->atend) &&
     173          59 :                                         (r->atend ? GDKstrcasecmp(s, r->k) : GDKstrncasecmp(s, r->k, SIZE_MAX, r->len)) != 0) {
     174             :                                         /* no match */
     175          22 :                                         if (!r->search)
     176             :                                                 return false;
     177             :                                         /* try again with next character */
     178           0 :                                         do
     179           0 :                                                 s++;
     180           0 :                                         while (*s != '\0' && (*s & 0xC0) == 0x80);
     181           0 :                                         continue;
     182             :                                 }
     183             :                                 /* match; find end of match by counting codepoints */
     184       58947 :                                 for (size_t i = 0; *s && i < r->ulen; s++)
     185       55206 :                                         i += (*s & 0xC0) != 0x80;
     186             :                                 break;
     187             :                         }
     188             :                 } else {
     189      375490 :                         for (;;) {
     190      375490 :                                 if (r->search && (s = strstr(s, r->k)) == NULL)
     191             :                                         return false;
     192      260989 :                                 if (*s == '\0')
     193             :                                         return false;
     194             :                                 /* in "atend" comparison, include NUL byte in the compare */
     195      260498 :                                 if ((!r->search || r->atend) &&
     196      190408 :                                         strncmp(s, r->k, r->len + r->atend) != 0) {
     197             :                                         /* no match */
     198      188125 :                                         if (!r->search)
     199             :                                                 return false;
     200             :                                         /* try again with next character: have search start
     201             :                                          * after current first byte */
     202           0 :                                         if ((s = strchr(s + 1, r->k[0])) == NULL)
     203             :                                                 return false;
     204           0 :                                         continue;
     205             :                                 }
     206             :                                 /* match */
     207       72373 :                                 s += r->len;
     208       72373 :                                 break;
     209             :                         }
     210             :                 }
     211             :         }
     212             :         return true;
     213             : }
     214             : 
     215             : static void
     216        5965 : mnre_destroy(struct RE *p)
     217             : {
     218        5965 :         if (p) {
     219        5965 :                 GDKfree(p->k);
     220        6715 :                 do {
     221        6715 :                         struct RE *n = p->n;
     222             : 
     223        6715 :                         GDKfree(p);
     224        6715 :                         p = n;
     225        6715 :                 } while (p);
     226             :         }
     227        5965 : }
     228             : 
     229             : /* Create a linked list of RE structures.  Depending on the
     230             :  * caseignore and the ascii_pattern flags, the w
     231             :  * (if caseignore == true && ascii_pattern == false) or the k
     232             :  * (in every other case) field is used.  These in the first
     233             :  * structure are allocated, whereas in all subsequent
     234             :  * structures the fields point into the allocated buffer of
     235             :  * the first.
     236             :  */
     237             : static struct RE *
     238        5965 : mnre_create(const char *pat, bool caseignore, uint32_t esc)
     239             : {
     240        5965 :         struct RE *r = GDKmalloc(sizeof(struct RE)), *n = r;
     241        5965 :         bool escaped = false;
     242        5965 :         char *p, *q;
     243             : 
     244        5965 :         if (r == NULL)
     245             :                 return NULL;
     246        5965 :         *r = (struct RE) {
     247             :                 .atend = true,
     248             :                 .case_ignore = caseignore,
     249             :         };
     250             : 
     251       11274 :         for (;;) {
     252       11274 :                 if (esc != '%' && *pat == '%') {
     253        5137 :                         pat++;                                  /* skip % */
     254        5137 :                         r->search = true;
     255        6137 :                 } else if (esc != '_' && *pat == '_') {
     256         172 :                         pat++;
     257         172 :                         r->skip++;
     258             :                 } else {
     259             :                         break;
     260             :                 }
     261             :         }
     262        5965 :         if ((p = GDKstrdup(pat)) == NULL) {
     263           0 :                 GDKfree(r);
     264           0 :                 return NULL;
     265             :         }
     266             : 
     267        5965 :         r->k = p;
     268        5965 :         q = p;
     269       41628 :         while (*p) {
     270       35663 :                 if (escaped) {
     271         149 :                         *q++ = *p;
     272         149 :                         n->len++;
     273         149 :                         n->ulen += (*p & 0xC0) != 0x80;
     274         149 :                         escaped = false;
     275       35514 :                 } else if ((unsigned char) *p == esc) {
     276             :                         escaped = true;
     277       35365 :                 } else if (*p == '%' || *p == '_') {
     278        6006 :                         n->atend = false;
     279        6006 :                         bool search = false;
     280        6006 :                         size_t skip = 0;
     281       18130 :                         for (;;) {
     282       12068 :                                 if (*p == '_')
     283         602 :                                         skip++;
     284       11466 :                                 else if (*p == '%')
     285             :                                         search = true;
     286             :                                 else
     287             :                                         break;
     288        6062 :                                 p++;
     289             :                         }
     290        6006 :                         if (*p || skip != 0) {
     291         750 :                                 n = n->n = GDKmalloc(sizeof(struct RE));
     292         750 :                                 if (n == NULL)
     293           0 :                                         goto bailout;
     294         750 :                                 *n = (struct RE) {
     295             :                                         .search = search,
     296             :                                         .atend = true,
     297             :                                         .skip = skip,
     298             :                                         .k = p,
     299             :                                         .case_ignore = caseignore,
     300             :                                 };
     301             :                         }
     302        6006 :                         *q = 0;
     303        6006 :                         q = p;
     304        6006 :                         continue;                       /* skip increment, we already did it */
     305             :                 } else {
     306       29359 :                         *q++ = *p;
     307       29359 :                         n->len++;
     308       29359 :                         n->ulen += (*p & 0xC0) != 0x80;
     309             :                 }
     310       29657 :                 p++;
     311             :         }
     312        5965 :         *q = 0;
     313        5965 :         return r;
     314           0 :   bailout:
     315           0 :         mnre_destroy(r);
     316           0 :         return NULL;
     317             : }
     318             : 
     319             : #ifdef HAVE_LIBPCRE
     320             : static str
     321          25 : pcre_compile_wrap(pcre **res, const char *pattern, bit insensitive)
     322             : {
     323          25 :         pcre *r;
     324          25 :         const char *err_p = NULL;
     325          25 :         int errpos = 0;
     326          25 :         int options = PCRE_UTF8 | PCRE_NO_UTF8_CHECK | PCRE_MULTILINE;
     327          25 :         if (insensitive)
     328           0 :                 options |= PCRE_CASELESS;
     329             : 
     330          25 :         if ((r = pcre_compile(pattern, options, &err_p, &errpos, NULL)) == NULL) {
     331           0 :                 throw(MAL, "pcre.compile", OPERATION_FAILED
     332             :                           " with\n'%s'\nat %d in\n'%s'.\n", err_p, errpos, pattern);
     333             :         }
     334          25 :         *res = r;
     335          25 :         return MAL_SUCCEED;
     336             : }
     337             : #endif
     338             : 
     339             : /* maximum number of back references and quoted \ or $ in replacement string */
     340             : #define MAX_NR_REFS             20
     341             : 
     342             : struct backref {
     343             :         int idx;
     344             :         int start;
     345             :         int end;
     346             : };
     347             : 
     348             : #ifdef HAVE_LIBPCRE
     349             : /* fill in parameter backrefs (length maxrefs) with information about
     350             :  * back references in the replacement string; a back reference is a
     351             :  * dollar or backslash followed by a number */
     352             : static int
     353         104 : parse_replacement(const char *replacement, int len_replacement,
     354             :                                   struct backref *backrefs, int maxrefs)
     355             : {
     356         104 :         int nbackrefs = 0;
     357             : 
     358         173 :         for (int i = 0; i < len_replacement && nbackrefs < maxrefs; i++) {
     359          69 :                 if (replacement[i] == '$' || replacement[i] == '\\') {
     360          10 :                         char *endptr;
     361          10 :                         backrefs[nbackrefs].idx = strtol(replacement + i + 1, &endptr, 10);
     362          10 :                         if (endptr > replacement + i + 1) {
     363          10 :                                 int k = (int) (endptr - (replacement + i + 1));
     364          10 :                                 backrefs[nbackrefs].start = i;
     365          10 :                                 backrefs[nbackrefs].end = i + k + 1;
     366          10 :                                 nbackrefs++;
     367           0 :                         } else if (replacement[i] == replacement[i + 1]) {
     368             :                                 /* doubled $ or \, we must copy just one to the output */
     369           0 :                                 backrefs[nbackrefs].idx = INT_MAX;      /* impossible value > 0 */
     370           0 :                                 backrefs[nbackrefs].start = i;
     371           0 :                                 backrefs[nbackrefs].end = i + 1;
     372           0 :                                 i++;                    /* don't look at second $ or \ again */
     373           0 :                                 nbackrefs++;
     374             :                         }
     375             :                         /* else: $ or \ followed by something we don't recognize,
     376             :                          * so just leave it */
     377             :                 }
     378             :         }
     379         104 :         return nbackrefs;
     380             : }
     381             : 
     382             : static char *
     383       51342 : single_replace(pcre *pcre_code, pcre_extra *extra,
     384             :                            const char *origin_str, int len_origin_str,
     385             :                            int exec_options, int *ovector, int ovecsize,
     386             :                            const char *replacement, int len_replacement,
     387             :                            struct backref *backrefs, int nbackrefs,
     388             :                            bool global, char *result, int *max_result)
     389             : {
     390       51342 :         int offset = 0;
     391       51342 :         int len_result = 0;
     392       51342 :         int addlen;
     393       51342 :         int empty_match_correction = 0;
     394      191251 :         char *tmp;
     395             : 
     396      191251 :         do {
     397      191251 :                 int j = pcre_exec(pcre_code, extra, origin_str, len_origin_str, offset,
     398             :                                                   exec_options, ovector, ovecsize);
     399      191296 :                 if (j <= 0)
     400             :                         break;
     401             : 
     402      143586 :                 empty_match_correction = ovector[0] == ovector[1] ? 1 : 0;
     403             : 
     404             :                 // calculate the length of the string that will be appended to result
     405      287172 :                 addlen = ovector[0] - offset
     406      143586 :                                 + (nbackrefs == 0 ? len_replacement : 0) + empty_match_correction;
     407      143586 :                 if (len_result + addlen >= *max_result) {
     408       12149 :                         tmp = GDKrealloc(result, len_result + addlen + 1);
     409       12149 :                         if (tmp == NULL) {
     410           0 :                                 GDKfree(result);
     411           0 :                                 return NULL;
     412             :                         }
     413       12149 :                         result = tmp;
     414       12149 :                         *max_result = len_result + addlen + 1;
     415             :                 }
     416             :                 // append to the result the parts of the original string that are left unchanged
     417      143586 :                 if (ovector[0] > offset) {
     418      139340 :                         strncpy(result + len_result, origin_str + offset,
     419      139340 :                                         ovector[0] - offset);
     420      139340 :                         len_result += ovector[0] - offset;
     421             :                 }
     422             :                 // append to the result the replacement of the matched string
     423      143586 :                 if (nbackrefs == 0) {
     424      139915 :                         strncpy(result + len_result, replacement, len_replacement);
     425      139915 :                         len_result += len_replacement;
     426             :                 } else {
     427             :                         int prevend = 0;
     428        7342 :                         for (int i = 0; i < nbackrefs; i++) {
     429        3671 :                                 int off, len;
     430        3671 :                                 if (backrefs[i].idx >= ovecsize / 3) {
     431             :                                         /* out of bounds, replace with empty string */
     432             :                                         off = 0;
     433             :                                         len = 0;
     434             :                                 } else {
     435        3671 :                                         off = ovector[backrefs[i].idx * 2];
     436        3671 :                                         len = ovector[backrefs[i].idx * 2 + 1] - off;
     437             :                                 }
     438        3671 :                                 addlen = backrefs[i].start - prevend + len;
     439        3671 :                                 if (len_result + addlen >= *max_result) {
     440          37 :                                         tmp = GDKrealloc(result, len_result + addlen + 1);
     441          37 :                                         if (tmp == NULL) {
     442           0 :                                                 GDKfree(result);
     443           0 :                                                 return NULL;
     444             :                                         }
     445          37 :                                         result = tmp;
     446          37 :                                         *max_result = len_result + addlen + 1;
     447             :                                 }
     448        3671 :                                 if (backrefs[i].start > prevend) {
     449           2 :                                         strncpy(result + len_result, replacement + prevend,
     450           2 :                                                         backrefs[i].start - prevend);
     451           2 :                                         len_result += backrefs[i].start - prevend;
     452             :                                 }
     453        3671 :                                 if (len > 0) {
     454        3671 :                                         strncpy(result + len_result, origin_str + off, len);
     455        3671 :                                         len_result += len;
     456             :                                 }
     457        3671 :                                 prevend = backrefs[i].end;
     458             :                         }
     459             :                         /* copy rest of replacement string (after last backref) */
     460        3671 :                         addlen = len_replacement - prevend;
     461        3671 :                         if (addlen > 0) {
     462           2 :                                 if (len_result + addlen >= *max_result) {
     463           1 :                                         tmp = GDKrealloc(result, len_result + addlen + 1);
     464           1 :                                         if (tmp == NULL) {
     465           0 :                                                 GDKfree(result);
     466           0 :                                                 return NULL;
     467             :                                         }
     468           1 :                                         result = tmp;
     469           1 :                                         *max_result = len_result + addlen + 1;
     470             :                                 }
     471           2 :                                 strncpy(result + len_result, replacement + prevend, addlen);
     472           2 :                                 len_result += addlen;
     473             :                         }
     474             :                 }
     475             :                 // In case of an empty match just advance the offset by 1
     476      143586 :                 offset = ovector[1] + empty_match_correction;
     477             :                 // and copy the character that we just advanced over
     478      143586 :                 if (empty_match_correction) {
     479          14 :                         strncpy(result + len_result, origin_str + ovector[1], 1);
     480          14 :                         ++len_result;
     481             :                 }
     482             :                 // before we loop around check with the offset - 1 if we had an empty match
     483             :                 // since we manually advanced the offset by one. otherwise we gonna skip a
     484             :                 // replacement at the end of the string
     485      143586 :         } while ((offset - empty_match_correction) < len_origin_str && global);
     486             : 
     487       51387 :         if (offset < len_origin_str) {
     488       47716 :                 addlen = len_origin_str - offset;
     489       47716 :                 if (len_result + addlen >= *max_result) {
     490         575 :                         tmp = GDKrealloc(result, len_result + addlen + 1);
     491         575 :                         if (tmp == NULL) {
     492           0 :                                 GDKfree(result);
     493           0 :                                 return NULL;
     494             :                         }
     495         575 :                         result = tmp;
     496         575 :                         *max_result = len_result + addlen + 1;
     497             :                 }
     498       47716 :                 strncpy(result + len_result, origin_str + offset, addlen);
     499       47716 :                 len_result += addlen;
     500             :         }
     501             :         /* null terminate string */
     502       51387 :         result[len_result] = '\0';
     503       51387 :         return result;
     504             : }
     505             : #endif
     506             : 
     507             : static str
     508          14 : pcre_replace(str *res, const char *origin_str, const char *pattern,
     509             :                          const char *replacement, const char *flags, bool global)
     510             : {
     511             : #ifdef HAVE_LIBPCRE
     512          14 :         const char *err_p = NULL;
     513          14 :         pcre *pcre_code = NULL;
     514          14 :         pcre_extra *extra;
     515          14 :         char *tmpres;
     516          14 :         int max_result;
     517          14 :         int i, errpos = 0;
     518          14 :         int compile_options = PCRE_UTF8 | PCRE_NO_UTF8_CHECK;
     519          14 :         int exec_options = PCRE_NOTEMPTY | PCRE_NO_UTF8_CHECK;
     520          14 :         int *ovector, ovecsize;
     521          14 :         int len_origin_str = (int) strlen(origin_str);
     522          14 :         int len_replacement = (int) strlen(replacement);
     523          14 :         struct backref backrefs[MAX_NR_REFS];
     524          14 :         int nbackrefs = 0;
     525             : 
     526          21 :         while (*flags) {
     527           7 :                 switch (*flags) {
     528             :                 case 'e':
     529             :                         exec_options &= ~PCRE_NOTEMPTY;
     530             :                         break;
     531           1 :                 case 'i':
     532           1 :                         compile_options |= PCRE_CASELESS;
     533           1 :                         break;
     534           1 :                 case 'm':
     535           1 :                         compile_options |= PCRE_MULTILINE;
     536           1 :                         break;
     537           1 :                 case 's':
     538           1 :                         compile_options |= PCRE_DOTALL;
     539           1 :                         break;
     540           1 :                 case 'x':
     541           1 :                         compile_options |= PCRE_EXTENDED;
     542           1 :                         break;
     543           0 :                 default:
     544           0 :                         throw(MAL, global ? "pcre.replace" : "pcre.replace_first",
     545             :                                   ILLEGAL_ARGUMENT ": unsupported flag character '%c'\n",
     546             :                                   *flags);
     547             :                 }
     548           7 :                 flags++;
     549             :         }
     550             : 
     551          14 :         if ((pcre_code = pcre_compile(pattern, compile_options, &err_p, &errpos, NULL)) == NULL) {
     552           0 :                 throw(MAL, global ? "pcre.replace" : "pcre.replace_first",
     553             :                           OPERATION_FAILED
     554             :                           ": pcre compile of pattern (%s) failed at %d with\n'%s'.\n",
     555             :                           pattern, errpos, err_p);
     556             :         }
     557             : 
     558             :         /* Since the compiled pattern is going to be used several times, it is
     559             :          * worth spending more time analyzing it in order to speed up the time
     560             :          * taken for matching.
     561             :          */
     562          14 :         extra = pcre_study(pcre_code, 0, &err_p);
     563          14 :         if (err_p != NULL) {
     564           0 :                 pcre_free(pcre_code);
     565           0 :                 throw(MAL, global ? "pcre.replace" : "pcre.replace_first",
     566             :                           OPERATION_FAILED
     567             :                           ": pcre study of pattern (%s) failed with '%s'.\n", pattern,
     568             :                           err_p);
     569             :         }
     570          14 :         pcre_fullinfo(pcre_code, extra, PCRE_INFO_CAPTURECOUNT, &i);
     571          14 :         ovecsize = (i + 1) * 3;
     572          14 :         if ((ovector = (int *) GDKmalloc(sizeof(int) * ovecsize)) == NULL) {
     573           0 :                 pcre_free_study(extra);
     574           0 :                 pcre_free(pcre_code);
     575           0 :                 throw(MAL, global ? "pcre.replace" : "pcre.replace_first",
     576             :                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     577             :         }
     578             : 
     579             :         /* identify back references in the replacement string */
     580          14 :         nbackrefs = parse_replacement(replacement, len_replacement,
     581             :                                                                   backrefs, MAX_NR_REFS);
     582             : 
     583          14 :         max_result = len_origin_str + 1;
     584          14 :         tmpres = GDKmalloc(max_result);
     585          14 :         if (tmpres == NULL) {
     586           0 :                 GDKfree(ovector);
     587           0 :                 pcre_free_study(extra);
     588           0 :                 pcre_free(pcre_code);
     589           0 :                 throw(MAL, global ? "pcre.replace" : "pcre.replace_first",
     590             :                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     591             :         }
     592             : 
     593          14 :         tmpres = single_replace(pcre_code, extra, origin_str, len_origin_str,
     594             :                                                         exec_options, ovector, ovecsize, replacement,
     595             :                                                         len_replacement, backrefs, nbackrefs, global,
     596             :                                                         tmpres, &max_result);
     597          14 :         GDKfree(ovector);
     598          14 :         pcre_free_study(extra);
     599          14 :         pcre_free(pcre_code);
     600          14 :         if (tmpres == NULL)
     601           0 :                 throw(MAL, global ? "pcre.replace" : "pcre.replace_first",
     602             :                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     603             : 
     604          14 :         *res = tmpres;
     605          14 :         return MAL_SUCCEED;
     606             : #else
     607             :         (void) res;
     608             :         (void) origin_str;
     609             :         (void) pattern;
     610             :         (void) replacement;
     611             :         (void) flags;
     612             :         (void) global;
     613             :         throw(MAL, global ? "pcre.replace" : "pcre.replace_first",
     614             :                   "Database was compiled without PCRE support.");
     615             : #endif
     616             : }
     617             : 
     618             : static str
     619          90 : pcre_replace_bat(BAT **res, BAT *origin_strs, const char *pattern,
     620             :                                  const char *replacement, const char *flags, bool global)
     621             : {
     622             : #ifdef HAVE_LIBPCRE
     623          90 :         const char *err_p = NULL;
     624          90 :         char *tmpres;
     625          90 :         int i, errpos = 0;
     626          90 :         int compile_options = PCRE_UTF8 | PCRE_NO_UTF8_CHECK;
     627          90 :         int exec_options = PCRE_NOTEMPTY | PCRE_NO_UTF8_CHECK;
     628          90 :         pcre *pcre_code = NULL;
     629          90 :         pcre_extra *extra;
     630          90 :         BAT *tmpbat;
     631          90 :         BUN p, q;
     632          90 :         int *ovector, ovecsize;
     633          90 :         int len_replacement = (int) strlen(replacement);
     634          90 :         struct backref backrefs[MAX_NR_REFS];
     635          90 :         int nbackrefs = 0;
     636          90 :         const char *origin_str;
     637          90 :         int max_dest_size = 0;
     638             : 
     639         126 :         while (*flags) {
     640          36 :                 switch (*flags) {
     641             :                 case 'e':
     642             :                         exec_options &= ~PCRE_NOTEMPTY;
     643             :                         break;
     644           9 :                 case 'i':
     645           9 :                         compile_options |= PCRE_CASELESS;
     646           9 :                         break;
     647          18 :                 case 'm':
     648          18 :                         compile_options |= PCRE_MULTILINE;
     649          18 :                         break;
     650           9 :                 case 's':
     651           9 :                         compile_options |= PCRE_DOTALL;
     652           9 :                         break;
     653           0 :                 case 'x':
     654           0 :                         compile_options |= PCRE_EXTENDED;
     655           0 :                         break;
     656           0 :                 default:
     657           0 :                         throw(MAL, global ? "batpcre.replace" : "batpcre.replace_first",
     658             :                                   ILLEGAL_ARGUMENT ": unsupported flag character '%c'\n",
     659             :                                   *flags);
     660             :                 }
     661          36 :                 flags++;
     662             :         }
     663             : 
     664          90 :         if ((pcre_code = pcre_compile(pattern, compile_options, &err_p, &errpos, NULL)) == NULL) {
     665           0 :                 throw(MAL, global ? "batpcre.replace" : "batpcre.replace_first",
     666             :                           OPERATION_FAILED
     667             :                           ": pcre compile of pattern (%s) failed at %d with\n'%s'.\n",
     668             :                           pattern, errpos, err_p);
     669             :         }
     670             : 
     671             :         /* Since the compiled pattern is going to be used several times,
     672             :          * it is worth spending more time analyzing it in order to speed
     673             :          * up the time taken for matching.
     674             :          */
     675         180 :         extra = pcre_study(pcre_code,
     676          90 :                                            BATcount(origin_strs) >
     677             :                                            JIT_COMPILE_MIN ? PCRE_STUDY_JIT_COMPILE : 0, &err_p);
     678          90 :         if (err_p != NULL) {
     679           0 :                 pcre_free(pcre_code);
     680           0 :                 throw(MAL, global ? "batpcre.replace" : "batpcre.replace_first",
     681             :                           OPERATION_FAILED);
     682             :         }
     683          90 :         pcre_fullinfo(pcre_code, extra, PCRE_INFO_CAPTURECOUNT, &i);
     684          90 :         ovecsize = (i + 1) * 3;
     685          90 :         if ((ovector = (int *) GDKzalloc(sizeof(int) * ovecsize)) == NULL) {
     686           0 :                 pcre_free_study(extra);
     687           0 :                 pcre_free(pcre_code);
     688           0 :                 throw(MAL, global ? "batpcre.replace" : "batpcre.replace_first",
     689             :                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     690             :         }
     691             : 
     692             :         /* identify back references in the replacement string */
     693          90 :         nbackrefs = parse_replacement(replacement, len_replacement,
     694             :                                                                   backrefs, MAX_NR_REFS);
     695             : 
     696          90 :         tmpbat = COLnew(origin_strs->hseqbase, TYPE_str, BATcount(origin_strs),
     697             :                                         TRANSIENT);
     698             : 
     699             :         /* the buffer for all destination strings is allocated only once,
     700             :          * and extended when needed */
     701          90 :         max_dest_size = len_replacement + 1;
     702          90 :         tmpres = GDKmalloc(max_dest_size);
     703          90 :         if (tmpbat == NULL || tmpres == NULL) {
     704           0 :                 pcre_free_study(extra);
     705           0 :                 pcre_free(pcre_code);
     706           0 :                 GDKfree(ovector);
     707           0 :                 BBPreclaim(tmpbat);
     708           0 :                 GDKfree(tmpres);
     709           0 :                 throw(MAL, global ? "batpcre.replace" : "batpcre.replace_first",
     710             :                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     711             :         }
     712          90 :         BATiter origin_strsi = bat_iterator(origin_strs);
     713       51410 :         BATloop(origin_strs, p, q) {
     714       51320 :                 origin_str = BUNtvar(origin_strsi, p);
     715      102692 :                 tmpres = single_replace(pcre_code, extra, origin_str,
     716       51320 :                                                                 (int) strlen(origin_str), exec_options,
     717             :                                                                 ovector, ovecsize, replacement,
     718             :                                                                 len_replacement, backrefs, nbackrefs, global,
     719             :                                                                 tmpres, &max_dest_size);
     720       51372 :                 if (tmpres == NULL || BUNappend(tmpbat, tmpres, false) != GDK_SUCCEED) {
     721           0 :                         bat_iterator_end(&origin_strsi);
     722           0 :                         pcre_free_study(extra);
     723           0 :                         pcre_free(pcre_code);
     724           0 :                         GDKfree(ovector);
     725           0 :                         GDKfree(tmpres);
     726           0 :                         BBPreclaim(tmpbat);
     727           0 :                         throw(MAL, global ? "batpcre.replace" : "batpcre.replace_first",
     728             :                                   SQLSTATE(HY013) MAL_MALLOC_FAIL);
     729             :                 }
     730             :         }
     731          90 :         bat_iterator_end(&origin_strsi);
     732          90 :         pcre_free_study(extra);
     733          90 :         pcre_free(pcre_code);
     734          90 :         GDKfree(ovector);
     735          90 :         GDKfree(tmpres);
     736          90 :         *res = tmpbat;
     737          90 :         return MAL_SUCCEED;
     738             : #else
     739             :         (void) res;
     740             :         (void) origin_strs;
     741             :         (void) pattern;
     742             :         (void) replacement;
     743             :         (void) flags;
     744             :         (void) global;
     745             :         throw(MAL, global ? "batpcre.replace" : "batpcre.replace_first",
     746             :                   "Database was compiled without PCRE support.");
     747             : #endif
     748             : }
     749             : 
     750             : static str
     751           4 : pcre_match_with_flags(bit *ret, const char *val, const char *pat,
     752             :                                           const char *flags)
     753             : {
     754           4 :         int pos;
     755             : #ifdef HAVE_LIBPCRE
     756           4 :         const char *err_p = NULL;
     757           4 :         int errpos = 0;
     758           4 :         int options = PCRE_UTF8 | PCRE_NO_UTF8_CHECK | PCRE_DOTALL;
     759           4 :         pcre *re;
     760             : #else
     761             :         int options = REG_NOSUB | REG_EXTENDED;
     762             :         regex_t re;
     763             :         int errcode;
     764             :         int retval;
     765             : #endif
     766             : 
     767           4 :         while (*flags) {
     768           0 :                 switch (*flags) {
     769           0 :                 case 'i':
     770             : #ifdef HAVE_LIBPCRE
     771           0 :                         options |= PCRE_CASELESS;
     772             : #else
     773             :                         options |= REG_ICASE;
     774             : #endif
     775           0 :                         break;
     776           0 :                 case 'm':
     777             : #ifdef HAVE_LIBPCRE
     778           0 :                         options |= PCRE_MULTILINE;
     779             : #else
     780             :                         options |= REG_NEWLINE;
     781             : #endif
     782           0 :                         break;
     783             : #ifdef HAVE_LIBPCRE
     784           0 :                 case 's':
     785           0 :                         options |= PCRE_DOTALL;
     786           0 :                         break;
     787             : #endif
     788           0 :                 case 'x':
     789             : #ifdef HAVE_LIBPCRE
     790           0 :                         options |= PCRE_EXTENDED;
     791             : #else
     792             :                         options |= REG_EXTENDED;
     793             : #endif
     794           0 :                         break;
     795           0 :                 default:
     796           0 :                         throw(MAL, "pcre.match", ILLEGAL_ARGUMENT
     797             :                                   ": unsupported flag character '%c'\n", *flags);
     798             :                 }
     799           0 :                 flags++;
     800             :         }
     801           4 :         if (strNil(val)) {
     802           0 :                 *ret = FALSE;
     803           0 :                 return MAL_SUCCEED;
     804             :         }
     805             : 
     806             : #ifdef HAVE_LIBPCRE
     807           4 :         if ((re = pcre_compile(pat, options, &err_p, &errpos, NULL)) == NULL)
     808             : #else
     809             :         if ((errcode = regcomp(&re, pat, options)) != 0)
     810             : #endif
     811             :         {
     812           0 :                 throw(MAL, "pcre.match", OPERATION_FAILED
     813             :                           ": compilation of regular expression (%s) failed "
     814             : #ifdef HAVE_LIBPCRE
     815             :                           "at %d with '%s'", pat, errpos, err_p
     816             : #else
     817             :                           , pat
     818             : #endif
     819             :                                 );
     820             :         }
     821             : #ifdef HAVE_LIBPCRE
     822           4 :         pos = pcre_exec(re, NULL, val, (int) strlen(val), 0, PCRE_NO_UTF8_CHECK,
     823             :                                         NULL, 0);
     824           4 :         pcre_free(re);
     825             : #else
     826             :         retval = regexec(&re, val, (size_t) 0, NULL, 0);
     827             :         pos = retval == REG_NOMATCH ? -1 : (retval == REG_ENOSYS ? -2 : 0);
     828             :         regfree(&re);
     829             : #endif
     830           4 :         if (pos >= 0)
     831           3 :                 *ret = TRUE;
     832           1 :         else if (pos == -1)
     833           1 :                 *ret = FALSE;
     834             :         else
     835           0 :                 throw(MAL, "pcre.match", OPERATION_FAILED
     836             :                           ": matching of regular expression (%s) failed with %d", pat, pos);
     837             :         return MAL_SUCCEED;
     838             : }
     839             : 
     840             : #ifdef HAVE_LIBPCRE
     841             : /* special characters in PCRE that need to be escaped */
     842             : static const char pcre_specials[] = "$()*+.?[\\]^{|}";
     843             : #else
     844             : /* special characters in POSIX basic regular expressions that need to
     845             :  * be escaped */
     846             : static const char pcre_specials[] = "$()*+.?[\\^{|";
     847             : #endif
     848             : 
     849             : /* change SQL LIKE pattern into PCRE pattern */
     850             : static str
     851           6 : sql2pcre(str *r, const char *pat, const char *esc_str)
     852             : {
     853           6 :         int escaped = 0;
     854           6 :         int hasWildcard = 0;
     855           6 :         char *ppat;
     856          12 :         int esc = strNil(esc_str) ? 0 : esc_str[0];     /* should change to utf8_convert() */
     857           6 :         int specials;
     858           6 :         int c;
     859             : 
     860           6 :         if (strlen(esc_str) > 1)
     861           0 :                 throw(MAL, "pcre.sql2pcre",
     862             :                           SQLSTATE(22019) ILLEGAL_ARGUMENT
     863             :                           ": ESCAPE string must have length 1");
     864           6 :         if (pat == NULL)
     865           0 :                 throw(MAL, "pcre.sql2pcre",
     866             :                           SQLSTATE(22019) ILLEGAL_ARGUMENT
     867             :                           ": (I)LIKE pattern must not be NULL");
     868           6 :         ppat = GDKmalloc(strlen(pat) * 3 +
     869             :                                          3 /* 3 = "^'the translated regexp'$0" */ );
     870           6 :         if (ppat == NULL)
     871           0 :                 throw(MAL, "pcre.sql2pcre", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     872             : 
     873           6 :         *r = ppat;
     874             :         /* The escape character can be a char which is special in a PCRE
     875             :          * expression.  If the user used the "+" char as escape and has "++"
     876             :          * in their pattern, then replacing this with "+" is not correct and
     877             :          * should be "\+" instead. */
     878           6 :         specials = (esc && strchr(pcre_specials, esc) != NULL);
     879             : 
     880           6 :         *ppat++ = '^';
     881          17 :         while ((c = *pat++) != 0) {
     882          11 :                 if (c == esc) {
     883           2 :                         if (escaped) {
     884           1 :                                 if (specials) { /* change ++ into \+ */
     885           1 :                                         *ppat++ = esc;
     886             :                                 } else {                /* do not escape simple escape symbols */
     887           0 :                                         ppat[-1] = esc; /* overwrite backslash */
     888             :                                 }
     889             :                                 escaped = 0;
     890             :                         } else {
     891           1 :                                 *ppat++ = '\\';
     892           1 :                                 escaped = 1;
     893             :                         }
     894             :                         hasWildcard = 1;
     895           9 :                 } else if (strchr(pcre_specials, c) != NULL) {
     896             :                         /* escape PCRE special chars, avoid double backslash if the
     897             :                          * user uses an invalid escape sequence */
     898           2 :                         if (!escaped)
     899           2 :                                 *ppat++ = '\\';
     900           2 :                         *ppat++ = c;
     901           2 :                         hasWildcard = 1;
     902           2 :                         escaped = 0;
     903           7 :                 } else if (c == '%' && !escaped) {
     904           3 :                         *ppat++ = '.';
     905           3 :                         *ppat++ = '*';
     906           3 :                         *ppat++ = '?';
     907           3 :                         hasWildcard = 1;
     908             :                         /* collapse multiple %, but only if it isn't the escape */
     909           3 :                         if (esc != '%')
     910           3 :                                 while (*pat == '%')
     911           0 :                                         pat++;
     912           4 :                 } else if (c == '_' && !escaped) {
     913           3 :                         *ppat++ = '.';
     914           3 :                         hasWildcard = 1;
     915             :                 } else {
     916           1 :                         if (escaped) {
     917           0 :                                 ppat[-1] = c;   /* overwrite backslash of invalid escape */
     918             :                         } else {
     919           1 :                                 *ppat++ = c;
     920             :                         }
     921             :                         escaped = 0;
     922             :                 }
     923             :         }
     924             :         /* no wildcard or escape character at end of string */
     925           6 :         if (!hasWildcard || escaped) {
     926           1 :                 GDKfree(*r);
     927           1 :                 *r = NULL;
     928           1 :                 if (escaped)
     929           0 :                         throw(MAL, "pcre.sql2pcre",
     930             :                                   SQLSTATE(22019) ILLEGAL_ARGUMENT
     931             :                                   ": (I)LIKE pattern must not end with escape character");
     932           1 :                 *r = GDKstrdup(str_nil);
     933           1 :                 if (*r == NULL)
     934           0 :                         throw(MAL, "pcre.sql2pcre", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     935             :         } else {
     936           5 :                 *ppat++ = '$';
     937           5 :                 *ppat = 0;
     938             :         }
     939             :         return MAL_SUCCEED;
     940             : }
     941             : 
     942             : #ifdef HAVE_LIBPCRE
     943             : /* change SQL PATINDEX pattern into PCRE pattern */
     944             : static str
     945          25 : pat2pcre(str *r, const char *pat)
     946             : {
     947          25 :         size_t len = strlen(pat);
     948          25 :         char *ppat = GDKmalloc(len * 2 + 3 /* 3 = "^'the translated regexp'$0" */ );
     949          25 :         int start = 0;
     950             : 
     951          25 :         if (ppat == NULL)
     952           0 :                 throw(MAL, "pcre.sql2pcre", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     953          25 :         *r = ppat;
     954          77 :         while (*pat) {
     955          52 :                 int c = *pat++;
     956             : 
     957          52 :                 if (strchr(pcre_specials, c) != NULL) {
     958          17 :                         *ppat++ = '\\';
     959          17 :                         *ppat++ = c;
     960          35 :                 } else if (c == '%') {
     961           3 :                         if (start && *pat) {
     962           0 :                                 *ppat++ = '.';
     963           0 :                                 *ppat++ = '*';
     964             :                         }
     965           3 :                         start++;
     966          32 :                 } else if (c == '_') {
     967           0 :                         *ppat++ = '.';
     968             :                 } else {
     969          32 :                         *ppat++ = c;
     970             :                 }
     971             :         }
     972          25 :         *ppat = 0;
     973          25 :         return MAL_SUCCEED;
     974             : }
     975             : #endif
     976             : 
     977             : /*
     978             :  * @+ Wrapping
     979             :  */
     980             : 
     981             : static str
     982          14 : PCREreplace_wrap(str *res, const char *const *or, const char *const *pat,
     983             :                                  const char *const *repl, const char *const *flags)
     984             : {
     985          14 :         return pcre_replace(res, *or, *pat, *repl, *flags, true);
     986             : }
     987             : 
     988             : static str
     989           0 : PCREreplacefirst_wrap(str *res, const char *const *or, const char *const *pat,
     990             :                                           const char *const *repl, const char *const *flags)
     991             : {
     992           0 :         return pcre_replace(res, *or, *pat, *repl, *flags, false);
     993             : }
     994             : 
     995             : static str
     996          90 : PCREreplace_bat_wrap(bat *res, const bat *bid, const char *const *pat,
     997             :                                          const char *const *repl, const char *const *flags)
     998             : {
     999          90 :         BAT *b, *bn = NULL;
    1000          90 :         str msg;
    1001          90 :         if ((b = BATdescriptor(*bid)) == NULL)
    1002           0 :                 throw(MAL, "batpcre.replace", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
    1003             : 
    1004          90 :         msg = pcre_replace_bat(&bn, b, *pat, *repl, *flags, true);
    1005          90 :         if (msg == MAL_SUCCEED) {
    1006          90 :                 *res = bn->batCacheid;
    1007          90 :                 BBPkeepref(bn);
    1008             :         }
    1009          90 :         BBPunfix(b->batCacheid);
    1010          90 :         return msg;
    1011             : }
    1012             : 
    1013             : static str
    1014           0 : PCREreplacefirst_bat_wrap(bat *res, const bat *bid, const char *const *pat,
    1015             :                                                   const char *const *repl, const char *const *flags)
    1016             : {
    1017           0 :         BAT *b, *bn = NULL;
    1018           0 :         str msg;
    1019           0 :         if ((b = BATdescriptor(*bid)) == NULL)
    1020           0 :                 throw(MAL, "batpcre.replace_first", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
    1021             : 
    1022           0 :         msg = pcre_replace_bat(&bn, b, *pat, *repl, *flags, false);
    1023           0 :         if (msg == MAL_SUCCEED) {
    1024           0 :                 *res = bn->batCacheid;
    1025           0 :                 BBPkeepref(bn);
    1026             :         }
    1027           0 :         BBPunfix(b->batCacheid);
    1028           0 :         return msg;
    1029             : }
    1030             : 
    1031             : static str
    1032           4 : PCREmatch(bit *ret, const char *const *val, const char *const *pat)
    1033             : {
    1034           4 :         return pcre_match_with_flags(ret, *val, *pat, "");
    1035             : }
    1036             : 
    1037             : static str
    1038           0 : PCREimatch(bit *ret, const char *const *val, const char *const *pat)
    1039             : {
    1040           0 :         return pcre_match_with_flags(ret, *val, *pat, "i");
    1041             : }
    1042             : 
    1043             : static str
    1044          25 : PCREindex(int *res, const pcre *pattern, const char *const *s)
    1045             : {
    1046             : #ifdef HAVE_LIBPCRE
    1047          25 :         int v[3];
    1048             : 
    1049          25 :         v[0] = v[1] = *res = 0;
    1050          25 :         if (pcre_exec(pattern, NULL, *s, (int) strlen(*s), 0,
    1051             :                                   PCRE_NO_UTF8_CHECK, v, 3) >= 0) {
    1052          23 :                 *res = v[1];
    1053             :         }
    1054          25 :         return MAL_SUCCEED;
    1055             : #else
    1056             :         (void) res;
    1057             :         (void) pattern;
    1058             :         (void) s;
    1059             :         throw(MAL, "pcre.index", "Database was compiled without PCRE support.");
    1060             : #endif
    1061             : }
    1062             : 
    1063             : static str
    1064          27 : PCREpatindex(int *ret, const char *const *pat, const char *const *val)
    1065             : {
    1066             : #ifdef HAVE_LIBPCRE
    1067          27 :         pcre *re = NULL;
    1068          27 :         char *ppat = NULL, *msg;
    1069             : 
    1070          53 :         if (strNil(*pat) || strNil(*val)) {
    1071           2 :                 *ret = int_nil;
    1072           2 :                 return MAL_SUCCEED;
    1073             :         }
    1074             : 
    1075          25 :         if ((msg = pat2pcre(&ppat, *pat)) != MAL_SUCCEED)
    1076             :                 return msg;
    1077          25 :         if ((msg = pcre_compile_wrap(&re, ppat, FALSE)) != MAL_SUCCEED) {
    1078           0 :                 GDKfree(ppat);
    1079           0 :                 return msg;
    1080             :         }
    1081          25 :         GDKfree(ppat);
    1082          25 :         msg = PCREindex(ret, re, val);
    1083          25 :         pcre_free(re);
    1084          25 :         return msg;
    1085             : #else
    1086             :         (void) ret;
    1087             :         (void) pat;
    1088             :         (void) val;
    1089             :         throw(MAL, "pcre.patindex", "Database was compiled without PCRE support.");
    1090             : #endif
    1091             : }
    1092             : 
    1093             : static str
    1094           0 : PCREquote(str *ret, const char *const *val)
    1095             : {
    1096           0 :         char *p;
    1097           0 :         const char *s = *val;
    1098             : 
    1099           0 :         *ret = p = GDKmalloc(strlen(s) * 2 + 1);        /* certainly long enough */
    1100           0 :         if (p == NULL)
    1101           0 :                 throw(MAL, "pcre.quote", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1102             :         /* quote all non-alphanumeric ASCII characters (i.e. leave
    1103             :            non-ASCII and alphanumeric alone) */
    1104           0 :         while (*s) {
    1105           0 :                 if (!((*s & 0x80) != 0 ||
    1106           0 :                           ('a' <= *s && *s <= 'z') ||
    1107           0 :                           ('A' <= *s && *s <= 'Z') || isdigit((unsigned char) *s)))
    1108           0 :                         *p++ = '\\';
    1109           0 :                 *p++ = *s++;
    1110             :         }
    1111           0 :         *p = 0;
    1112           0 :         return MAL_SUCCEED;
    1113             : }
    1114             : 
    1115             : static str
    1116           6 : PCREsql2pcre(str *ret, const char *const *pat, const char *const *esc)
    1117             : {
    1118           6 :         return sql2pcre(ret, *pat, *esc);
    1119             : }
    1120             : 
    1121             : static inline str
    1122        7341 : choose_like_path(bool *use_re, bool *use_strcmp, bool *empty,
    1123             :                                  const char *pat, const char *esc)
    1124             : {
    1125        7341 :         str res = MAL_SUCCEED;
    1126        7341 :         *use_re = false;
    1127        7341 :         *use_strcmp = false;
    1128        7341 :         *empty = false;
    1129             : 
    1130             : 
    1131       14192 :         if (strNil(pat) || strNil(esc)) {
    1132         490 :                 *empty = true;
    1133             :         } else {
    1134        6851 :                 if (!mnre_is_pattern_properly_escaped(pat, (unsigned char) *esc))
    1135           5 :                         throw(MAL, "pcre.sql2pcre",
    1136             :                                   SQLSTATE(22019) ILLEGAL_ARGUMENT
    1137             :                                   ": (I)LIKE pattern must not end with escape character");
    1138        6845 :                 if (is_strcmpable(pat, esc)) {
    1139         882 :                         *use_re = true;
    1140         882 :                         *use_strcmp = true;
    1141             :                 } else {
    1142        5963 :                         *use_re = true;
    1143             :                 }
    1144             :         }
    1145             :         return res;
    1146             : }
    1147             : 
    1148             : static str
    1149         234 : PCRElike_imp(bit *ret, const char *const *s, const char *const *pat,
    1150             :                          const char *const *esc, const bit *isens)
    1151             : {
    1152         234 :         str res = MAL_SUCCEED;
    1153         234 :         bool use_re = false, use_strcmp = false, empty = false;
    1154         234 :         struct RE *re = NULL;
    1155             : 
    1156         234 :         if ((res = choose_like_path(&use_re, &use_strcmp, &empty,
    1157             :                                                                 *pat, *esc)) != MAL_SUCCEED)
    1158             :                 return res;
    1159             : 
    1160         459 :         MT_thread_setalgorithm(empty ? "pcrelike: trivially empty" : use_strcmp ?
    1161         225 :                                                    "pcrelike: pattern matching using strcmp" : use_re ?
    1162             :                                                    "pcrelike: pattern matching using RE" :
    1163             :                                                    "pcrelike: pattern matching using pcre");
    1164             : 
    1165         468 :         if (strNil(*s) || empty) {
    1166           0 :                 *ret = bit_nil;
    1167             :         } else {
    1168         234 :                 if (use_strcmp) {
    1169           9 :                         *ret = *isens ? GDKstrcasecmp(*s, *pat) == 0
    1170           7 :                                 : strcmp(*s, *pat) == 0;
    1171             :                 } else {
    1172         225 :                         if (!(re = mnre_create(*pat, *isens, (unsigned char) **esc)))
    1173           0 :                                 res = createException(MAL, "pcre.like4",
    1174             :                                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1175             :                         else
    1176         225 :                                 *ret = mnre_match(*s, re);
    1177             :                 }
    1178             :         }
    1179             : 
    1180         234 :         if (re)
    1181         225 :                 mnre_destroy(re);
    1182             :         return res;
    1183             : }
    1184             : 
    1185             : static str
    1186         234 : PCRElike(bit *ret, const char *const *s, const char *const *pat,
    1187             :                  const char *const *esc, const bit *isens)
    1188             : {
    1189         229 :         return PCRElike_imp(ret, s, pat, esc, isens);
    1190             : }
    1191             : 
    1192             : static str
    1193           5 : PCREnotlike(bit *ret, const char *const *s, const char *const *pat,
    1194             :                         const char *const *esc, const bit *isens)
    1195             : {
    1196           5 :         str tmp;
    1197           5 :         bit r;
    1198             : 
    1199           5 :         rethrow("str.not_like", tmp, PCRElike(&r, s, pat, esc, isens));
    1200           5 :         *ret = r == bit_nil ? bit_nil : !r;
    1201           5 :         return MAL_SUCCEED;
    1202             : }
    1203             : 
    1204             : static inline str
    1205        6613 : mnre_like_build(struct RE **re, const char *pat, bool caseignore,
    1206             :                           bool use_strcmp, uint32_t esc)
    1207             : {
    1208        6613 :         if (!use_strcmp) {
    1209        5740 :                 if (!(*re = mnre_create(pat, caseignore, esc)))
    1210           0 :                         return createException(MAL, "pcre.re_like_build",
    1211             :                                                                    SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1212             :         }
    1213             :         return MAL_SUCCEED;
    1214             : }
    1215             : 
    1216             : static inline bit
    1217        6251 : mnre_like_proj_apply(const char *s, const struct RE *restrict re,
    1218             :                                    const char *pat,
    1219             :                                    bool caseignore, bool anti, bool use_strcmp)
    1220             : {
    1221        6251 :         if (strNil(s))
    1222         446 :                 return bit_nil;
    1223        5805 :         if (use_strcmp) {
    1224        1134 :                 if (caseignore) {
    1225         492 :                         if (anti)
    1226         461 :                                 return GDKstrcasecmp(s, pat) != 0;
    1227             :                         else
    1228          31 :                                 return GDKstrcasecmp(s, pat) == 0;
    1229             :                 } else {
    1230         642 :                         if (anti)
    1231         302 :                                 return strcmp(s, pat) != 0;
    1232             :                         else
    1233         340 :                                 return strcmp(s, pat) == 0;
    1234             :                 }
    1235             :         } else {
    1236        4671 :                 if (anti)
    1237         137 :                         return !mnre_match(s, re);
    1238             :                 else
    1239        4534 :                         return mnre_match(s, re);
    1240             :         }
    1241             : }
    1242             : 
    1243             : static inline void
    1244        6723 : mnre_like_clean(struct RE **re)
    1245             : {
    1246        6723 :         if (*re) {
    1247         562 :                 mnre_destroy(*re);
    1248        5740 :                 *re = NULL;
    1249             :         }
    1250             : }
    1251             : 
    1252             : static str
    1253         684 : BATPCRElike_imp(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci,
    1254             :                                 const char *const *esc, const bit *isens, const bit *not)
    1255             : {
    1256         684 :         str msg = MAL_SUCCEED;
    1257         684 :         BAT *b = NULL, *pbn = NULL, *bn = NULL;
    1258         684 :         const char *input = NULL;
    1259         684 :         bool use_re = false,
    1260         684 :                 use_strcmp = false,
    1261         684 :                 empty = false,
    1262         684 :                 isensitive = (bool) *isens,
    1263         684 :                 anti = (bool) *not,
    1264         684 :                 has_nil = false,
    1265         684 :                 input_is_a_bat = isaBatType(getArgType(mb, pci, 1)),
    1266         684 :                 pattern_is_a_bat = isaBatType(getArgType(mb, pci, 2));
    1267         684 :         bat *r = getArgReference_bat(stk, pci, 0);
    1268         684 :         BUN q = 0;
    1269         684 :         bit *restrict ret = NULL;
    1270         684 :         struct RE *mnre_simple = NULL;
    1271         684 :         BATiter bi = (BATiter) { 0 }, pi;
    1272             : 
    1273         684 :         (void) cntxt;
    1274         684 :         if (input_is_a_bat) {
    1275         684 :                 bat *bid = getArgReference_bat(stk, pci, 1);
    1276         684 :                 if (!(b = BATdescriptor(*bid))) {
    1277           0 :                         msg = createException(MAL, "batalgebra.batpcrelike3",
    1278             :                                                                   SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
    1279           0 :                         goto bailout;
    1280             :                 }
    1281             :         }
    1282         684 :         if (pattern_is_a_bat) {
    1283          88 :                 bat *pb = getArgReference_bat(stk, pci, 2);
    1284          88 :                 if (!(pbn = BATdescriptor(*pb))) {
    1285           0 :                         msg = createException(MAL, "batalgebra.batpcrelike3",
    1286             :                                                                   SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
    1287           0 :                         goto bailout;
    1288             :                 }
    1289             :         }
    1290         684 :         assert((!b || ATOMstorage(b->ttype) == TYPE_str)
    1291             :                    && (!pbn || ATOMstorage(pbn->ttype) == TYPE_str));
    1292             : 
    1293         684 :         q = BATcount(b ? b : pbn);
    1294         684 :         if (!(bn = COLnew(b ? b->hseqbase : pbn->hseqbase, TYPE_bit, q, TRANSIENT))) {
    1295           0 :                 msg = createException(MAL, "batalgebra.batpcrelike3",
    1296             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1297           0 :                 goto bailout;
    1298             :         }
    1299         684 :         ret = (bit *) Tloc(bn, 0);
    1300             : 
    1301         684 :         if (pattern_is_a_bat) {
    1302          88 :                 pi = bat_iterator(pbn);
    1303          88 :                 if (b)
    1304          88 :                         bi = bat_iterator(b);
    1305             :                 else
    1306           0 :                         input = *getArgReference_str(stk, pci, 1);
    1307             : 
    1308        1219 :                 for (BUN p = 0; p < q; p++) {
    1309        1131 :                         const char *next_input = b ? BUNtvar(bi, p) : input,
    1310        1131 :                                 *np = BUNtvar(pi, p);
    1311             : 
    1312        1131 :                         if ((msg = choose_like_path(&use_re, &use_strcmp, &empty,
    1313             :                                                                                 np, *esc)) != MAL_SUCCEED) {
    1314           0 :                                 bat_iterator_end(&pi);
    1315           0 :                                 if (b)
    1316           0 :                                         bat_iterator_end(&bi);
    1317           0 :                                 goto bailout;
    1318             :                         }
    1319             : 
    1320        1132 :                         if (empty) {
    1321         462 :                                 ret[p] = bit_nil;
    1322             :                         } else {
    1323         670 :                                 if ((msg = mnre_like_build(&mnre_simple, np, isensitive,
    1324             :                                                                                  use_strcmp,
    1325         670 :                                                                                  (unsigned char) **esc)) != MAL_SUCCEED) {
    1326           0 :                                         bat_iterator_end(&pi);
    1327           0 :                                         if (b)
    1328           0 :                                                 bat_iterator_end(&bi);
    1329           0 :                                         goto bailout;
    1330             :                                 }
    1331         670 :                                 ret[p] = mnre_like_proj_apply(next_input, mnre_simple, np,
    1332             :                                                                                         isensitive, anti, use_strcmp);
    1333         670 :                                 mnre_like_clean(&mnre_simple);
    1334             :                         }
    1335        1132 :                         has_nil |= is_bit_nil(ret[p]);
    1336             :                 }
    1337          88 :                 bat_iterator_end(&pi);
    1338          88 :                 if (b)
    1339          88 :                         bat_iterator_end(&bi);
    1340             :         } else {
    1341         596 :                 const char *pat = *getArgReference_str(stk, pci, 2);
    1342         596 :                 if ((msg = choose_like_path(&use_re, &use_strcmp, &empty,
    1343             :                                                                         pat, *esc)) != MAL_SUCCEED)
    1344           5 :                         goto bailout;
    1345             : 
    1346         591 :                 bi = bat_iterator(b);
    1347        1123 :                 MT_thread_setalgorithm(empty ? "pcrelike: trivially empty" : use_strcmp
    1348             :                                                            ? "pcrelike: pattern matching using strcmp" :
    1349         532 :                                                            use_re ? "pcrelike: pattern matching using RE" :
    1350             :                                                            "pcrelike: pattern matching using pcre");
    1351             : 
    1352         591 :                 if (empty) {
    1353          43 :                         for (BUN p = 0; p < q; p++)
    1354          26 :                                 ret[p] = bit_nil;
    1355             :                         has_nil = true;
    1356             :                 } else {
    1357         574 :                         if ((msg = mnre_like_build(&mnre_simple, pat, isensitive, use_strcmp,
    1358         574 :                                                                          (unsigned char) **esc)) != MAL_SUCCEED) {
    1359           0 :                                 bat_iterator_end(&bi);
    1360           0 :                                 goto bailout;
    1361             :                         }
    1362        6155 :                         for (BUN p = 0; p < q; p++) {
    1363        5582 :                                 const char *s = BUNtvar(bi, p);
    1364        5585 :                                 ret[p] = mnre_like_proj_apply(s, mnre_simple, pat, isensitive,
    1365             :                                                                                         anti, use_strcmp);
    1366        5578 :                                 has_nil |= is_bit_nil(ret[p]);
    1367             :                         }
    1368             :                 }
    1369         590 :                 bat_iterator_end(&bi);
    1370             :         }
    1371             : 
    1372         684 :   bailout:
    1373         684 :         mnre_like_clean(&mnre_simple);
    1374         684 :         if (bn && !msg) {
    1375         679 :                 BATsetcount(bn, q);
    1376         679 :                 bn->tnil = has_nil;
    1377         679 :                 bn->tnonil = !has_nil;
    1378         679 :                 bn->tkey = BATcount(bn) <= 1;
    1379         679 :                 bn->tsorted = BATcount(bn) <= 1;
    1380         679 :                 bn->trevsorted = BATcount(bn) <= 1;
    1381         679 :                 *r = bn->batCacheid;
    1382         679 :                 BBPkeepref(bn);
    1383           5 :         } else if (bn)
    1384           5 :                 BBPreclaim(bn);
    1385         684 :         BBPreclaim(b);
    1386         684 :         BBPreclaim(pbn);
    1387         684 :         return msg;
    1388             : }
    1389             : 
    1390             : static str
    1391         541 : BATPCRElike(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    1392             : {
    1393         541 :         const char *esc = *getArgReference_str(stk, pci, 3);
    1394         541 :         const bit *ci = getArgReference_bit(stk, pci, 4);
    1395         541 :         bit no = FALSE;
    1396             : 
    1397         541 :         return BATPCRElike_imp(cntxt, mb, stk, pci, &esc, ci, &no);
    1398             : }
    1399             : 
    1400             : static str
    1401         143 : BATPCREnotlike(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    1402             : {
    1403         143 :         const char *esc = *getArgReference_str(stk, pci, 3);
    1404         143 :         const bit *ci = getArgReference_bit(stk, pci, 4);
    1405         143 :         bit yes = TRUE;
    1406             : 
    1407         143 :         return BATPCRElike_imp(cntxt, mb, stk, pci, &esc, ci, &yes);
    1408             : }
    1409             : 
    1410             : /* scan select loop with or without candidates */
    1411             : #define pcrescanloop(TEST, KEEP_NULLS)                                                                  \
    1412             :         do {                                                                                                                            \
    1413             :                 TRC_DEBUG(ALGO,                                                                                                 \
    1414             :                                   "PCREselect(b=%s#"BUNFMT",anti=%d): "                                     \
    1415             :                                   "scanselect %s\n", BATgetId(b), BATcount(b),                        \
    1416             :                                   anti, #TEST);                                                                                 \
    1417             :                 if (!s || BATtdense(s)) {                                                                               \
    1418             :                         for (; p < q; p++) {                                                                         \
    1419             :                                 GDK_CHECK_TIMEOUT(qry_ctx, counter,                                             \
    1420             :                                                                   GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); \
    1421             :                                 const char *restrict v = BUNtvar(bi, p - off);                  \
    1422             :                                 if ((TEST) || ((KEEP_NULLS) && strNil(v)))                              \
    1423             :                                         vals[cnt++] = p;                                                                        \
    1424             :                         }                                                                                                                       \
    1425             :                 } else {                                                                                                                \
    1426             :                         for (; p < ncands; p++) {                                                                    \
    1427             :                                 GDK_CHECK_TIMEOUT(qry_ctx, counter,                                             \
    1428             :                                                                   GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); \
    1429             :                                 oid o = canditer_next(ci);                                                              \
    1430             :                                 const char *restrict v = BUNtvar(bi, o - off);                  \
    1431             :                                 if ((TEST) || ((KEEP_NULLS) && strNil(v)))                              \
    1432             :                                         vals[cnt++] = o;                                                                        \
    1433             :                         }                                                                                                                       \
    1434             :                 }                                                                                                                               \
    1435             :         } while (0)
    1436             : 
    1437             : static str
    1438        5260 : mnre_likeselect(BAT *bn, BAT *b, BAT *s, struct canditer *ci, BUN p, BUN q,
    1439             :                           BUN *rcnt, const char *pat, bool caseignore, bool anti,
    1440             :                           bool use_strcmp, uint32_t esc, bool keep_nulls)
    1441             : {
    1442        5260 :         BATiter bi = bat_iterator(b);
    1443        5263 :         BUN cnt = 0, ncands = ci->ncand;
    1444        5263 :         oid off = b->hseqbase, *restrict vals = Tloc(bn, 0);
    1445        5263 :         struct RE *re = NULL;
    1446        5263 :         str msg = MAL_SUCCEED;
    1447             : 
    1448        5263 :         size_t counter = 0;
    1449        5263 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    1450             : 
    1451        5263 :         if ((msg = mnre_like_build(&re, pat, caseignore, use_strcmp,
    1452             :                                                          esc)) != MAL_SUCCEED)
    1453           0 :                 goto bailout;
    1454             : 
    1455        5262 :         if (use_strcmp) {
    1456          85 :                 if (caseignore) {
    1457          27 :                         if (anti)
    1458          58 :                                 pcrescanloop(!strNil(v)
    1459             :                                                          && GDKstrcasecmp(v, pat) != 0, keep_nulls);
    1460             :                         else
    1461         671 :                                 pcrescanloop(!strNil(v)
    1462             :                                                          && GDKstrcasecmp(v, pat) == 0, keep_nulls);
    1463             :                 } else {
    1464          58 :                         if (anti)
    1465           5 :                                 pcrescanloop(!strNil(v) && strcmp(v, pat) != 0, keep_nulls);
    1466             :                         else
    1467        8481 :                                 pcrescanloop(!strNil(v) && strcmp(v, pat) == 0, keep_nulls);
    1468             :                 }
    1469             :         } else {
    1470        5177 :                 if (caseignore) {
    1471          74 :                         if (anti) {
    1472          44 :                                 pcrescanloop(!strNil(v)
    1473             :                                                          && !mnre_match(v, re), keep_nulls);
    1474             :                         } else {
    1475       11485 :                                 pcrescanloop(!strNil(v)
    1476             :                                                          && mnre_match(v, re), keep_nulls);
    1477             :                         }
    1478             :                 } else {
    1479        5103 :                         if (anti)
    1480       55258 :                                 pcrescanloop(!strNil(v)
    1481             :                                                          && !mnre_match(v, re), keep_nulls);
    1482             :                         else
    1483      174899 :                                 pcrescanloop(!strNil(v)
    1484             :                                                          && mnre_match(v, re), keep_nulls);
    1485             :                 }
    1486             :         }
    1487             : 
    1488          41 :   bailout:
    1489        5259 :         bat_iterator_end(&bi);
    1490        5263 :         mnre_like_clean(&re);
    1491        5263 :         *rcnt = cnt;
    1492        5263 :         return msg;
    1493             : }
    1494             : 
    1495             : static str
    1496        5263 : PCRElikeselect(bat *ret, const bat *bid, const bat *sid, const char *const *pat,
    1497             :                            const char *const *esc, const bit *caseignore, const bit *anti)
    1498             : {
    1499        5263 :         BAT *b, *s = NULL, *bn = NULL, *old_s = NULL;
    1500        5263 :         str msg = MAL_SUCCEED;
    1501        5263 :         bool use_re = false,
    1502        5263 :                 use_strcmp = false,
    1503        5263 :                 empty = false;
    1504        5263 :         bool with_strimps = false;
    1505        5263 :         bool with_strimps_anti = false;
    1506        5263 :         BUN p = 0, q = 0, rcnt = 0;
    1507        5263 :         struct canditer ci;
    1508             : 
    1509        5263 :         if ((b = BATdescriptor(*bid)) == NULL) {
    1510           0 :                 msg = createException(MAL, "algebra.likeselect",
    1511             :                                                           SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
    1512           0 :                 goto bailout;
    1513             :         }
    1514        5263 :         if (sid && !is_bat_nil(*sid) && (s = BATdescriptor(*sid)) == NULL) {
    1515           0 :                 msg = createException(MAL, "algebra.likeselect",
    1516             :                                                           SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
    1517           0 :                 goto bailout;
    1518             :         }
    1519             : 
    1520        5262 :         assert(ATOMstorage(b->ttype) == TYPE_str);
    1521             : 
    1522        5262 :         if ((msg = choose_like_path(&use_re, &use_strcmp, &empty,
    1523             :                                                                 *pat, *esc)) != MAL_SUCCEED)
    1524           0 :                 goto bailout;
    1525             : 
    1526        5248 :         if (empty) {
    1527           0 :                 if (!(bn = BATdense(0, 0, 0)))
    1528           0 :                         msg = createException(MAL, "algebra.likeselect",
    1529             :                                                                   SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1530             : 
    1531           0 :                 goto bailout;
    1532             :         }
    1533             :         /* Since the strimp pre-filtering of a LIKE query produces a superset of the actual result the complement of that
    1534             :          * set will necessarily reject some of the matching entries in the NOT LIKE query.
    1535             :          *
    1536             :          * In this case we run the PCRElikeselect as a LIKE query with strimps and return the complement of the result,
    1537             :          * taking extra care to not return NULLs. This currently means that we do not run strimps for NOT LIKE queries if
    1538             :          * the BAT contains NULLs.
    1539             :          */
    1540        5248 :         if (BAThasstrimps(b)) {
    1541          24 :                 if (STRMPcreate(b, NULL) == GDK_SUCCEED) {
    1542          24 :                         BAT *tmp_s = STRMPfilter(b, s, *pat, *anti);
    1543          24 :                         if (tmp_s) {
    1544          24 :                                 old_s = s;
    1545          24 :                                 s = tmp_s;
    1546          24 :                                 if (!*anti)
    1547             :                                         with_strimps = true;
    1548             :                                 else
    1549           0 :                                         with_strimps_anti = true;
    1550             :                         }
    1551             :                 } else {                                /* If we cannot filter with the strimp just continue normally */
    1552           0 :                         GDKclrerr();
    1553             :                 }
    1554             :         }
    1555             : 
    1556             : 
    1557        5261 :         MT_thread_setalgorithm(use_strcmp
    1558        5261 :                                                    ? (with_strimps ?
    1559             :                                                           "pcrelike: pattern matching using strcmp with strimps"
    1560             :                                                           : (with_strimps_anti ?
    1561             :                                                                  "pcrelike: pattern matching using strcmp with strimps anti"
    1562        5261 :                                                                  : "pcrelike: pattern matching using strcmp")) :
    1563        5176 :                                                    use_re ? (with_strimps ?
    1564             :                                                                          "pcrelike: pattern matching using RE with strimps"
    1565             :                                                                          : (with_strimps_anti ?
    1566             :                                                                                 "pcrelike: patterm matching using RE with strimps anti"
    1567             :                                                                                 :
    1568             :                                                                                 "pcrelike: pattern matching using RE"))
    1569             :                                                    : (with_strimps ?
    1570             :                                                           "pcrelike: pattern matching using pcre with strimps"
    1571             :                                                           : (with_strimps_anti ?
    1572             :                                                                  "pcrelike: pattermatching using pcre with strimps anti"
    1573             :                                                                  : "pcrelike: pattern matching using pcre")));
    1574             : 
    1575        5263 :         canditer_init(&ci, b, s);
    1576        5263 :         if (!(bn = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT))) {
    1577           0 :                 msg = createException(MAL, "algebra.likeselect",
    1578             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1579           0 :                 goto bailout;
    1580             :         }
    1581             : 
    1582        5261 :         if (!s || BATtdense(s)) {
    1583        1125 :                 if (s) {
    1584        4095 :                         assert(BATtdense(s));
    1585        4095 :                         p = (BUN) s->tseqbase;
    1586        4095 :                         q = p + BATcount(s);
    1587        4095 :                         if ((oid) p < b->hseqbase)
    1588             :                                 p = b->hseqbase;
    1589        4095 :                         if ((oid) q > b->hseqbase + BATcount(b))
    1590             :                                 q = b->hseqbase + BATcount(b);
    1591             :                 } else {
    1592        1125 :                         p = b->hseqbase;
    1593        1125 :                         q = BATcount(b) + b->hseqbase;
    1594             :                 }
    1595             :         }
    1596             : 
    1597        5261 :         msg = mnre_likeselect(bn, b, s, &ci, p, q, &rcnt, *pat, *caseignore, *anti
    1598         961 :                                                 && !with_strimps_anti, use_strcmp,
    1599        5261 :                                                 (unsigned char) **esc, with_strimps_anti);
    1600             : 
    1601        5263 :         if (!msg) {                                     /* set some properties */
    1602        5263 :                 BATsetcount(bn, rcnt);
    1603        5263 :                 bn->tsorted = true;
    1604        5263 :                 bn->trevsorted = bn->batCount <= 1;
    1605        5263 :                 bn->tkey = true;
    1606        5263 :                 bn->tnil = false;
    1607        5263 :                 bn->tnonil = true;
    1608        5263 :                 bn->tseqbase = rcnt == 0 ? 0 : rcnt == 1 ? *(const oid *) Tloc(bn, 0) : rcnt == b->batCount ? b->hseqbase : oid_nil;
    1609        5263 :                 if (with_strimps_anti) {
    1610             :                         /* Reverse the result taking into account the original candidate list. */
    1611             :                         // BAT *rev = BATdiffcand(BATdense(b->hseqbase, 0, b->batCount), bn);
    1612           0 :                         BAT *rev;
    1613           0 :                         if (old_s) {
    1614           0 :                                 rev = BATdiffcand(old_s, bn);
    1615             : #ifndef NDEBUG
    1616           0 :                                 BAT *is = BATintersectcand(old_s, bn);
    1617           0 :                                 if (is) {
    1618           0 :                                         assert(is->batCount == bn->batCount);
    1619           0 :                                         BBPreclaim(is);
    1620             :                                 }
    1621           0 :                                 assert(rev->batCount == old_s->batCount - bn->batCount);
    1622             : #endif
    1623             :                         }
    1624             : 
    1625             :                         else
    1626           0 :                                 rev = BATnegcands(0, b->batCount, bn);
    1627             :                         /* BAT *rev = BATnegcands(0, b->batCount, bn); */
    1628           0 :                         BBPunfix(bn->batCacheid);
    1629           0 :                         bn = rev;
    1630             :                 }
    1631             :         }
    1632             : 
    1633             : 
    1634        5263 :   bailout:
    1635        5263 :         BBPreclaim(b);
    1636        5262 :         BBPreclaim(s);
    1637        5263 :         BBPreclaim(old_s);
    1638        5263 :         if (bn && !msg) {
    1639        5263 :                 *ret = bn->batCacheid;
    1640        5263 :                 BBPkeepref(bn);
    1641           0 :         } else if (bn)
    1642           0 :                 BBPreclaim(bn);
    1643        5262 :         return msg;
    1644             : }
    1645             : 
    1646             : #define APPEND(b, o)    (((oid *) b->theap->base)[b->batCount++] = (o))
    1647             : #define VALUE(s, x)             (s##vars + VarHeapVal(s##vals, (x), s##i.width))
    1648             : 
    1649             : /* nested loop implementation for PCRE join */
    1650             : #define pcre_join_loop(STRCMP, MNRE_MATCH)                                                              \
    1651             :         do {                                                                                                                            \
    1652             :                 for (BUN ridx = 0; ridx < rci.ncand; ridx++) {                                       \
    1653             :                         ro = canditer_next(&rci);                                                                   \
    1654             :                         vr = VALUE(r, ro - rbase);                                                                      \
    1655             :                         nl = 0;                                                                                                         \
    1656             :                         use_re = use_strcmp = empty = false;                                            \
    1657             :                         if ((msg = choose_like_path(&use_re, &use_strcmp, &empty, vr, esc))) \
    1658             :                                 goto bailout;                                                                                   \
    1659             :                         if (!empty) {                                                                                           \
    1660             :                                 if ((msg = mnre_like_build(&re, vr, false, use_strcmp, (unsigned char) *esc)) != MAL_SUCCEED) \
    1661             :                                         goto bailout;                                                                           \
    1662             :                                 canditer_reset(&lci);                                                                       \
    1663             :                                 TIMEOUT_LOOP_IDX_DECL(lidx, lci.ncand, qry_ctx) {               \
    1664             :                                         lo = canditer_next(&lci);                                                   \
    1665             :                                         vl = VALUE(l, lo - lbase);                                                      \
    1666             :                                         if (strNil(vl)) {                                                                       \
    1667             :                                                 continue;                                                                               \
    1668             :                                         } else {                                                                                        \
    1669             :                                                 if (use_strcmp) {                                                               \
    1670             :                                                         if (STRCMP)                                                                     \
    1671             :                                                                 continue;                                                               \
    1672             :                                                 } else {                                                                                \
    1673             :                                                         assert(re);                                                                     \
    1674             :                                                         if (MNRE_MATCH)                                                         \
    1675             :                                                                 continue;                                                               \
    1676             :                                                 }                                                                                               \
    1677             :                                         }                                                                                                       \
    1678             :                                         if (BATcount(r1) == BATcapacity(r1)) {                          \
    1679             :                                                 newcap = BATgrows(r1);                                                  \
    1680             :                                                 BATsetcount(r1, BATcount(r1));                                  \
    1681             :                                                 if (r2)                                                                                 \
    1682             :                                                         BATsetcount(r2, BATcount(r2));                          \
    1683             :                                                 if (BATextend(r1, newcap) != GDK_SUCCEED || (r2 && BATextend(r2, newcap) != GDK_SUCCEED)) { \
    1684             :                                                         msg = createException(MAL, "pcre.join", SQLSTATE(HY013) MAL_MALLOC_FAIL); \
    1685             :                                                         goto bailout;                                                           \
    1686             :                                                 }                                                                                               \
    1687             :                                                 assert(!r2 || BATcapacity(r1) == BATcapacity(r2)); \
    1688             :                                         }                                                                                                       \
    1689             :                                         if (BATcount(r1) > 0) {                                                              \
    1690             :                                                 if (lastl + 1 != lo)                                                    \
    1691             :                                                         r1->tseqbase = oid_nil;                                              \
    1692             :                                                 if (nl == 0) {                                                                  \
    1693             :                                                         if (r2)                                                                         \
    1694             :                                                                 r2->trevsorted = false;                                      \
    1695             :                                                         if (lastl > lo) {                                                    \
    1696             :                                                                 r1->tsorted = false;                                 \
    1697             :                                                                 r1->tkey = false;                                            \
    1698             :                                                         } else if (lastl < lo) {                                     \
    1699             :                                                                 r1->trevsorted = false;                                      \
    1700             :                                                         } else {                                                                        \
    1701             :                                                                 r1->tkey = false;                                            \
    1702             :                                                         }                                                                                       \
    1703             :                                                 }                                                                                               \
    1704             :                                         }                                                                                                       \
    1705             :                                         APPEND(r1, lo);                                                                         \
    1706             :                                         if (r2)                                                                                         \
    1707             :                                                 APPEND(r2, ro);                                                                 \
    1708             :                                         lastl = lo;                                                                                     \
    1709             :                                         nl++;                                                                                           \
    1710             :                                 }                                                                                                               \
    1711             :                                 mnre_like_clean(&re);                                                                               \
    1712             :                                 TIMEOUT_CHECK(qry_ctx,                                                                  \
    1713             :                                                           GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); \
    1714             :                         }                                                                                                                       \
    1715             :                         if (r2) {                                                                                                       \
    1716             :                                 if (nl > 1) {                                                                                        \
    1717             :                                         r2->tkey = false;                                                                    \
    1718             :                                         r2->tseqbase = oid_nil;                                                              \
    1719             :                                         r1->trevsorted = false;                                                              \
    1720             :                                 } else if (nl == 0) {                                                                   \
    1721             :                                         rskipped = BATcount(r2) > 0;                                         \
    1722             :                                 } else if (rskipped) {                                                                  \
    1723             :                                         r2->tseqbase = oid_nil;                                                              \
    1724             :                                 }                                                                                                               \
    1725             :                         } else if (nl > 1) {                                                                         \
    1726             :                                 r1->trevsorted = false;                                                                      \
    1727             :                         }                                                                                                                       \
    1728             :                 }                                                                                                                               \
    1729             :         } while (0)
    1730             : 
    1731             : static char *
    1732          39 : pcrejoin(BAT *r1, BAT *r2, BAT *l, BAT *r, BAT *sl, BAT *sr, const char *esc,
    1733             :                  bit caseignore, bit anti)
    1734             : {
    1735          39 :         struct canditer lci, rci;
    1736          39 :         const char *lvals, *rvals, *lvars, *rvars, *vl, *vr;
    1737          39 :         int rskipped = 0;                       /* whether we skipped values in r */
    1738          39 :         oid lbase, rbase, lo, ro, lastl = 0;    /* last value inserted into r1 */
    1739          39 :         BUN nl, newcap;
    1740          39 :         char *msg = MAL_SUCCEED;
    1741          39 :         struct RE *re = NULL;
    1742          39 :         bool use_re = false,
    1743          39 :                 use_strcmp = false,
    1744          39 :                 empty = false;
    1745          39 :         lng t0 = 0;
    1746             : 
    1747          39 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    1748             : 
    1749          39 :         TRC_DEBUG_IF(ALGO) t0 = GDKusec();
    1750             : 
    1751         117 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
    1752          39 :         assert(ATOMtype(l->ttype) == TYPE_str);
    1753             : 
    1754          39 :         BAT *ol = NULL, *or = NULL;
    1755          39 :         if (caseignore) {
    1756           3 :                 ol = l;
    1757           3 :                 or = r;
    1758           3 :                 l = BATcasefold(l, NULL);
    1759           3 :                 r = BATcasefold(r, NULL);
    1760           3 :                 if (l == NULL || r == NULL) {
    1761           0 :                         BBPreclaim(l);
    1762           0 :                         BBPreclaim(r);
    1763           0 :                         throw(MAL, "pcre.join", GDK_EXCEPTION);
    1764             :                 }
    1765             :         }
    1766             : 
    1767          39 :         canditer_init(&lci, l, sl);
    1768          39 :         canditer_init(&rci, r, sr);
    1769             : 
    1770          39 :         BATiter li = bat_iterator(l);
    1771          39 :         BATiter ri = bat_iterator(r);
    1772          39 :         lbase = l->hseqbase;
    1773          39 :         rbase = r->hseqbase;
    1774          39 :         lvals = (const char *) li.base;
    1775          39 :         rvals = (const char *) ri.base;
    1776          39 :         assert(ri.vh && r->ttype);
    1777          39 :         lvars = li.vh->base;
    1778          39 :         rvars = ri.vh->base;
    1779             : 
    1780          39 :         r1->tkey = true;
    1781          39 :         r1->tsorted = true;
    1782          39 :         r1->trevsorted = true;
    1783          39 :         r1->tnil = false;
    1784          39 :         r1->tnonil = true;
    1785          39 :         if (r2) {
    1786          17 :                 r2->tkey = true;
    1787          17 :                 r2->tsorted = true;
    1788          17 :                 r2->trevsorted = true;
    1789          17 :                 r2->tnil = false;
    1790          17 :                 r2->tnonil = true;
    1791             :         }
    1792             : 
    1793          39 :         if (anti) {
    1794         482 :                 pcre_join_loop(strcmp(vl, vr) == 0, mnre_match(vl, re));
    1795             :         } else {
    1796         456 :                 pcre_join_loop(strcmp(vl, vr) != 0, !mnre_match(vl, re));
    1797             :         }
    1798          39 :         bat_iterator_end(&li);
    1799          39 :         bat_iterator_end(&ri);
    1800          39 :         if (ol) {
    1801           3 :                 BBPreclaim(l);
    1802           3 :                 BBPreclaim(r);
    1803           3 :                 l = ol;
    1804           3 :                 r = or;
    1805             :         }
    1806             : 
    1807          39 :         assert(!r2 || BATcount(r1) == BATcount(r2));
    1808             :         /* also set other bits of heap to correct value to indicate size */
    1809          39 :         BATsetcount(r1, BATcount(r1));
    1810          39 :         if (r2)
    1811          17 :                 BATsetcount(r2, BATcount(r2));
    1812          39 :         if (BATcount(r1) > 0) {
    1813          26 :                 if (BATtdense(r1))
    1814           7 :                         r1->tseqbase = ((oid *) r1->theap->base)[0];
    1815          26 :                 if (r2 && BATtdense(r2))
    1816           8 :                         r2->tseqbase = ((oid *) r2->theap->base)[0];
    1817             :         } else {
    1818          13 :                 r1->tseqbase = 0;
    1819          13 :                 if (r2)
    1820           6 :                         r2->tseqbase = 0;
    1821             :         }
    1822             : 
    1823          14 :         if (r2)
    1824          17 :                 TRC_DEBUG(ALGO,
    1825             :                                   "l=%s#" BUNFMT "[%s]%s%s,"
    1826             :                                   "r=%s#" BUNFMT "[%s]%s%s,sl=%s#" BUNFMT "%s%s,"
    1827             :                                   "sr=%s#" BUNFMT "%s%s -> "
    1828             :                                   "%s#" BUNFMT "%s%s,%s#" BUNFMT "%s%s (" LLFMT " usec)\n",
    1829             :                                   BATgetId(l), BATcount(l), ATOMname(l->ttype),
    1830             :                                   l->tsorted ? "-sorted" : "",
    1831             :                                   l->trevsorted ? "-revsorted" : "",
    1832             :                                   BATgetId(r), BATcount(r), ATOMname(r->ttype),
    1833             :                                   r->tsorted ? "-sorted" : "",
    1834             :                                   r->trevsorted ? "-revsorted" : "",
    1835             :                                   sl ? BATgetId(sl) : "NULL", sl ? BATcount(sl) : 0,
    1836             :                                   sl && sl->tsorted ? "-sorted" : "",
    1837             :                                   sl && sl->trevsorted ? "-revsorted" : "",
    1838             :                                   sr ? BATgetId(sr) : "NULL", sr ? BATcount(sr) : 0,
    1839             :                                   sr && sr->tsorted ? "-sorted" : "",
    1840             :                                   sr && sr->trevsorted ? "-revsorted" : "",
    1841             :                                   BATgetId(r1), BATcount(r1),
    1842             :                                   r1->tsorted ? "-sorted" : "",
    1843             :                                   r1->trevsorted ? "-revsorted" : "",
    1844             :                                   BATgetId(r2), BATcount(r2),
    1845             :                                   r2->tsorted ? "-sorted" : "",
    1846             :                                   r2->trevsorted ? "-revsorted" : "", GDKusec() - t0);
    1847             :         else
    1848          22 :                 TRC_DEBUG(ALGO,
    1849             :                                   "l=%s#" BUNFMT "[%s]%s%s,"
    1850             :                                   "r=%s#" BUNFMT "[%s]%s%s,sl=%s#" BUNFMT "%s%s,"
    1851             :                                   "sr=%s#" BUNFMT "%s%s -> "
    1852             :                                   "%s#" BUNFMT "%s%s (" LLFMT " usec)\n",
    1853             :                                   BATgetId(l), BATcount(l), ATOMname(l->ttype),
    1854             :                                   l->tsorted ? "-sorted" : "",
    1855             :                                   l->trevsorted ? "-revsorted" : "",
    1856             :                                   BATgetId(r), BATcount(r), ATOMname(r->ttype),
    1857             :                                   r->tsorted ? "-sorted" : "",
    1858             :                                   r->trevsorted ? "-revsorted" : "",
    1859             :                                   sl ? BATgetId(sl) : "NULL", sl ? BATcount(sl) : 0,
    1860             :                                   sl && sl->tsorted ? "-sorted" : "",
    1861             :                                   sl && sl->trevsorted ? "-revsorted" : "",
    1862             :                                   sr ? BATgetId(sr) : "NULL", sr ? BATcount(sr) : 0,
    1863             :                                   sr && sr->tsorted ? "-sorted" : "",
    1864             :                                   sr && sr->trevsorted ? "-revsorted" : "",
    1865             :                                   BATgetId(r1), BATcount(r1),
    1866             :                                   r1->tsorted ? "-sorted" : "",
    1867             :                                   r1->trevsorted ? "-revsorted" : "", GDKusec() - t0);
    1868             :         return MAL_SUCCEED;
    1869             : 
    1870           0 :   bailout:
    1871           0 :         bat_iterator_end(&li);
    1872           0 :         bat_iterator_end(&ri);
    1873           0 :         mnre_like_clean(&re);
    1874           0 :         assert(msg != MAL_SUCCEED);
    1875             :         return msg;
    1876             : }
    1877             : 
    1878             : static str
    1879          39 : PCREjoin(bat *r1, bat *r2, bat lid, bat rid, bat slid, bat srid, bat elid,
    1880             :                  bat ciid, bit anti)
    1881             : {
    1882          39 :         BAT *left = NULL, *right = NULL, *escape = NULL, *caseignore = NULL,
    1883          39 :                 *candleft = NULL, *candright = NULL;
    1884          39 :         BAT *result1 = NULL, *result2 = NULL;
    1885          39 :         char *msg = MAL_SUCCEED;
    1886          39 :         const char *esc = "";
    1887          39 :         bit ci;
    1888          39 :         BATiter bi;
    1889             : 
    1890          39 :         if ((left = BATdescriptor(lid)) == NULL)
    1891           0 :                 goto fail;
    1892          39 :         if ((right = BATdescriptor(rid)) == NULL)
    1893           0 :                 goto fail;
    1894          39 :         if ((escape = BATdescriptor(elid)) == NULL)
    1895           0 :                 goto fail;
    1896          39 :         if ((caseignore = BATdescriptor(ciid)) == NULL)
    1897           0 :                 goto fail;
    1898          39 :         if (!is_bat_nil(slid) && (candleft = BATdescriptor(slid)) == NULL)
    1899           0 :                 goto fail;
    1900          39 :         if (!is_bat_nil(srid) && (candright = BATdescriptor(srid)) == NULL)
    1901           0 :                 goto fail;
    1902          39 :         result1 = COLnew(0, TYPE_oid, BATcount(left), TRANSIENT);
    1903          39 :         if (r2)
    1904          17 :                 result2 = COLnew(0, TYPE_oid, BATcount(left), TRANSIENT);
    1905          39 :         if (!result1 || (r2 && !result2)) {
    1906           0 :                 msg = createException(MAL, "pcre.join",
    1907             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1908           0 :                 goto fail;
    1909             :         }
    1910          39 :         result1->tnil = false;
    1911          39 :         result1->tnonil = true;
    1912          39 :         result1->tkey = true;
    1913          39 :         result1->tsorted = true;
    1914          39 :         result1->trevsorted = true;
    1915          39 :         result1->tseqbase = 0;
    1916          39 :         if (r2) {
    1917          17 :                 result2->tnil = false;
    1918          17 :                 result2->tnonil = true;
    1919          17 :                 result2->tkey = true;
    1920          17 :                 result2->tsorted = true;
    1921          17 :                 result2->trevsorted = true;
    1922          17 :                 result2->tseqbase = 0;
    1923             :         }
    1924          39 :         if (BATcount(escape) != 1) {
    1925           0 :                 msg = createException(MAL, "pcre.join",
    1926             :                                                           SQLSTATE(42000)
    1927             :                                                           "At the moment, only one value is allowed for the escape input at pcre join");
    1928           0 :                 goto fail;
    1929             :         }
    1930          39 :         if (BATcount(caseignore) != 1) {
    1931           0 :                 msg = createException(MAL, "pcre.join",
    1932             :                                                           SQLSTATE(42000)
    1933             :                                                           "At the moment, only one value is allowed for the case ignore input at pcre join");
    1934           0 :                 goto fail;
    1935             :         }
    1936          39 :         bi = bat_iterator(caseignore);
    1937          39 :         ci = *(bit *) BUNtloc(bi, 0);
    1938          39 :         bat_iterator_end(&bi);
    1939          39 :         bi = bat_iterator(escape);
    1940          39 :         esc = BUNtvar(bi, 0);
    1941          39 :         msg = pcrejoin(result1, result2, left, right, candleft, candright, esc, ci,
    1942             :                                    anti);
    1943          39 :         bat_iterator_end(&bi);
    1944          39 :         if (msg)
    1945           0 :                 goto fail;
    1946          39 :         *r1 = result1->batCacheid;
    1947          39 :         BBPkeepref(result1);
    1948          38 :         if (r2) {
    1949          17 :                 *r2 = result2->batCacheid;
    1950          17 :                 BBPkeepref(result2);
    1951             :         }
    1952          38 :         BBPunfix(left->batCacheid);
    1953          39 :         BBPunfix(right->batCacheid);
    1954          39 :         BBPreclaim(escape);
    1955          39 :         BBPreclaim(caseignore);
    1956          39 :         BBPreclaim(candleft);
    1957          39 :         BBPreclaim(candright);
    1958             :         return MAL_SUCCEED;
    1959             : 
    1960           0 :   fail:
    1961           0 :         BBPreclaim(left);
    1962           0 :         BBPreclaim(right);
    1963           0 :         BBPreclaim(escape);
    1964           0 :         BBPreclaim(caseignore);
    1965           0 :         BBPreclaim(candleft);
    1966           0 :         BBPreclaim(candright);
    1967           0 :         BBPreclaim(result1);
    1968           0 :         BBPreclaim(result2);
    1969           0 :         if (msg)
    1970             :                 return msg;
    1971           0 :         throw(MAL, "pcre.join", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
    1972             : }
    1973             : 
    1974             : static str
    1975          17 : LIKEjoin(bat *r1, bat *r2, const bat *lid, const bat *rid, const bat *elid,
    1976             :                  const bat *cid, const bat *slid, const bat *srid,
    1977             :                  const bit *nil_matches, const lng *estimate, const bit *anti)
    1978             : {
    1979          17 :         (void) nil_matches;
    1980          17 :         (void) estimate;
    1981          17 :         return PCREjoin(r1, r2, *lid, *rid, slid ? *slid : 0, srid ? *srid : 0,
    1982          17 :                                         *elid, *cid, *anti);
    1983             : }
    1984             : 
    1985             : static str
    1986          22 : LIKEjoin1(bat *r1, const bat *lid, const bat *rid, const bat *elid,
    1987             :                   const bat *cid, const bat *slid, const bat *srid,
    1988             :                   const bit *nil_matches, const lng *estimate, const bit *anti)
    1989             : {
    1990          22 :         (void) nil_matches;
    1991          22 :         (void) estimate;
    1992          22 :         return PCREjoin(r1, NULL, *lid, *rid, slid ? *slid : 0, srid ? *srid : 0,
    1993          22 :                                         *elid, *cid, *anti);
    1994             : }
    1995             : 
    1996             : #include "mel.h"
    1997             : mel_atom pcre_init_atoms[] = {
    1998             :  { .name="pcre", },  { .cmp=NULL }
    1999             : };
    2000             : mel_func pcre_init_funcs[] = {
    2001             :  command("pcre", "index", PCREindex, false, "match a pattern, return matched position (or 0 when not found)", args(1,3, arg("",int),arg("pat",pcre),arg("s",str))),
    2002             :  command("pcre", "match", PCREmatch, false, "Perl Compatible Regular Expression pattern matching against a string", args(1,3, arg("",bit),arg("s",str),arg("pat",str))),
    2003             :  command("pcre", "imatch", PCREimatch, false, "Caseless Perl Compatible Regular Expression pattern matching against a string", args(1,3, arg("",bit),arg("s",str),arg("pat",str))),
    2004             :  command("pcre", "patindex", PCREpatindex, false, "Location of the first POSIX pattern matching against a string", args(1,3, arg("",int),arg("pat",str),arg("s",str))),
    2005             :  command("pcre", "replace", PCREreplace_wrap, false, "Replace _all_ matches of \"pattern\" in \"origin_str\" with \"replacement\".\nParameter \"flags\" accept these flags: 'i', 'm', 's', and 'x'.\n'e': if present, an empty string is considered to be a valid match\n'i': if present, the match operates in case-insensitive mode.\nOtherwise, in case-sensitive mode.\n'm': if present, the match operates in multi-line mode.\n's': if present, the match operates in \"dot-all\"\nThe specifications of the flags can be found in \"man pcreapi\"\nThe flag letters may be repeated.\nNo other letters than 'e', 'i', 'm', 's' and 'x' are allowed in \"flags\".\nReturns the replaced string, or if no matches found, the original string.", args(1,5, arg("",str),arg("origin",str),arg("pat",str),arg("repl",str),arg("flags",str))),
    2006             :  command("pcre", "replace_first", PCREreplacefirst_wrap, false, "Replace _the first_ match of \"pattern\" in \"origin_str\" with \"replacement\".\nParameter \"flags\" accept these flags: 'i', 'm', 's', and 'x'.\n'e': if present, an empty string is considered to be a valid match\n'i': if present, the match operates in case-insensitive mode.\nOtherwise, in case-sensitive mode.\n'm': if present, the match operates in multi-line mode.\n's': if present, the match operates in \"dot-all\"\nThe specifications of the flags can be found in \"man pcreapi\"\nThe flag letters may be repeated.\nNo other letters than 'e', 'i', 'm', 's' and 'x' are allowed in \"flags\".\nReturns the replaced string, or if no matches found, the original string.", args(1,5, arg("",str),arg("origin",str),arg("pat",str),arg("repl",str),arg("flags",str))),
    2007             :  command("pcre", "pcre_quote", PCREquote, false, "Return a PCRE pattern string that matches the argument exactly.", args(1,2, arg("",str),arg("s",str))),
    2008             :  command("pcre", "sql2pcre", PCREsql2pcre, false, "Convert a SQL like pattern with the given escape character into a PCRE pattern.", args(1,3, arg("",str),arg("pat",str),arg("esc",str))),
    2009             :  command("str", "replace", PCREreplace_wrap, false, "", args(1,5, arg("",str),arg("origin",str),arg("pat",str),arg("repl",str),arg("flags",str))),
    2010             :  command("batpcre", "replace", PCREreplace_bat_wrap, false, "", args(1,5, batarg("",str),batarg("orig",str),arg("pat",str),arg("repl",str),arg("flag",str))),
    2011             :  command("batpcre", "replace_first", PCREreplacefirst_bat_wrap, false, "", args(1,5, batarg("",str),batarg("orig",str),arg("pat",str),arg("repl",str),arg("flag",str))),
    2012             :  command("algebra", "like", PCRElike, false, "", args(1,5, arg("",bit),arg("s",str),arg("pat",str),arg("esc",str),arg("caseignore",bit))),
    2013             :  command("algebra", "not_like", PCREnotlike, false, "", args(1,5, arg("",bit),arg("s",str),arg("pat",str),arg("esc",str),arg("caseignore",bit))),
    2014             :  pattern("batalgebra", "like", BATPCRElike, false, "", args(1,5, batarg("",bit),batarg("s",str),arg("pat",str),arg("esc",str),arg("caseignore",bit))),
    2015             :  pattern("batalgebra", "like", BATPCRElike, false, "", args(1,5, batarg("",bit),arg("s",str),batarg("pat",str),arg("esc",str),arg("caseignore",bit))),
    2016             :  pattern("batalgebra", "like", BATPCRElike, false, "", args(1,5, batarg("",bit),batarg("s",str),batarg("pat",str),arg("esc",str),arg("caseignore",bit))),
    2017             :  pattern("batalgebra", "not_like", BATPCREnotlike, false, "", args(1,5, batarg("",bit),batarg("s",str),arg("pat",str),arg("esc",str),arg("caseignore",bit))),
    2018             :  pattern("batalgebra", "not_like", BATPCREnotlike, false, "", args(1,5, batarg("",bit),arg("s",str),batarg("pat",str),arg("esc",str),arg("caseignore",bit))),
    2019             :  pattern("batalgebra", "not_like", BATPCREnotlike, false, "", args(1,5, batarg("",bit),batarg("s",str),batarg("pat",str),arg("esc",str),arg("caseignore",bit))),
    2020             :  command("algebra", "likeselect", PCRElikeselect, false, "Select all head values of the first input BAT for which the\ntail value is \"like\" the given (SQL-style) pattern and for\nwhich the head value occurs in the tail of the second input\nBAT.\nInput is a dense-headed BAT, output is a dense-headed BAT with in\nthe tail the head value of the input BAT for which the\nrelationship holds.  The output BAT is sorted on the tail value.", args(1,7, batarg("",oid),batarg("b",str),batarg("s",oid),arg("pat",str),arg("esc",str),arg("caseignore",bit),arg("anti",bit))),
    2021             :  command("algebra", "likejoin", LIKEjoin, false, "Join the string bat L with the pattern bat R\nwith optional candidate lists SL and SR using pattern escape string ESC\nand doing a case sensitive match.\nThe result is two aligned bats with oids of matching rows.", args(2,11, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("esc",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
    2022             :  command("algebra", "likejoin", LIKEjoin1, false, "The same as LIKEjoin_esc, but only produce one output", args(1,10,batarg("",oid),batarg("l",str),batarg("r",str),batarg("esc",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))),
    2023             :  { .imp=NULL }
    2024             : };
    2025             : #include "mal_import.h"
    2026             : #ifdef _MSC_VER
    2027             : #undef read
    2028             : #pragma section(".CRT$XCU",read)
    2029             : #endif
    2030         350 : LIB_STARTUP_FUNC(init_pcre_mal)
    2031         350 : { mal_module("pcre", pcre_init_atoms, pcre_init_funcs); }

Generated by: LCOV version 1.14