LCOV - code coverage report
Current view: top level - monetdb5/modules/mal - pcre.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 781 988 79.0 %
Date: 2024-12-20 21:24:02 Functions: 35 39 89.7 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : /*
      14             :  * N. Nes
      15             :  * PCRE library interface
      16             :  * The  PCRE library is a set of functions that implement regular
      17             :  * expression pattern matching using the same syntax  and  semantics  as  Perl,
      18             :  * with  just  a  few  differences.  The  current  implementation of PCRE
      19             :  * (release 4.x) corresponds approximately with Perl 5.8, including  support
      20             :  * for  UTF-8  encoded  strings.   However,  this support has to be
      21             :  * explicitly enabled; it is not the default.
      22             :  *
      23             :  * ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre
      24             :  */
      25             : #include "monetdb_config.h"
      26             : #include <string.h>
      27             : 
      28             : #include "mal.h"
      29             : #include "mal_client.h"
      30             : #include "mal_interpreter.h"
      31             : #include "mal_exception.h"
      32             : 
      33             : #include <wchar.h>
      34             : #include <wctype.h>
      35             : 
      36             : #ifdef HAVE_LIBPCRE
      37             : #include <pcre.h>
      38             : #ifndef PCRE_STUDY_JIT_COMPILE
      39             : /* old library version on e.g. EPEL 6 */
      40             : #define pcre_free_study(x)              pcre_free(x)
      41             : #define PCRE_STUDY_JIT_COMPILE  0
      42             : #endif
      43             : #define JIT_COMPILE_MIN 1024    /* when to try JIT compilation of patterns */
      44             : 
      45             : #else
      46             : 
      47             : #include <regex.h>
      48             : 
      49             : typedef regex_t pcre;
      50             : #endif
      51             : 
      52             : /* current implementation assumes simple %keyword% [keyw%]* */
      53             : struct RE {
      54             :         char *k;
      55             :         bool search:1, atend:1, case_ignore:1;
      56             :         size_t skip;                    /* number of codepoints to skip before matching */
      57             :         size_t len;                             /* number of bytes in string */
      58             :         size_t ulen;                    /* number of codepoints in string */
      59             :         struct RE *n;
      60             : };
      61             : 
      62             : /* We cannot use strcasecmp and strncasecmp since they work byte for
      63             :  * byte and don't deal with multibyte encodings (such as UTF-8). */
      64             : 
      65             : static inline bool
      66        8651 : mnre_is_pattern_properly_escaped(const char *pat, unsigned char esc)
      67             : {
      68        8651 :         bool escaped = false;
      69             : 
      70        8651 :         if (pat == 0)
      71             :                 return true;
      72       64360 :         while (*pat) {
      73       55709 :                 if (escaped) {
      74             :                         escaped = false;
      75       55537 :                 } else if ((unsigned char) *pat == esc) {
      76       55709 :                         escaped = true;
      77             :                 }
      78       55709 :                 pat++;
      79             :         }
      80        8651 :         return escaped ? false : true;
      81             : }
      82             : 
      83             : /* returns true if the pattern does not contain wildcard
      84             :  * characters ('%' or '_') and no character is escaped
      85             :  */
      86             : static inline bool
      87        8595 : is_strcmpable(const char *pat, const char *esc)
      88             : {
      89        8595 :         if (pat[strcspn(pat, "%_")])
      90             :                 return false;
      91        1902 :         return strlen(esc) == 0 || strNil(esc) || strstr(pat, esc) == NULL;
      92             : }
      93             : 
      94             : /* Match regular expression by comparing bytes.
      95             :  */
      96             : static inline bool
      97      381823 : mnre_match(const char *restrict s, const struct RE *restrict pattern)
      98             : {
      99      381823 :         const struct RE *r;
     100             : 
     101      441926 :         for (r = pattern; r; r = r->n) {
     102      406548 :                 for (size_t i = 0; i < r->skip; s++) {
     103       22679 :                         if (*s == 0)
     104             :                                 return false;
     105       23599 :                         i += (*s & 0xC0) != 0x80;
     106             :                 }
     107      383869 :                 if (r->search) {
     108      157947 :                         if (r->atend) {
     109             :                                 /* we're searching for a string at the end, so just skip
     110             :                                  * over everything and just compare with the tail of the
     111             :                                  * haystack */
     112       23069 :                                 size_t slen = strlen(s);
     113       23069 :                                 if (slen < r->ulen) {
     114             :                                         /* remaining string too short: each codepoint
     115             :                                          * requires at least one byte */
     116             :                                         return false;
     117             :                                 }
     118       23273 :                                 const char *e = s + slen;
     119       23273 :                                 if (!r->case_ignore) {
     120       23191 :                                         if (slen < r->len) {
     121             :                                                 /* remaining string is too short to match */
     122             :                                                 return false;
     123             :                                         }
     124       23305 :                                         e -= r->len;
     125       23305 :                                         if ((*e & 0xC0) == 0x80) {
     126             :                                                 /* not at start of a Unicode character, so
     127             :                                                  * cannot match (this test not strictly
     128             :                                                  * required: the strcmp should also return
     129             :                                                  * unequal) */
     130             :                                                 return false;
     131             :                                         }
     132       23570 :                                         return strcmp(e, r->k) == 0;
     133             :                                 }
     134             :                                 size_t ulen = r->ulen;
     135         363 :                                 while (e > s && ulen != 0) {
     136         281 :                                         ulen -= (*--e & 0xC0) != 0x80;
     137             :                                 }
     138             :                                 /* ulen != 0 means remaining string is too short */
     139         144 :                                 return ulen == 0 && GDKstrcasecmp(e, r->k) == 0;
     140             :                         }
     141             :                         /* in case we have a pattern consisting of % followed by _,
     142             :                          * we need to backtrack, so use recursion; here we know we
     143             :                          * have the %, look for an _ in the rest of the pattern
     144             :                          * (note %_ and _% are equivalent and is taken care of by
     145             :                          * the pattern construction in mnre_create) */
     146      140061 :                         for (const struct RE *p = r->n; p; p = p->n) {
     147        7325 :                                 if (p->skip != 0) {
     148        2142 :                                         struct RE pat = *r;
     149        2142 :                                         pat.search = false;
     150        2142 :                                         pat.skip = 0;
     151      161001 :                                         do {
     152      161001 :                                                 if (mnre_match(s, &pat))
     153             :                                                         return true;
     154      160804 :                                                 do
     155      160804 :                                                         s++;
     156      160815 :                                                 while (*s && (*s & 0xC0) == 0x80);
     157      160815 :                                         } while (*s != 0);
     158             :                                         return false;
     159             :                                 }
     160             :                         }
     161             :                 }
     162      358658 :                 if (r->k[0] == 0 && (r->search || *s == 0))
     163             :                         return true;
     164      358616 :                 if (r->case_ignore) {
     165       11335 :                         for (;;) {
     166       11335 :                                 if (r->search && (s = GDKstrcasestr(s, r->k)) == NULL)
     167             :                                         return false;
     168        3709 :                                 if (*s == '\0')
     169             :                                         return false;
     170             :                                 /* in "atend" comparison, compare whole string, else
     171             :                                  * only part */
     172        3765 :                                 if ((!r->search || r->atend) &&
     173          56 :                                         (r->atend ? GDKstrcasecmp(s, r->k) : GDKstrncasecmp(s, r->k, SIZE_MAX, r->len)) != 0) {
     174             :                                         /* no match */
     175          18 :                                         if (!r->search)
     176             :                                                 return false;
     177             :                                         /* try again with next character */
     178           0 :                                         do
     179           0 :                                                 s++;
     180           0 :                                         while (*s != '\0' && (*s & 0xC0) == 0x80);
     181           0 :                                         continue;
     182             :                                 }
     183             :                                 /* match; find end of match by counting codepoints */
     184       58157 :                                 for (size_t i = 0; *s && i < r->ulen; s++)
     185       54466 :                                         i += (*s & 0xC0) != 0x80;
     186             :                                 break;
     187             :                         }
     188             :                 } else {
     189      347281 :                         for (;;) {
     190      347281 :                                 if (r->search && (s = strstr(s, r->k)) == NULL)
     191             :                                         return false;
     192      246236 :                                 if (*s == '\0')
     193             :                                         return false;
     194             :                                 /* in "atend" comparison, include NUL byte in the compare */
     195      245759 :                                 if ((!r->search || r->atend) &&
     196      192007 :                                         strncmp(s, r->k, r->len + r->atend) != 0) {
     197             :                                         /* no match */
     198      189347 :                                         if (!r->search)
     199             :                                                 return false;
     200             :                                         /* try again with next character: have search start
     201             :                                          * after current first byte */
     202           0 :                                         if ((s = strchr(s + 1, r->k[0])) == NULL)
     203             :                                                 return false;
     204           0 :                                         continue;
     205             :                                 }
     206             :                                 /* match */
     207       56412 :                                 s += r->len;
     208       56412 :                                 break;
     209             :                         }
     210             :                 }
     211             :         }
     212             :         return true;
     213             : }
     214             : 
     215             : static void
     216        7743 : mnre_destroy(struct RE *p)
     217             : {
     218        7743 :         if (p) {
     219        7743 :                 GDKfree(p->k);
     220        8745 :                 do {
     221        8745 :                         struct RE *n = p->n;
     222             : 
     223        8745 :                         GDKfree(p);
     224        8743 :                         p = n;
     225        8743 :                 } while (p);
     226             :         }
     227        7757 : }
     228             : 
     229             : /* Create a linked list of RE structures.  Depending on the
     230             :  * caseignore and the ascii_pattern flags, the w
     231             :  * (if caseignore == true && ascii_pattern == false) or the k
     232             :  * (in every other case) field is used.  These in the first
     233             :  * structure are allocated, whereas in all subsequent
     234             :  * structures the fields point into the allocated buffer of
     235             :  * the first.
     236             :  */
     237             : static struct RE *
     238        7756 : mnre_create(const char *pat, bool caseignore, uint32_t esc)
     239             : {
     240        7756 :         struct RE *r = GDKmalloc(sizeof(struct RE)), *n = r;
     241        7760 :         bool escaped = false;
     242        7760 :         char *p, *q;
     243             : 
     244        7760 :         if (r == NULL)
     245             :                 return NULL;
     246        7760 :         *r = (struct RE) {
     247             :                 .atend = true,
     248             :                 .case_ignore = caseignore,
     249             :         };
     250             : 
     251       14479 :         for (;;) {
     252       14479 :                 if (esc != '%' && *pat == '%') {
     253        6468 :                         pat++;                                  /* skip % */
     254        6468 :                         r->search = true;
     255        8011 :                 } else if (esc != '_' && *pat == '_') {
     256         251 :                         pat++;
     257         251 :                         r->skip++;
     258             :                 } else {
     259             :                         break;
     260             :                 }
     261             :         }
     262        7760 :         if ((p = GDKstrdup(pat)) == NULL) {
     263           0 :                 GDKfree(r);
     264           0 :                 return NULL;
     265             :         }
     266             : 
     267        7752 :         r->k = p;
     268        7752 :         q = p;
     269       51895 :         while (*p) {
     270       44156 :                 if (escaped) {
     271         173 :                         *q++ = *p;
     272         173 :                         n->len++;
     273         173 :                         n->ulen += (*p & 0xC0) != 0x80;
     274         173 :                         escaped = false;
     275       43983 :                 } else if ((unsigned char) *p == esc) {
     276             :                         escaped = true;
     277       43810 :                 } else if (*p == '%' || *p == '_') {
     278        7661 :                         n->atend = false;
     279        7661 :                         bool search = false;
     280        7661 :                         size_t skip = 0;
     281       23135 :                         for (;;) {
     282       15398 :                                 if (*p == '_')
     283         775 :                                         skip++;
     284       14623 :                                 else if (*p == '%')
     285             :                                         search = true;
     286             :                                 else
     287             :                                         break;
     288        7737 :                                 p++;
     289             :                         }
     290        7661 :                         if (*p || skip != 0) {
     291         998 :                                 n = n->n = GDKmalloc(sizeof(struct RE));
     292         985 :                                 if (n == NULL)
     293           0 :                                         goto bailout;
     294         985 :                                 *n = (struct RE) {
     295             :                                         .search = search,
     296             :                                         .atend = true,
     297             :                                         .skip = skip,
     298             :                                         .k = p,
     299             :                                         .case_ignore = caseignore,
     300             :                                 };
     301             :                         }
     302        7648 :                         *q = 0;
     303        7648 :                         q = p;
     304        7648 :                         continue;                       /* skip increment, we already did it */
     305             :                 } else {
     306       36149 :                         *q++ = *p;
     307       36149 :                         n->len++;
     308       36149 :                         n->ulen += (*p & 0xC0) != 0x80;
     309             :                 }
     310       36495 :                 p++;
     311             :         }
     312        7739 :         *q = 0;
     313        7739 :         return r;
     314           0 :   bailout:
     315           0 :         mnre_destroy(r);
     316           0 :         return NULL;
     317             : }
     318             : 
     319             : #ifdef HAVE_LIBPCRE
     320             : static str
     321          25 : pcre_compile_wrap(pcre **res, const char *pattern, bit insensitive)
     322             : {
     323          25 :         pcre *r;
     324          25 :         const char *err_p = NULL;
     325          25 :         int errpos = 0;
     326          25 :         int options = PCRE_UTF8 | PCRE_NO_UTF8_CHECK | PCRE_MULTILINE;
     327          25 :         if (insensitive)
     328           0 :                 options |= PCRE_CASELESS;
     329             : 
     330          25 :         if ((r = pcre_compile(pattern, options, &err_p, &errpos, NULL)) == NULL) {
     331           0 :                 throw(MAL, "pcre.compile", OPERATION_FAILED
     332             :                           " with\n'%s'\nat %d in\n'%s'.\n", err_p, errpos, pattern);
     333             :         }
     334          25 :         *res = r;
     335          25 :         return MAL_SUCCEED;
     336             : }
     337             : #endif
     338             : 
/* maximum number of back references and quoted \ or $ in replacement string */
#define MAX_NR_REFS		20

/* One special sequence found in a replacement string by
 * parse_replacement: a back reference ($ or \ followed by a number) or
 * a doubled $ or \. */
struct backref {
	int idx;					/* number of the referenced group; INT_MAX for a doubled $ or \ */
	int start;					/* offset of the $ or \ in the replacement string */
	int end;					/* offset in the replacement string where copying resumes */
};
     347             : 
     348             : #ifdef HAVE_LIBPCRE
     349             : /* fill in parameter backrefs (length maxrefs) with information about
     350             :  * back references in the replacement string; a back reference is a
     351             :  * dollar or backslash followed by a number */
     352             : static int
     353         138 : parse_replacement(const char *replacement, int len_replacement,
     354             :                                   struct backref *backrefs, int maxrefs)
     355             : {
     356         138 :         int nbackrefs = 0;
     357             : 
     358         207 :         for (int i = 0; i < len_replacement && nbackrefs < maxrefs; i++) {
     359          70 :                 if (replacement[i] == '$' || replacement[i] == '\\') {
     360          11 :                         char *endptr;
     361          11 :                         backrefs[nbackrefs].idx = strtol(replacement + i + 1, &endptr, 10);
     362          10 :                         if (endptr > replacement + i + 1) {
     363          10 :                                 int k = (int) (endptr - (replacement + i + 1));
     364          10 :                                 backrefs[nbackrefs].start = i;
     365          10 :                                 backrefs[nbackrefs].end = i + k + 1;
     366          10 :                                 nbackrefs++;
     367           0 :                         } else if (replacement[i] == replacement[i + 1]) {
     368             :                                 /* doubled $ or \, we must copy just one to the output */
     369           0 :                                 backrefs[nbackrefs].idx = INT_MAX;      /* impossible value > 0 */
     370           0 :                                 backrefs[nbackrefs].start = i;
     371           0 :                                 backrefs[nbackrefs].end = i + 1;
     372           0 :                                 i++;                    /* don't look at second $ or \ again */
     373           0 :                                 nbackrefs++;
     374             :                         }
     375             :                         /* else: $ or \ followed by something we don't recognize,
     376             :                          * so just leave it */
     377             :                 }
     378             :         }
     379         137 :         return nbackrefs;
     380             : }
     381             : 
     382             : static char *
     383       51074 : single_replace(pcre *pcre_code, pcre_extra *extra,
     384             :                            const char *origin_str, int len_origin_str,
     385             :                            int exec_options, int *ovector, int ovecsize,
     386             :                            const char *replacement, int len_replacement,
     387             :                            struct backref *backrefs, int nbackrefs,
     388             :                            bool global, char *result, int *max_result)
     389             : {
     390       51074 :         int offset = 0;
     391       51074 :         int len_result = 0;
     392       51074 :         int addlen;
     393       51074 :         int empty_match_correction = 0;
     394      190911 :         char *tmp;
     395             : 
     396      190911 :         do {
     397      190911 :                 int j = pcre_exec(pcre_code, extra, origin_str, len_origin_str, offset,
     398             :                                                   exec_options, ovector, ovecsize);
     399      191030 :                 if (j <= 0)
     400             :                         break;
     401             : 
     402      143505 :                 empty_match_correction = ovector[0] == ovector[1] ? 1 : 0;
     403             : 
     404             :                 // calculate the length of the string that will be appended to result
     405      287010 :                 addlen = ovector[0] - offset
     406      143505 :                                 + (nbackrefs == 0 ? len_replacement : 0) + empty_match_correction;
     407      143505 :                 if (len_result + addlen >= *max_result) {
     408       12149 :                         tmp = GDKrealloc(result, len_result + addlen + 1);
     409       12149 :                         if (tmp == NULL) {
     410           0 :                                 GDKfree(result);
     411           0 :                                 return NULL;
     412             :                         }
     413       12149 :                         result = tmp;
     414       12149 :                         *max_result = len_result + addlen + 1;
     415             :                 }
     416             :                 // append to the result the parts of the original string that are left unchanged
     417      143505 :                 if (ovector[0] > offset) {
     418      139268 :                         strncpy(result + len_result, origin_str + offset,
     419      139268 :                                         ovector[0] - offset);
     420      139268 :                         len_result += ovector[0] - offset;
     421             :                 }
     422             :                 // append to the result the replacement of the matched string
     423      143505 :                 if (nbackrefs == 0) {
     424      139843 :                         strncpy(result + len_result, replacement, len_replacement);
     425      139843 :                         len_result += len_replacement;
     426             :                 } else {
     427             :                         int prevend = 0;
     428        7324 :                         for (int i = 0; i < nbackrefs; i++) {
     429        3662 :                                 int off, len;
     430        3662 :                                 if (backrefs[i].idx >= ovecsize / 3) {
     431             :                                         /* out of bounds, replace with empty string */
     432             :                                         off = 0;
     433             :                                         len = 0;
     434             :                                 } else {
     435        3662 :                                         off = ovector[backrefs[i].idx * 2];
     436        3662 :                                         len = ovector[backrefs[i].idx * 2 + 1] - off;
     437             :                                 }
     438        3662 :                                 addlen = backrefs[i].start - prevend + len;
     439        3662 :                                 if (len_result + addlen >= *max_result) {
     440          37 :                                         tmp = GDKrealloc(result, len_result + addlen + 1);
     441          37 :                                         if (tmp == NULL) {
     442           0 :                                                 GDKfree(result);
     443           0 :                                                 return NULL;
     444             :                                         }
     445          37 :                                         result = tmp;
     446          37 :                                         *max_result = len_result + addlen + 1;
     447             :                                 }
     448        3662 :                                 if (backrefs[i].start > prevend) {
     449           2 :                                         strncpy(result + len_result, replacement + prevend,
     450           2 :                                                         backrefs[i].start - prevend);
     451           2 :                                         len_result += backrefs[i].start - prevend;
     452             :                                 }
     453        3662 :                                 if (len > 0) {
     454        3662 :                                         strncpy(result + len_result, origin_str + off, len);
     455        3662 :                                         len_result += len;
     456             :                                 }
     457        3662 :                                 prevend = backrefs[i].end;
     458             :                         }
     459             :                         /* copy rest of replacement string (after last backref) */
     460        3662 :                         addlen = len_replacement - prevend;
     461        3662 :                         if (addlen > 0) {
     462           2 :                                 if (len_result + addlen >= *max_result) {
     463           1 :                                         tmp = GDKrealloc(result, len_result + addlen + 1);
     464           1 :                                         if (tmp == NULL) {
     465           0 :                                                 GDKfree(result);
     466           0 :                                                 return NULL;
     467             :                                         }
     468           1 :                                         result = tmp;
     469           1 :                                         *max_result = len_result + addlen + 1;
     470             :                                 }
     471           2 :                                 strncpy(result + len_result, replacement + prevend, addlen);
     472           2 :                                 len_result += addlen;
     473             :                         }
     474             :                 }
     475             :                 // In case of an empty match just advance the offset by 1
     476      143505 :                 offset = ovector[1] + empty_match_correction;
     477             :                 // and copy the character that we just advanced over
     478      143505 :                 if (empty_match_correction) {
     479          14 :                         strncpy(result + len_result, origin_str + ovector[1], 1);
     480          14 :                         ++len_result;
     481             :                 }
     482             :                 // before we loop around check with the offset - 1 if we had an empty match
     483             :                 // since we manually advanced the offset by one. otherwise we gonna skip a
     484             :                 // replacement at the end of the string
     485      143505 :         } while ((offset - empty_match_correction) < len_origin_str && global);
     486             : 
     487       51193 :         if (offset < len_origin_str) {
     488       47400 :                 addlen = len_origin_str - offset;
     489       47400 :                 if (len_result + addlen >= *max_result) {
     490         690 :                         tmp = GDKrealloc(result, len_result + addlen + 1);
     491         690 :                         if (tmp == NULL) {
     492           0 :                                 GDKfree(result);
     493           0 :                                 return NULL;
     494             :                         }
     495         690 :                         result = tmp;
     496         690 :                         *max_result = len_result + addlen + 1;
     497             :                 }
     498       47400 :                 strncpy(result + len_result, origin_str + offset, addlen);
     499       47400 :                 len_result += addlen;
     500             :         }
     501             :         /* null terminate string */
     502       51193 :         result[len_result] = '\0';
     503       51193 :         return result;
     504             : }
     505             : #endif
     506             : 
     /*
      * pcre_replace -- regular expression replacement on a single string.
      *
      * Compiles `pattern` with PCRE, lets single_replace() process
      * `origin_str`, and stores the buffer returned by single_replace() in
      * *res.
      *
      * `flags` is scanned one character at a time:
      *   'e'  clears PCRE_NOTEMPTY from the exec options,
      *   'i'  adds PCRE_CASELESS to the compile options,
      *   'm'  adds PCRE_MULTILINE,
      *   's'  adds PCRE_DOTALL,
      *   'x'  adds PCRE_EXTENDED;
      *   any other character raises an ILLEGAL_ARGUMENT exception.
      * `global` is forwarded to single_replace() and also selects the
      * function name ("pcre.replace" or "pcre.replace_first") used in every
      * exception raised here.
      *
      * Returns MAL_SUCCEED with *res set, or an exception built by throw().
      * *res is only written on success.  The compiled pattern, the study
      * data and the ovector are released on every path after they have been
      * acquired; the result buffer is handed over to the caller (presumably
      * to be released with GDKfree -- confirm against the callers).
      *
      * When HAVE_LIBPCRE is not defined the function only throws.
      *
      * NOTE(review): the (int) casts on strlen() silently truncate lengths
      * larger than INT_MAX.
      */
     507             : static str
     508          14 : pcre_replace(str *res, const char *origin_str, const char *pattern,
     509             :                          const char *replacement, const char *flags, bool global)
     510             : {
     511             : #ifdef HAVE_LIBPCRE
     512          14 :         const char *err_p = NULL;
     513          14 :         pcre *pcre_code = NULL;
     514          14 :         pcre_extra *extra;
     515          14 :         char *tmpres;
     516          14 :         int max_result;
     517          14 :         int i, errpos = 0;
     518          14 :         int compile_options = PCRE_UTF8 | PCRE_NO_UTF8_CHECK;
     519          14 :         int exec_options = PCRE_NOTEMPTY | PCRE_NO_UTF8_CHECK;
     520          14 :         int *ovector, ovecsize;
     521          14 :         int len_origin_str = (int) strlen(origin_str);
     522          14 :         int len_replacement = (int) strlen(replacement);
     523          14 :         struct backref backrefs[MAX_NR_REFS];
     524          14 :         int nbackrefs = 0;
     525             : 
                               /* translate the flag characters into PCRE option bits; nothing
                                * has been allocated yet, so the default case can throw directly */
     526          21 :         while (*flags) {
     527           7 :                 switch (*flags) {
     528             :                 case 'e':
     529             :                         exec_options &= ~PCRE_NOTEMPTY;
     530             :                         break;
     531           1 :                 case 'i':
     532           1 :                         compile_options |= PCRE_CASELESS;
     533           1 :                         break;
     534           1 :                 case 'm':
     535           1 :                         compile_options |= PCRE_MULTILINE;
     536           1 :                         break;
     537           1 :                 case 's':
     538           1 :                         compile_options |= PCRE_DOTALL;
     539           1 :                         break;
     540           1 :                 case 'x':
     541           1 :                         compile_options |= PCRE_EXTENDED;
     542           1 :                         break;
     543           0 :                 default:
     544           0 :                         throw(MAL, global ? "pcre.replace" : "pcre.replace_first",
     545             :                                   ILLEGAL_ARGUMENT ": unsupported flag character '%c'\n",
     546             :                                   *flags);
     547             :                 }
     548           7 :                 flags++;
     549             :         }
     550             : 
     551          14 :         if ((pcre_code = pcre_compile(pattern, compile_options, &err_p, &errpos, NULL)) == NULL) {
     552           0 :                 throw(MAL, global ? "pcre.replace" : "pcre.replace_first",
     553             :                           OPERATION_FAILED
     554             :                           ": pcre compile of pattern (%s) failed at %d with\n'%s'.\n",
     555             :                           pattern, errpos, err_p);
     556             :         }
     557             : 
     558             :         /* Since the compiled pattern is going to be used several times, it is
     559             :          * worth spending more time analyzing it in order to speed up the time
     560             :          * taken for matching.
     561             :          */
     562          14 :         extra = pcre_study(pcre_code, 0, &err_p);
     563          14 :         if (err_p != NULL) {
     564           0 :                 pcre_free(pcre_code);
     565           0 :                 throw(MAL, global ? "pcre.replace" : "pcre.replace_first",
     566             :                           OPERATION_FAILED
     567             :                           ": pcre study of pattern (%s) failed with '%s'.\n", pattern,
     568             :                           err_p);
     569             :         }
                               /* i receives the number of capture groups; the ovector is sized
                                * at three ints for each of the (i + 1) entries.
                                * NOTE(review): the return value of pcre_fullinfo() is not
                                * checked, so i would be read uninitialized if the call failed. */
     570          14 :         pcre_fullinfo(pcre_code, extra, PCRE_INFO_CAPTURECOUNT, &i);
     571          14 :         ovecsize = (i + 1) * 3;
     572          14 :         if ((ovector = (int *) GDKmalloc(sizeof(int) * ovecsize)) == NULL) {
     573           0 :                 pcre_free_study(extra);
     574           0 :                 pcre_free(pcre_code);
     575           0 :                 throw(MAL, global ? "pcre.replace" : "pcre.replace_first",
     576             :                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     577             :         }
     578             : 
     579             :         /* identify back references in the replacement string */
     580          14 :         nbackrefs = parse_replacement(replacement, len_replacement,
     581             :                                                                   backrefs, MAX_NR_REFS);
     582             : 
                               /* start with a buffer that can hold an unchanged copy of the
                                * input; single_replace() reallocates it and updates max_result
                                * whenever the result needs more room */
     583          14 :         max_result = len_origin_str + 1;
     584          14 :         tmpres = GDKmalloc(max_result);
     585          14 :         if (tmpres == NULL) {
     586           0 :                 GDKfree(ovector);
     587           0 :                 pcre_free_study(extra);
     588           0 :                 pcre_free(pcre_code);
     589           0 :                 throw(MAL, global ? "pcre.replace" : "pcre.replace_first",
     590             :                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     591             :         }
     592             : 
     593          14 :         tmpres = single_replace(pcre_code, extra, origin_str, len_origin_str,
     594             :                                                         exec_options, ovector, ovecsize, replacement,
     595             :                                                         len_replacement, backrefs, nbackrefs, global,
     596             :                                                         tmpres, &max_result);
     597          14 :         GDKfree(ovector);
     598          14 :         pcre_free_study(extra);
     599          14 :         pcre_free(pcre_code);
                               /* single_replace() frees the buffer itself before returning NULL,
                                * so there is nothing left to release on this error path */
     600          14 :         if (tmpres == NULL)
     601           0 :                 throw(MAL, global ? "pcre.replace" : "pcre.replace_first",
     602             :                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     603             : 
     604          14 :         *res = tmpres;
     605          14 :         return MAL_SUCCEED;
     606             : #else
     607             :         (void) res;
     608             :         (void) origin_str;
     609             :         (void) pattern;
     610             :         (void) replacement;
     611             :         (void) flags;
     612             :         (void) global;
     613             :         throw(MAL, global ? "pcre.replace" : "pcre.replace_first",
     614             :                   "Database was compiled without PCRE support.");
     615             : #endif
     616             : }
     617             : 
     /*
      * pcre_replace_bat -- regular expression replacement on every string of
      * a BAT.
      *
      * Compiles `pattern` once, then calls single_replace() for each value
      * of `origin_strs` and appends the result to a new TYPE_str BAT that
      * has the same hseqbase as the input.  On success the new BAT is
      * stored in *res.
      *
      * `flags` is interpreted exactly as in pcre_replace(): 'e' clears
      * PCRE_NOTEMPTY from the exec options, 'i', 'm', 's' and 'x' add
      * PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL and PCRE_EXTENDED to the
      * compile options, anything else raises ILLEGAL_ARGUMENT.
      * `global` is forwarded to single_replace() and selects the name
      * ("batpcre.replace" or "batpcre.replace_first") used in exceptions.
      *
      * Returns MAL_SUCCEED or an exception built by throw(); *res is only
      * written on success.  When HAVE_LIBPCRE is not defined the function
      * only throws.
      *
      * NOTE(review): the (int) casts on strlen() truncate lengths larger
      * than INT_MAX.
      */
     618             : static str
     619         125 : pcre_replace_bat(BAT **res, BAT *origin_strs, const char *pattern,
     620             :                                  const char *replacement, const char *flags, bool global)
     621             : {
     622             : #ifdef HAVE_LIBPCRE
     623         125 :         const char *err_p = NULL;
     624         125 :         char *tmpres;
     625         125 :         int i, errpos = 0;
     626         125 :         int compile_options = PCRE_UTF8 | PCRE_NO_UTF8_CHECK;
     627         125 :         int exec_options = PCRE_NOTEMPTY | PCRE_NO_UTF8_CHECK;
     628         125 :         pcre *pcre_code = NULL;
     629         125 :         pcre_extra *extra;
     630         125 :         BAT *tmpbat;
     631         125 :         BUN p, q;
     632         125 :         int *ovector, ovecsize;
     633         125 :         int len_replacement = (int) strlen(replacement);
     634         125 :         struct backref backrefs[MAX_NR_REFS];
     635         125 :         int nbackrefs = 0;
     636         125 :         const char *origin_str;
     637         125 :         int max_dest_size = 0;
     638             : 
                               /* translate the flag characters into PCRE option bits; nothing
                                * has been allocated yet, so the default case can throw directly */
     639         161 :         while (*flags) {
     640          36 :                 switch (*flags) {
     641             :                 case 'e':
     642             :                         exec_options &= ~PCRE_NOTEMPTY;
     643             :                         break;
     644           9 :                 case 'i':
     645           9 :                         compile_options |= PCRE_CASELESS;
     646           9 :                         break;
     647          18 :                 case 'm':
     648          18 :                         compile_options |= PCRE_MULTILINE;
     649          18 :                         break;
     650           9 :                 case 's':
     651           9 :                         compile_options |= PCRE_DOTALL;
     652           9 :                         break;
     653           0 :                 case 'x':
     654           0 :                         compile_options |= PCRE_EXTENDED;
     655           0 :                         break;
     656           0 :                 default:
     657           0 :                         throw(MAL, global ? "batpcre.replace" : "batpcre.replace_first",
     658             :                                   ILLEGAL_ARGUMENT ": unsupported flag character '%c'\n",
     659             :                                   *flags);
     660             :                 }
     661          36 :                 flags++;
     662             :         }
     663             : 
     664         125 :         if ((pcre_code = pcre_compile(pattern, compile_options, &err_p, &errpos, NULL)) == NULL) {
     665           0 :                 throw(MAL, global ? "batpcre.replace" : "batpcre.replace_first",
     666             :                           OPERATION_FAILED
     667             :                           ": pcre compile of pattern (%s) failed at %d with\n'%s'.\n",
     668             :                           pattern, errpos, err_p);
     669             :         }
     670             : 
     671             :         /* Since the compiled pattern is going to be used several times,
     672             :          * it is worth spending more time analyzing it in order to speed
     673             :          * up the time taken for matching.
     674             :          */
                               /* JIT compilation is only requested when the input BAT holds
                                * more than JIT_COMPILE_MIN values */
     675         248 :         extra = pcre_study(pcre_code,
     676         123 :                                            BATcount(origin_strs) >
     677             :                                            JIT_COMPILE_MIN ? PCRE_STUDY_JIT_COMPILE : 0, &err_p);
                               /* NOTE(review): unlike pcre_replace(), the exception raised
                                * here carries neither the pattern nor the text in err_p */
     678         125 :         if (err_p != NULL) {
     679           0 :                 pcre_free(pcre_code);
     680           0 :                 throw(MAL, global ? "batpcre.replace" : "batpcre.replace_first",
     681             :                           OPERATION_FAILED);
     682             :         }
                               /* i receives the number of capture groups; the ovector is sized
                                * at three ints for each of the (i + 1) entries.
                                * NOTE(review): the return value of pcre_fullinfo() is not
                                * checked, so i would be read uninitialized if the call failed. */
     683         125 :         pcre_fullinfo(pcre_code, extra, PCRE_INFO_CAPTURECOUNT, &i);
     684         124 :         ovecsize = (i + 1) * 3;
     685         124 :         if ((ovector = (int *) GDKzalloc(sizeof(int) * ovecsize)) == NULL) {
     686           0 :                 pcre_free_study(extra);
     687           0 :                 pcre_free(pcre_code);
     688           0 :                 throw(MAL, global ? "batpcre.replace" : "batpcre.replace_first",
     689             :                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     690             :         }
     691             : 
     692             :         /* identify back references in the replacement string */
     693         126 :         nbackrefs = parse_replacement(replacement, len_replacement,
     694             :                                                                   backrefs, MAX_NR_REFS);
     695             : 
     696         123 :         tmpbat = COLnew(origin_strs->hseqbase, TYPE_str, BATcount(origin_strs),
     697             :                                         TRANSIENT);
     698             : 
     699             :         /* the buffer for all destination strings is allocated only once,
     700             :          * and extended when needed */
     701         126 :         max_dest_size = len_replacement + 1;
     702         126 :         tmpres = GDKmalloc(max_dest_size);
                               /* both allocations are checked together, so the cleanup below
                                * may see either pointer as NULL (BBPreclaim and GDKfree are
                                * presumably NULL-tolerant -- confirm) */
     703         126 :         if (tmpbat == NULL || tmpres == NULL) {
     704           0 :                 pcre_free_study(extra);
     705           0 :                 pcre_free(pcre_code);
     706           0 :                 GDKfree(ovector);
     707           0 :                 BBPreclaim(tmpbat);
     708           0 :                 GDKfree(tmpres);
     709           0 :                 throw(MAL, global ? "batpcre.replace" : "batpcre.replace_first",
     710             :                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     711             :         }
                               /* tmpres is reused for every row: single_replace() writes into
                                * it and returns the (possibly reallocated) buffer, keeping
                                * max_dest_size up to date.  Since the same buffer is overwritten
                                * by the next row and freed after the loop, BUNappend presumably
                                * copies the string into the BAT -- confirm. */
     712         126 :         BATiter origin_strsi = bat_iterator(origin_strs);
     713       51342 :         BATloop(origin_strs, p, q) {
     714       51216 :                 origin_str = BUNtvar(origin_strsi, p);
     715      102262 :                 tmpres = single_replace(pcre_code, extra, origin_str,
     716       51216 :                                                                 (int) strlen(origin_str), exec_options,
     717             :                                                                 ovector, ovecsize, replacement,
     718             :                                                                 len_replacement, backrefs, nbackrefs, global,
     719             :                                                                 tmpres, &max_dest_size);
                                       /* a NULL from single_replace() (which has then already
                                        * freed the buffer) and a failing BUNappend are both
                                        * reported as MAL_MALLOC_FAIL */
     720       51046 :                 if (tmpres == NULL || BUNappend(tmpbat, tmpres, false) != GDK_SUCCEED) {
     721           0 :                         bat_iterator_end(&origin_strsi);
     722           0 :                         pcre_free_study(extra);
     723           0 :                         pcre_free(pcre_code);
     724           0 :                         GDKfree(ovector);
     725           0 :                         GDKfree(tmpres);
     726           0 :                         BBPreclaim(tmpbat);
     727           0 :                         throw(MAL, global ? "batpcre.replace" : "batpcre.replace_first",
     728             :                                   SQLSTATE(HY013) MAL_MALLOC_FAIL);
     729             :                 }
     730             :         }
     731         126 :         bat_iterator_end(&origin_strsi);
     732         126 :         pcre_free_study(extra);
     733         126 :         pcre_free(pcre_code);
     734         126 :         GDKfree(ovector);
     735         126 :         GDKfree(tmpres);
     736         126 :         *res = tmpbat;
     737         126 :         return MAL_SUCCEED;
     738             : #else
     739             :         (void) res;
     740             :         (void) origin_strs;
     741             :         (void) pattern;
     742             :         (void) replacement;
     743             :         (void) flags;
     744             :         (void) global;
     745             :         throw(MAL, global ? "batpcre.replace" : "batpcre.replace_first",
     746             :                   "Database was compiled without PCRE support.");
     747             : #endif
     748             : }
     749             : 
     /*
      * pcre_match_with_flags -- test whether `val` matches the regular
      * expression `pat` and store the answer (TRUE or FALSE) in *ret.
      *
      * The implementation uses PCRE when HAVE_LIBPCRE is defined and the
      * POSIX regcomp()/regexec() interface otherwise.
      *
      * `flags` is scanned one character at a time:
      *   'i'  PCRE_CASELESS  / REG_ICASE
      *   'm'  PCRE_MULTILINE / REG_NEWLINE
      *   's'  PCRE_DOTALL (PCRE build only; the bit is already part of the
      *        initial options there, and in the POSIX build 's' falls
      *        through to the error case)
      *   'x'  PCRE_EXTENDED  / REG_EXTENDED (the latter is already part of
      *        the initial POSIX options)
      *   any other character raises ILLEGAL_ARGUMENT.
      * The flags are validated before `val` is looked at, so an unsupported
      * flag throws even when `val` is nil.
      *
      * A nil `val` yields *ret = FALSE without compiling the pattern.
      * Returns MAL_SUCCEED, or an exception when a flag is unsupported, the
      * pattern does not compile, or matching reports an error.
      */
     750             : static str
     751           4 : pcre_match_with_flags(bit *ret, const char *val, const char *pat,
     752             :                                           const char *flags)
     753             : {
     754           4 :         int pos;
     755             : #ifdef HAVE_LIBPCRE
     756           4 :         const char *err_p = NULL;
     757           4 :         int errpos = 0;
     758           4 :         int options = PCRE_UTF8 | PCRE_NO_UTF8_CHECK | PCRE_DOTALL;
     759           4 :         pcre *re;
     760             : #else
     761             :         int options = REG_NOSUB | REG_EXTENDED;
     762             :         regex_t re;
     763             :         int errcode;
     764             :         int retval;
     765             : #endif
     766             : 
     767           4 :         while (*flags) {
     768           0 :                 switch (*flags) {
     769           0 :                 case 'i':
     770             : #ifdef HAVE_LIBPCRE
     771           0 :                         options |= PCRE_CASELESS;
     772             : #else
     773             :                         options |= REG_ICASE;
     774             : #endif
     775           0 :                         break;
     776           0 :                 case 'm':
     777             : #ifdef HAVE_LIBPCRE
     778           0 :                         options |= PCRE_MULTILINE;
     779             : #else
     780             :                         options |= REG_NEWLINE;
     781             : #endif
     782           0 :                         break;
     783             : #ifdef HAVE_LIBPCRE
     784           0 :                 case 's':
     785           0 :                         options |= PCRE_DOTALL;
     786           0 :                         break;
     787             : #endif
     788           0 :                 case 'x':
     789             : #ifdef HAVE_LIBPCRE
     790           0 :                         options |= PCRE_EXTENDED;
     791             : #else
     792             :                         options |= REG_EXTENDED;
     793             : #endif
     794           0 :                         break;
     795           0 :                 default:
     796           0 :                         throw(MAL, "pcre.match", ILLEGAL_ARGUMENT
     797             :                                   ": unsupported flag character '%c'\n", *flags);
     798             :                 }
     799           0 :                 flags++;
     800             :         }
     801           4 :         if (strNil(val)) {
     802           0 :                 *ret = FALSE;
     803           0 :                 return MAL_SUCCEED;
     804             :         }
     805             : 
                               /* the POSIX branch stores the regcomp() result in errcode but
                                * does not use it afterwards; only the pattern is reported */
     806             : #ifdef HAVE_LIBPCRE
     807           4 :         if ((re = pcre_compile(pat, options, &err_p, &errpos, NULL)) == NULL)
     808             : #else
     809             :         if ((errcode = regcomp(&re, pat, options)) != 0)
     810             : #endif
     811             :         {
     812           0 :                 throw(MAL, "pcre.match", OPERATION_FAILED
     813             :                           ": compilation of regular expression (%s) failed "
     814             : #ifdef HAVE_LIBPCRE
     815             :                           "at %d with '%s'", pat, errpos, err_p
     816             : #else
     817             :                           , pat
     818             : #endif
     819             :                                 );
     820             :         }
                               /* no ovector is passed (NULL, 0), so only the return code of
                                * pcre_exec() is used; the compiled pattern is released right
                                * after the single match attempt */
     821             : #ifdef HAVE_LIBPCRE
     822           4 :         pos = pcre_exec(re, NULL, val, (int) strlen(val), 0, PCRE_NO_UTF8_CHECK,
     823             :                                         NULL, 0);
     824           4 :         pcre_free(re);
     825             : #else
                               /* map the regexec() result onto the PCRE-style codes tested
                                * below: REG_NOMATCH -> -1, REG_ENOSYS -> -2, everything else -> 0.
                                * NOTE(review): any other regexec() error code is therefore
                                * reported as a match -- confirm this is intended. */
     826             :         retval = regexec(&re, val, (size_t) 0, NULL, 0);
     827             :         pos = retval == REG_NOMATCH ? -1 : (retval == REG_ENOSYS ? -2 : 0);
     828             :         regfree(&re);
     829             : #endif
                               /* pos >= 0: match; pos == -1: no match; anything else: error */
     830           4 :         if (pos >= 0)
     831           3 :                 *ret = TRUE;
     832           1 :         else if (pos == -1)
     833           1 :                 *ret = FALSE;
     834             :         else
     835           0 :                 throw(MAL, "pcre.match", OPERATION_FAILED
     836             :                           ": matching of regular expression (%s) failed with %d", pat, pos);
     837             :         return MAL_SUCCEED;
     838             : }
     839             : 
     840             : #ifdef HAVE_LIBPCRE
     841             : /* special characters in PCRE that need to be escaped; the pattern
                        * translators in this file (sql2pcre, pat2pcre) put a backslash in
                        * front of any character found in this table */
     842             : static const char pcre_specials[] = "$()*+.?[\\]^{|}";
     843             : #else
     844             : /* special characters in POSIX regular expressions that need to
     845             :  * be escaped; compared with the PCRE table, ']' and '}' are absent.
                        * NOTE(review): originally documented as POSIX *basic* regular
                        * expressions, but the non-PCRE branch of pcre_match_with_flags()
                        * compiles with REG_EXTENDED -- confirm which flavour the users of
                        * this table rely on. */
     846             : static const char pcre_specials[] = "$()*+.?[\\^{|";
     847             : #endif
     848             : 
     849             : /* change SQL LIKE pattern into PCRE pattern
                        *
                        * r        out: receives a GDKmalloc'ed string.  This is either the
                        *          translated pattern, enclosed in '^' and '$', or a copy of
                        *          str_nil when the translation loop never set hasWildcard.
                        * pat      the (I)LIKE pattern; NULL raises an exception.
                        * esc_str  the ESCAPE string; nil means "no escape character",
                        *          otherwise its first byte is used.  A string longer than
                        *          one byte raises an exception.
                        *
                        * Translation rules, applied byte by byte:
                        *  - the escape character emits a backslash and arms `escaped`; a
                        *    doubled escape character produces the character itself, which
                        *    keeps the backslash in front of it only when the escape
                        *    character is listed in pcre_specials;
                        *  - a character from pcre_specials is emitted with a backslash in
                        *    front of it (reusing the one already written when escaped);
                        *  - an unescaped '%' becomes ".*?" and directly following '%'
                        *    characters are skipped, unless '%' is the escape character;
                        *  - an unescaped '_' becomes ".";
                        *  - any other character is copied; when it follows the escape
                        *    character it overwrites the backslash written for the escape.
                        *
                        * hasWildcard is set by '%', '_', pcre_specials characters and the
                        * escape character, i.e. by anything that makes the output differ
                        * from a plain copy.  Returning str_nil presumably tells the caller
                        * that no regular expression is needed -- confirm against callers.
                        *
                        * Returns MAL_SUCCEED or an exception.  When the pattern ends with a
                        * pending escape character the buffer is freed and *r is set to
                        * NULL before throwing; on the earlier error paths *r is untouched.
                        *
                        * NOTE(review): strlen(esc_str) is evaluated unconditionally, right
                        * after strNil(esc_str) has been consulted; if strNil() accepts a
                        * NULL pointer this call would dereference NULL -- confirm.
                        */
     850             : static str
     851           6 : sql2pcre(str *r, const char *pat, const char *esc_str)
     852             : {
     853           6 :         int escaped = 0;
     854           6 :         int hasWildcard = 0;
     855           6 :         char *ppat;
     856          12 :         int esc = strNil(esc_str) ? 0 : esc_str[0];     /* should change to utf8_convert() */
     857           6 :         int specials;
     858           6 :         int c;
     859             : 
     860           6 :         if (strlen(esc_str) > 1)
     861           0 :                 throw(MAL, "pcre.sql2pcre",
     862             :                           SQLSTATE(22019) ILLEGAL_ARGUMENT
     863             :                           ": ESCAPE string must have length 1");
     864           6 :         if (pat == NULL)
     865           0 :                 throw(MAL, "pcre.sql2pcre",
     866             :                           SQLSTATE(22019) ILLEGAL_ARGUMENT
     867             :                           ": (I)LIKE pattern must not be NULL");
                               /* worst case is three output bytes per input byte ('%' becomes
                                * ".*?"), plus '^', '$' and the terminating NUL */
     868           6 :         ppat = GDKmalloc(strlen(pat) * 3 +
     869             :                                          3 /* 3 = "^'the translated regexp'$0" */ );
     870           6 :         if (ppat == NULL)
     871           0 :                 throw(MAL, "pcre.sql2pcre", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     872             : 
                               /* *r keeps the start of the buffer, ppat is the write cursor */
     873           6 :         *r = ppat;
     874             :         /* The escape character can be a char which is special in a PCRE
     875             :          * expression.  If the user used the "+" char as escape and has "++"
     876             :          * in their pattern, then replacing this with "+" is not correct and
     877             :          * should be "\+" instead. */
     878           6 :         specials = (esc && strchr(pcre_specials, esc) != NULL);
     879             : 
     880           6 :         *ppat++ = '^';
     881          17 :         while ((c = *pat++) != 0) {
     882          11 :                 if (c == esc) {
     883           2 :                         if (escaped) {
     884           1 :                                 if (specials) { /* change ++ into \+ */
     885           1 :                                         *ppat++ = esc;
     886             :                                 } else {                /* do not escape simple escape symbols */
     887           0 :                                         ppat[-1] = esc; /* overwrite backslash */
     888             :                                 }
     889             :                                 escaped = 0;
     890             :                         } else {
     891           1 :                                 *ppat++ = '\\';
     892           1 :                                 escaped = 1;
     893             :                         }
     894             :                         hasWildcard = 1;
     895           9 :                 } else if (strchr(pcre_specials, c) != NULL) {
     896             :                         /* escape PCRE special chars, avoid double backslash if the
     897             :                          * user uses an invalid escape sequence */
     898           2 :                         if (!escaped)
     899           2 :                                 *ppat++ = '\\';
     900           2 :                         *ppat++ = c;
     901           2 :                         hasWildcard = 1;
     902           2 :                         escaped = 0;
     903           7 :                 } else if (c == '%' && !escaped) {
     904           3 :                         *ppat++ = '.';
     905           3 :                         *ppat++ = '*';
     906           3 :                         *ppat++ = '?';
     907           3 :                         hasWildcard = 1;
     908             :                         /* collapse multiple %, but only if it isn't the escape */
     909           3 :                         if (esc != '%')
     910           3 :                                 while (*pat == '%')
     911           0 :                                         pat++;
     912           4 :                 } else if (c == '_' && !escaped) {
     913           3 :                         *ppat++ = '.';
     914           3 :                         hasWildcard = 1;
     915             :                 } else {
     916           1 :                         if (escaped) {
     917           0 :                                 ppat[-1] = c;   /* overwrite backslash of invalid escape */
     918             :                         } else {
     919           1 :                                 *ppat++ = c;
     920             :                         }
     921             :                         escaped = 0;
     922             :                 }
     923             :         }
     924             :         /* no wildcard or escape character at end of string */
     925           6 :         if (!hasWildcard || escaped) {
     926           1 :                 GDKfree(*r);
     927           1 :                 *r = NULL;
     928           1 :                 if (escaped)
     929           0 :                         throw(MAL, "pcre.sql2pcre",
     930             :                                   SQLSTATE(22019) ILLEGAL_ARGUMENT
     931             :                                   ": (I)LIKE pattern must not end with escape character");
     932           1 :                 *r = GDKstrdup(str_nil);
     933           1 :                 if (*r == NULL)
     934           0 :                         throw(MAL, "pcre.sql2pcre", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     935             :         } else {
     936           5 :                 *ppat++ = '$';
     937           5 :                 *ppat = 0;
     938             :         }
     939             :         return MAL_SUCCEED;
     940             : }
     941             : 
     942             : #ifdef HAVE_LIBPCRE
     943             : /* change SQL PATINDEX pattern into PCRE pattern
                        *
                        * r    out: receives the GDKmalloc'ed, NUL-terminated translation.
                        * pat  the PATINDEX pattern; it is passed to strlen() unconditionally,
                        *      so it must not be NULL.
                        *
                        * Translation rules, applied byte by byte:
                        *  - a character from pcre_specials is emitted with a backslash in
                        *    front of it;
                        *  - '%' becomes ".*", but only when an earlier '%' has already been
                        *    seen and more pattern follows; the first '%' encountered and a
                        *    '%' that ends the pattern produce no output;
                        *  - '_' becomes ".";
                        *  - any other character is copied.
                        * No '^' or '$' anchors are emitted.  The buffer is sized for two
                        * output bytes per input byte plus three spare bytes.
                        *
                        * Returns MAL_SUCCEED, or an exception when the allocation fails
                        * (*r is then left untouched).
                        *
                        * NOTE(review): the exception tag "pcre.sql2pcre" and the size
                        * comment mentioning '^' and '$' look copied from sql2pcre() --
                        * confirm and adjust.
                        */
     944             : static str
     945          25 : pat2pcre(str *r, const char *pat)
     946             : {
     947          25 :         size_t len = strlen(pat);
     948          25 :         char *ppat = GDKmalloc(len * 2 + 3 /* 3 = "^'the translated regexp'$0" */ );
     949          25 :         int start = 0;
     950             : 
     951          25 :         if (ppat == NULL)
     952           0 :                 throw(MAL, "pcre.sql2pcre", SQLSTATE(HY013) MAL_MALLOC_FAIL);
                               /* *r keeps the start of the buffer, ppat is the write cursor */
     953          25 :         *r = ppat;
     954          77 :         while (*pat) {
     955          52 :                 int c = *pat++;
     956             : 
     957          52 :                 if (strchr(pcre_specials, c) != NULL) {
     958          17 :                         *ppat++ = '\\';
     959          17 :                         *ppat++ = c;
     960          35 :                 } else if (c == '%') {
                                               /* start counts the '%' characters seen so far; pat
                                                * already points past the current one, so *pat is
                                                * zero when this '%' is the last character */
     961           3 :                         if (start && *pat) {
     962           0 :                                 *ppat++ = '.';
     963           0 :                                 *ppat++ = '*';
     964             :                         }
     965           3 :                         start++;
     966          32 :                 } else if (c == '_') {
     967           0 :                         *ppat++ = '.';
     968             :                 } else {
     969          32 :                         *ppat++ = c;
     970             :                 }
     971             :         }
     972          25 :         *ppat = 0;
     973          25 :         return MAL_SUCCEED;
     974             : }
     975             : #endif
     976             : 
     977             : /*
     978             :  * @+ Wrapping
     979             :  */
     980             : 
     /* Wrapper around pcre_replace(): dereferences the argument pointers and
      * requests a global replacement (global = true).  The result string is
      * stored in *res by pcre_replace(), whose return value is passed on
      * unchanged. */
     981             : static str
     982          14 : PCREreplace_wrap(str *res, const char *const *or, const char *const *pat,
     983             :                                  const char *const *repl, const char *const *flags)
     984             : {
     985          14 :         return pcre_replace(res, *or, *pat, *repl, *flags, true);
     986             : }
     987             : 
     /* Wrapper around pcre_replace(): dereferences the argument pointers and
      * calls it with global = false (the "pcre.replace_first" variant).  The
      * result string is stored in *res by pcre_replace(), whose return value
      * is passed on unchanged. */
     988             : static str
     989           0 : PCREreplacefirst_wrap(str *res, const char *const *or, const char *const *pat,
     990             :                                           const char *const *repl, const char *const *flags)
     991             : {
     992           0 :         return pcre_replace(res, *or, *pat, *repl, *flags, false);
     993             : }
     994             : 
     /* BAT wrapper around pcre_replace_bat() with global = true.
      *
      * Looks up the input BAT for *bid, runs the replacement over it and, on
      * success, stores the id of the result BAT in *res after BBPkeepref().
      * The reference on the input BAT is dropped with BBPunfix() whether or
      * not the replacement succeeded.
      *
      * Returns the message from pcre_replace_bat(), or a
      * RUNTIME_OBJECT_MISSING exception when BATdescriptor() returns NULL. */
     995             : static str
     996         125 : PCREreplace_bat_wrap(bat *res, const bat *bid, const char *const *pat,
     997             :                                          const char *const *repl, const char *const *flags)
     998             : {
     999         125 :         BAT *b, *bn = NULL;
    1000         125 :         str msg;
    1001         125 :         if ((b = BATdescriptor(*bid)) == NULL)
    1002           0 :                 throw(MAL, "batpcre.replace", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
    1003             : 
    1004         126 :         msg = pcre_replace_bat(&bn, b, *pat, *repl, *flags, true);
                               /* bn is only set by pcre_replace_bat() on success */
    1005         126 :         if (msg == MAL_SUCCEED) {
    1006         126 :                 *res = bn->batCacheid;
    1007         126 :                 BBPkeepref(bn);
    1008             :         }
    1009         126 :         BBPunfix(b->batCacheid);
    1010         126 :         return msg;
    1011             : }
    1012             : 
    1013             : static str
    1014           0 : PCREreplacefirst_bat_wrap(bat *res, const bat *bid, const char *const *pat,
    1015             :                                                   const char *const *repl, const char *const *flags)
    1016             : {
    1017           0 :         BAT *b, *bn = NULL;
    1018           0 :         str msg;
    1019           0 :         if ((b = BATdescriptor(*bid)) == NULL)
    1020           0 :                 throw(MAL, "batpcre.replace_first", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
    1021             : 
    1022           0 :         msg = pcre_replace_bat(&bn, b, *pat, *repl, *flags, false);
    1023           0 :         if (msg == MAL_SUCCEED) {
    1024           0 :                 *res = bn->batCacheid;
    1025           0 :                 BBPkeepref(bn);
    1026             :         }
    1027           0 :         BBPunfix(b->batCacheid);
    1028           0 :         return msg;
    1029             : }
    1030             : 
    1031             : static str
    1032           4 : PCREmatch(bit *ret, const char *const *val, const char *const *pat)
    1033             : {
    1034           4 :         return pcre_match_with_flags(ret, *val, *pat, "");
    1035             : }
    1036             : 
    1037             : static str
    1038           0 : PCREimatch(bit *ret, const char *const *val, const char *const *pat)
    1039             : {
    1040           0 :         return pcre_match_with_flags(ret, *val, *pat, "i");
    1041             : }
    1042             : 
    1043             : static str
    1044          25 : PCREindex(int *res, const pcre *pattern, const char *const *s)
    1045             : {
    1046             : #ifdef HAVE_LIBPCRE
    1047          25 :         int v[3];
    1048             : 
    1049          25 :         v[0] = v[1] = *res = 0;
    1050          25 :         if (pcre_exec(pattern, NULL, *s, (int) strlen(*s), 0,
    1051             :                                   PCRE_NO_UTF8_CHECK, v, 3) >= 0) {
    1052          23 :                 *res = v[1];
    1053             :         }
    1054          25 :         return MAL_SUCCEED;
    1055             : #else
    1056             :         (void) res;
    1057             :         (void) pattern;
    1058             :         (void) s;
    1059             :         throw(MAL, "pcre.index", "Database was compiled without PCRE support.");
    1060             : #endif
    1061             : }
    1062             : 
    1063             : static str
    1064          27 : PCREpatindex(int *ret, const char *const *pat, const char *const *val)
    1065             : {
    1066             : #ifdef HAVE_LIBPCRE
    1067          27 :         pcre *re = NULL;
    1068          27 :         char *ppat = NULL, *msg;
    1069             : 
    1070          53 :         if (strNil(*pat) || strNil(*val)) {
    1071           2 :                 *ret = int_nil;
    1072           2 :                 return MAL_SUCCEED;
    1073             :         }
    1074             : 
    1075          25 :         if ((msg = pat2pcre(&ppat, *pat)) != MAL_SUCCEED)
    1076             :                 return msg;
    1077          25 :         if ((msg = pcre_compile_wrap(&re, ppat, FALSE)) != MAL_SUCCEED) {
    1078           0 :                 GDKfree(ppat);
    1079           0 :                 return msg;
    1080             :         }
    1081          25 :         GDKfree(ppat);
    1082          25 :         msg = PCREindex(ret, re, val);
    1083          25 :         pcre_free(re);
    1084          25 :         return msg;
    1085             : #else
    1086             :         (void) ret;
    1087             :         (void) pat;
    1088             :         (void) val;
    1089             :         throw(MAL, "pcre.patindex", "Database was compiled without PCRE support.");
    1090             : #endif
    1091             : }
    1092             : 
    1093             : static str
    1094           0 : PCREquote(str *ret, const char *const *val)
    1095             : {
    1096           0 :         char *p;
    1097           0 :         const char *s = *val;
    1098             : 
    1099           0 :         *ret = p = GDKmalloc(strlen(s) * 2 + 1);        /* certainly long enough */
    1100           0 :         if (p == NULL)
    1101           0 :                 throw(MAL, "pcre.quote", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1102             :         /* quote all non-alphanumeric ASCII characters (i.e. leave
    1103             :            non-ASCII and alphanumeric alone) */
    1104           0 :         while (*s) {
    1105           0 :                 if (!((*s & 0x80) != 0 ||
    1106           0 :                           ('a' <= *s && *s <= 'z') ||
    1107           0 :                           ('A' <= *s && *s <= 'Z') || isdigit((unsigned char) *s)))
    1108           0 :                         *p++ = '\\';
    1109           0 :                 *p++ = *s++;
    1110             :         }
    1111           0 :         *p = 0;
    1112           0 :         return MAL_SUCCEED;
    1113             : }
    1114             : 
    1115             : static str
    1116           6 : PCREsql2pcre(str *ret, const char *const *pat, const char *const *esc)
    1117             : {
    1118           6 :         return sql2pcre(ret, *pat, *esc);
    1119             : }
    1120             : 
    1121             : static inline str
    1122        9144 : choose_like_path(bool *use_re, bool *use_strcmp, bool *empty,
    1123             :                                  const char *pat, const char *esc)
    1124             : {
    1125        9144 :         str res = MAL_SUCCEED;
    1126        9144 :         *use_re = false;
    1127        9144 :         *use_strcmp = false;
    1128        9144 :         *empty = false;
    1129             : 
    1130             : 
    1131       17800 :         if (strNil(pat) || strNil(esc)) {
    1132         488 :                 *empty = true;
    1133             :         } else {
    1134        8656 :                 if (!mnre_is_pattern_properly_escaped(pat, (unsigned char) *esc))
    1135           5 :                         throw(MAL, "pcre.sql2pcre",
    1136             :                                   SQLSTATE(22019) ILLEGAL_ARGUMENT
    1137             :                                   ": (I)LIKE pattern must not end with escape character");
    1138        8596 :                 if (is_strcmpable(pat, esc)) {
    1139         905 :                         *use_re = true;
    1140         905 :                         *use_strcmp = true;
    1141             :                 } else {
    1142        7691 :                         *use_re = true;
    1143             :                 }
    1144             :         }
    1145             :         return res;
    1146             : }
    1147             : 
    1148             : static str
    1149         234 : PCRElike_imp(bit *ret, const char *const *s, const char *const *pat,
    1150             :                          const char *const *esc, const bit *isens)
    1151             : {
    1152         234 :         str res = MAL_SUCCEED;
    1153         234 :         bool use_re = false, use_strcmp = false, empty = false;
    1154         234 :         struct RE *re = NULL;
    1155             : 
    1156         234 :         if ((res = choose_like_path(&use_re, &use_strcmp, &empty,
    1157             :                                                                 *pat, *esc)) != MAL_SUCCEED)
    1158             :                 return res;
    1159             : 
    1160         459 :         MT_thread_setalgorithm(empty ? "pcrelike: trivially empty" : use_strcmp ?
    1161         225 :                                                    "pcrelike: pattern matching using strcmp" : use_re ?
    1162             :                                                    "pcrelike: pattern matching using RE" :
    1163             :                                                    "pcrelike: pattern matching using pcre");
    1164             : 
    1165         468 :         if (strNil(*s) || empty) {
    1166           0 :                 *ret = bit_nil;
    1167             :         } else {
    1168         234 :                 if (use_strcmp) {
    1169           9 :                         *ret = *isens ? GDKstrcasecmp(*s, *pat) == 0
    1170           7 :                                 : strcmp(*s, *pat) == 0;
    1171             :                 } else {
    1172         225 :                         if (!(re = mnre_create(*pat, *isens, (unsigned char) **esc)))
    1173           0 :                                 res = createException(MAL, "pcre.like4",
    1174             :                                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1175             :                         else
    1176         225 :                                 *ret = mnre_match(*s, re);
    1177             :                 }
    1178             :         }
    1179             : 
    1180         234 :         if (re)
    1181         225 :                 mnre_destroy(re);
    1182             :         return res;
    1183             : }
    1184             : 
    1185             : static str
    1186         234 : PCRElike(bit *ret, const char *const *s, const char *const *pat,
    1187             :                  const char *const *esc, const bit *isens)
    1188             : {
    1189         229 :         return PCRElike_imp(ret, s, pat, esc, isens);
    1190             : }
    1191             : 
    1192             : static str
    1193           5 : PCREnotlike(bit *ret, const char *const *s, const char *const *pat,
    1194             :                         const char *const *esc, const bit *isens)
    1195             : {
    1196           5 :         str tmp;
    1197           5 :         bit r;
    1198             : 
    1199           5 :         rethrow("str.not_like", tmp, PCRElike(&r, s, pat, esc, isens));
    1200           5 :         *ret = r == bit_nil ? bit_nil : !r;
    1201           5 :         return MAL_SUCCEED;
    1202             : }
    1203             : 
    1204             : static inline str
    1205        8429 : mnre_like_build(struct RE **re, const char *pat, bool caseignore,
    1206             :                           bool use_strcmp, uint32_t esc)
    1207             : {
    1208        8429 :         if (!use_strcmp) {
    1209        7533 :                 if (!(*re = mnre_create(pat, caseignore, esc)))
    1210           0 :                         return createException(MAL, "pcre.re_like_build",
    1211             :                                                                    SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1212             :         }
    1213             :         return MAL_SUCCEED;
    1214             : }
    1215             : 
    1216             : static inline bit
    1217        6176 : mnre_like_proj_apply(const char *s, const struct RE *restrict re,
    1218             :                                    const char *pat,
    1219             :                                    bool caseignore, bool anti, bool use_strcmp)
    1220             : {
    1221        6176 :         if (strNil(s))
    1222         408 :                 return bit_nil;
    1223        5768 :         if (use_strcmp) {
    1224        1097 :                 if (caseignore) {
    1225         489 :                         if (anti)
    1226         458 :                                 return GDKstrcasecmp(s, pat) != 0;
    1227             :                         else
    1228          31 :                                 return GDKstrcasecmp(s, pat) == 0;
    1229             :                 } else {
    1230         608 :                         if (anti)
    1231         301 :                                 return strcmp(s, pat) != 0;
    1232             :                         else
    1233         307 :                                 return strcmp(s, pat) == 0;
    1234             :                 }
    1235             :         } else {
    1236        4671 :                 if (anti)
    1237         136 :                         return !mnre_match(s, re);
    1238             :                 else
    1239        4535 :                         return mnre_match(s, re);
    1240             :         }
    1241             : }
    1242             : 
    1243             : static inline void
    1244        8559 : mnre_like_clean(struct RE **re)
    1245             : {
    1246        8559 :         if (*re) {
    1247         928 :                 mnre_destroy(*re);
    1248        7528 :                 *re = NULL;
    1249             :         }
    1250             : }
    1251             : 
    1252             : static str
    1253        1079 : BATPCRElike_imp(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci,
    1254             :                                 const char *const *esc, const bit *isens, const bit *not)
    1255             : {
    1256        1079 :         str msg = MAL_SUCCEED;
    1257        1079 :         BAT *b = NULL, *pbn = NULL, *bn = NULL;
    1258        1079 :         const char *input = NULL;
    1259        1079 :         bool use_re = false,
    1260        1079 :                 use_strcmp = false,
    1261        1079 :                 empty = false,
    1262        1079 :                 isensitive = (bool) *isens,
    1263        1079 :                 anti = (bool) *not,
    1264        1079 :                 has_nil = false,
    1265        1079 :                 input_is_a_bat = isaBatType(getArgType(mb, pci, 1)),
    1266        1079 :                 pattern_is_a_bat = isaBatType(getArgType(mb, pci, 2));
    1267        1079 :         bat *r = getArgReference_bat(stk, pci, 0);
    1268        1079 :         BUN q = 0;
    1269        1079 :         bit *restrict ret = NULL;
    1270        1079 :         struct RE *mnre_simple = NULL;
    1271        1079 :         BATiter bi = (BATiter) { 0 }, pi;
    1272             : 
    1273        1079 :         (void) cntxt;
    1274        1079 :         if (input_is_a_bat) {
    1275        1080 :                 bat *bid = getArgReference_bat(stk, pci, 1);
    1276        1080 :                 if (!(b = BATdescriptor(*bid))) {
    1277           0 :                         msg = createException(MAL, "batalgebra.batpcrelike3",
    1278             :                                                                   SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
    1279           0 :                         goto bailout;
    1280             :                 }
    1281             :         }
    1282        1085 :         if (pattern_is_a_bat) {
    1283         112 :                 bat *pb = getArgReference_bat(stk, pci, 2);
    1284         112 :                 if (!(pbn = BATdescriptor(*pb))) {
    1285           0 :                         msg = createException(MAL, "batalgebra.batpcrelike3",
    1286             :                                                                   SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
    1287           0 :                         goto bailout;
    1288             :                 }
    1289             :         }
    1290        1085 :         assert((!b || ATOMstorage(b->ttype) == TYPE_str)
    1291             :                    && (!pbn || ATOMstorage(pbn->ttype) == TYPE_str));
    1292             : 
    1293        1085 :         q = BATcount(b ? b : pbn);
    1294        1085 :         if (!(bn = COLnew(b ? b->hseqbase : pbn->hseqbase, TYPE_bit, q, TRANSIENT))) {
    1295           0 :                 msg = createException(MAL, "batalgebra.batpcrelike3",
    1296             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1297           0 :                 goto bailout;
    1298             :         }
    1299        1084 :         ret = (bit *) Tloc(bn, 0);
    1300             : 
    1301        1084 :         if (pattern_is_a_bat) {
    1302         112 :                 pi = bat_iterator(pbn);
    1303         112 :                 if (b)
    1304         112 :                         bi = bat_iterator(b);
    1305             :                 else
    1306           0 :                         input = *getArgReference_str(stk, pci, 1);
    1307             : 
    1308        1147 :                 for (BUN p = 0; p < q; p++) {
    1309        1036 :                         const char *next_input = b ? BUNtvar(bi, p) : input,
    1310        1036 :                                 *np = BUNtvar(pi, p);
    1311             : 
    1312        1036 :                         if ((msg = choose_like_path(&use_re, &use_strcmp, &empty,
    1313             :                                                                                 np, *esc)) != MAL_SUCCEED) {
    1314           0 :                                 bat_iterator_end(&pi);
    1315           0 :                                 if (b)
    1316           0 :                                         bat_iterator_end(&bi);
    1317           0 :                                 goto bailout;
    1318             :                         }
    1319             : 
    1320        1071 :                         if (empty) {
    1321         459 :                                 ret[p] = bit_nil;
    1322             :                         } else {
    1323         612 :                                 if ((msg = mnre_like_build(&mnre_simple, np, isensitive,
    1324             :                                                                                  use_strcmp,
    1325         612 :                                                                                  (unsigned char) **esc)) != MAL_SUCCEED) {
    1326           0 :                                         bat_iterator_end(&pi);
    1327           0 :                                         if (b)
    1328           0 :                                                 bat_iterator_end(&bi);
    1329           0 :                                         goto bailout;
    1330             :                                 }
    1331         612 :                                 ret[p] = mnre_like_proj_apply(next_input, mnre_simple, np,
    1332             :                                                                                         isensitive, anti, use_strcmp);
    1333         612 :                                 mnre_like_clean(&mnre_simple);
    1334             :                         }
    1335        1071 :                         has_nil |= is_bit_nil(ret[p]);
    1336             :                 }
    1337         111 :                 bat_iterator_end(&pi);
    1338         112 :                 if (b)
    1339         112 :                         bat_iterator_end(&bi);
    1340             :         } else {
    1341         972 :                 const char *pat = *getArgReference_str(stk, pci, 2);
    1342         972 :                 if ((msg = choose_like_path(&use_re, &use_strcmp, &empty,
    1343             :                                                                         pat, *esc)) != MAL_SUCCEED)
    1344           5 :                         goto bailout;
    1345             : 
    1346         967 :                 bi = bat_iterator(b);
    1347        1868 :                 MT_thread_setalgorithm(empty ? "pcrelike: trivially empty" : use_strcmp
    1348             :                                                            ? "pcrelike: pattern matching using strcmp" :
    1349         900 :                                                            use_re ? "pcrelike: pattern matching using RE" :
    1350             :                                                            "pcrelike: pattern matching using pcre");
    1351             : 
    1352         969 :                 if (empty) {
    1353          43 :                         for (BUN p = 0; p < q; p++)
    1354          26 :                                 ret[p] = bit_nil;
    1355             :                         has_nil = true;
    1356             :                 } else {
    1357         951 :                         if ((msg = mnre_like_build(&mnre_simple, pat, isensitive, use_strcmp,
    1358         952 :                                                                          (unsigned char) **esc)) != MAL_SUCCEED) {
    1359           0 :                                 bat_iterator_end(&bi);
    1360           0 :                                 goto bailout;
    1361             :                         }
    1362        6512 :                         for (BUN p = 0; p < q; p++) {
    1363        5561 :                                 const char *s = BUNtvar(bi, p);
    1364        5574 :                                 ret[p] = mnre_like_proj_apply(s, mnre_simple, pat, isensitive,
    1365             :                                                                                         anti, use_strcmp);
    1366        5560 :                                 has_nil |= is_bit_nil(ret[p]);
    1367             :                         }
    1368             :                 }
    1369         968 :                 bat_iterator_end(&bi);
    1370             :         }
    1371             : 
    1372        1086 :   bailout:
    1373        1086 :         mnre_like_clean(&mnre_simple);
    1374        1086 :         if (bn && !msg) {
    1375        1081 :                 BATsetcount(bn, q);
    1376        1081 :                 bn->tnil = has_nil;
    1377        1081 :                 bn->tnonil = !has_nil;
    1378        1081 :                 bn->tkey = BATcount(bn) <= 1;
    1379        1081 :                 bn->tsorted = BATcount(bn) <= 1;
    1380        1081 :                 bn->trevsorted = BATcount(bn) <= 1;
    1381        1081 :                 *r = bn->batCacheid;
    1382        1081 :                 BBPkeepref(bn);
    1383           5 :         } else if (bn)
    1384           5 :                 BBPreclaim(bn);
    1385        1085 :         BBPreclaim(b);
    1386        1084 :         BBPreclaim(pbn);
    1387        1083 :         return msg;
    1388             : }
    1389             : 
    1390             : static str
    1391         926 : BATPCRElike(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    1392             : {
    1393         926 :         const char *esc = *getArgReference_str(stk, pci, 3);
    1394         926 :         const bit *ci = getArgReference_bit(stk, pci, 4);
    1395         926 :         bit no = FALSE;
    1396             : 
    1397         926 :         return BATPCRElike_imp(cntxt, mb, stk, pci, &esc, ci, &no);
    1398             : }
    1399             : 
    1400             : static str
    1401         158 : BATPCREnotlike(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    1402             : {
    1403         158 :         const char *esc = *getArgReference_str(stk, pci, 3);
    1404         158 :         const bit *ci = getArgReference_bit(stk, pci, 4);
    1405         158 :         bit yes = TRUE;
    1406             : 
    1407         158 :         return BATPCRElike_imp(cntxt, mb, stk, pci, &esc, ci, &yes);
    1408             : }
    1409             : 
    1410             : /* scan select loop with or without candidates */
    1411             : #define pcrescanloop(TEST, KEEP_NULLS)                                                                  \
    1412             :         do {                                                                                                                            \
    1413             :                 TRC_DEBUG(ALGO,                                                                                                 \
    1414             :                                   "PCREselect(b=%s#"BUNFMT",anti=%d): "                                     \
    1415             :                                   "scanselect %s\n", BATgetId(b), BATcount(b),                        \
    1416             :                                   anti, #TEST);                                                                                 \
    1417             :                 if (!s || BATtdense(s)) {                                                                               \
    1418             :                         for (; p < q; p++) {                                                                         \
    1419             :                                 GDK_CHECK_TIMEOUT(qry_ctx, counter,                                             \
    1420             :                                                                   GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); \
    1421             :                                 const char *restrict v = BUNtvar(bi, p - off);                  \
    1422             :                                 if ((TEST) || ((KEEP_NULLS) && strNil(v)))                              \
    1423             :                                         vals[cnt++] = p;                                                                        \
    1424             :                         }                                                                                                                       \
    1425             :                 } else {                                                                                                                \
    1426             :                         for (; p < ncands; p++) {                                                                    \
    1427             :                                 GDK_CHECK_TIMEOUT(qry_ctx, counter,                                             \
    1428             :                                                                   GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); \
    1429             :                                 oid o = canditer_next(ci);                                                              \
    1430             :                                 const char *restrict v = BUNtvar(bi, o - off);                  \
    1431             :                                 if ((TEST) || ((KEEP_NULLS) && strNil(v)))                              \
    1432             :                                         vals[cnt++] = o;                                                                        \
    1433             :                         }                                                                                                                       \
    1434             :                 }                                                                                                                               \
    1435             :         } while (0)
    1436             : 
    1437             : static str
    1438        6723 : mnre_likeselect(BAT *bn, BAT *b, BAT *s, struct canditer *ci, BUN p, BUN q,
    1439             :                           BUN *rcnt, const char *pat, bool caseignore, bool anti,
    1440             :                           bool use_strcmp, uint32_t esc, bool keep_nulls)
    1441             : {
    1442        6723 :         BATiter bi = bat_iterator(b);
    1443        6738 :         BUN cnt = 0, ncands = ci->ncand;
    1444        6738 :         oid off = b->hseqbase, *restrict vals = Tloc(bn, 0);
    1445        6738 :         struct RE *re = NULL;
    1446        6738 :         str msg = MAL_SUCCEED;
    1447             : 
    1448        6738 :         size_t counter = 0;
    1449        6738 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    1450             : 
    1451        6738 :         if ((msg = mnre_like_build(&re, pat, caseignore, use_strcmp,
    1452             :                                                          esc)) != MAL_SUCCEED)
    1453           0 :                 goto bailout;
    1454             : 
    1455        6716 :         if (use_strcmp) {
    1456         133 :                 if (caseignore) {
    1457          47 :                         if (anti)
    1458          58 :                                 pcrescanloop(!strNil(v)
    1459             :                                                          && GDKstrcasecmp(v, pat) != 0, keep_nulls);
    1460             :                         else
    1461         710 :                                 pcrescanloop(!strNil(v)
    1462             :                                                          && GDKstrcasecmp(v, pat) == 0, keep_nulls);
    1463             :                 } else {
    1464          86 :                         if (anti)
    1465          62 :                                 pcrescanloop(!strNil(v) && strcmp(v, pat) != 0, keep_nulls);
    1466             :                         else
    1467        9331 :                                 pcrescanloop(!strNil(v) && strcmp(v, pat) == 0, keep_nulls);
    1468             :                 }
    1469             :         } else {
    1470        6583 :                 if (caseignore) {
    1471         143 :                         if (anti) {
    1472          44 :                                 pcrescanloop(!strNil(v)
    1473             :                                                          && !mnre_match(v, re), keep_nulls);
    1474             :                         } else {
    1475       11445 :                                 pcrescanloop(!strNil(v)
    1476             :                                                          && mnre_match(v, re), keep_nulls);
    1477             :                         }
    1478             :                 } else {
    1479        6440 :                         if (anti)
    1480       43440 :                                 pcrescanloop(!strNil(v)
    1481             :                                                          && !mnre_match(v, re), keep_nulls);
    1482             :                         else
    1483      154955 :                                 pcrescanloop(!strNil(v)
    1484             :                                                          && mnre_match(v, re), keep_nulls);
    1485             :                 }
    1486             :         }
    1487             : 
    1488          54 :   bailout:
    1489        6714 :         bat_iterator_end(&bi);
    1490        6733 :         mnre_like_clean(&re);
    1491        6739 :         *rcnt = cnt;
    1492        6739 :         return msg;
    1493             : }
    1494             : 
    1495             : static str
    1496        6724 : PCRElikeselect(bat *ret, const bat *bid, const bat *sid, const char *const *pat,
    1497             :                            const char *const *esc, const bit *caseignore, const bit *anti)
    1498             : {
    1499        6724 :         BAT *b, *s = NULL, *bn = NULL, *old_s = NULL;
    1500        6724 :         str msg = MAL_SUCCEED;
    1501        6724 :         bool use_re = false,
    1502        6724 :                 use_strcmp = false,
    1503        6724 :                 empty = false;
    1504        6724 :         bool with_strimps = false;
    1505        6724 :         bool with_strimps_anti = false;
    1506        6724 :         BUN p = 0, q = 0, rcnt = 0;
    1507        6724 :         struct canditer ci;
    1508             : 
    1509        6724 :         if ((b = BATdescriptor(*bid)) == NULL) {
    1510           0 :                 msg = createException(MAL, "algebra.likeselect",
    1511             :                                                           SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
    1512           0 :                 goto bailout;
    1513             :         }
    1514        6740 :         if (sid && !is_bat_nil(*sid) && (s = BATdescriptor(*sid)) == NULL) {
    1515           0 :                 msg = createException(MAL, "algebra.likeselect",
    1516             :                                                           SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
    1517           0 :                 goto bailout;
    1518             :         }
    1519             : 
    1520        6739 :         assert(ATOMstorage(b->ttype) == TYPE_str);
    1521             : 
    1522        6739 :         if ((msg = choose_like_path(&use_re, &use_strcmp, &empty,
    1523             :                                                                 *pat, *esc)) != MAL_SUCCEED)
    1524           0 :                 goto bailout;
    1525             : 
    1526        6664 :         if (empty) {
    1527           0 :                 if (!(bn = BATdense(0, 0, 0)))
    1528           0 :                         msg = createException(MAL, "algebra.likeselect",
    1529             :                                                                   SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1530             : 
    1531           0 :                 goto bailout;
    1532             :         }
    1533             :         /* Since the strimp pre-filtering of a LIKE query produces a superset of the actual result the complement of that
    1534             :          * set will necessarily reject some of the matching entries in the NOT LIKE query.
    1535             :          *
    1536             :          * In this case we run the PCRElikeselect as a LIKE query with strimps and return the complement of the result,
    1537             :          * taking extra care to not return NULLs. This currently means that we do not run strimps for NOT LIKE queries if
    1538             :          * the BAT contains NULLs.
    1539             :          */
    1540        6664 :         if (BAThasstrimps(b)) {
    1541          48 :                 if (STRMPcreate(b, NULL) == GDK_SUCCEED) {
    1542          48 :                         BAT *tmp_s = STRMPfilter(b, s, *pat, *anti);
    1543          48 :                         if (tmp_s) {
    1544          48 :                                 old_s = s;
    1545          48 :                                 s = tmp_s;
    1546          48 :                                 if (!*anti)
    1547             :                                         with_strimps = true;
    1548             :                                 else
    1549           0 :                                         with_strimps_anti = true;
    1550             :                         }
    1551             :                 } else {                                /* If we cannot filter with the strimp just continue normally */
    1552           0 :                         GDKclrerr();
    1553             :                 }
    1554             :         }
    1555             : 
    1556             : 
    1557        6739 :         MT_thread_setalgorithm(use_strcmp
    1558        6739 :                                                    ? (with_strimps ?
    1559             :                                                           "pcrelike: pattern matching using strcmp with strimps"
    1560             :                                                           : (with_strimps_anti ?
    1561             :                                                                  "pcrelike: pattern matching using strcmp with strimps anti"
    1562        6739 :                                                                  : "pcrelike: pattern matching using strcmp")) :
    1563        6606 :                                                    use_re ? (with_strimps ?
    1564             :                                                                          "pcrelike: pattern matching using RE with strimps"
    1565             :                                                                          : (with_strimps_anti ?
    1566             :                                                                                 "pcrelike: patterm matching using RE with strimps anti"
    1567             :                                                                                 :
    1568             :                                                                                 "pcrelike: pattern matching using RE"))
    1569             :                                                    : (with_strimps ?
    1570             :                                                           "pcrelike: pattern matching using pcre with strimps"
    1571             :                                                           : (with_strimps_anti ?
    1572             :                                                                  "pcrelike: pattermatching using pcre with strimps anti"
    1573             :                                                                  : "pcrelike: pattern matching using pcre")));
    1574             : 
    1575        6736 :         canditer_init(&ci, b, s);
    1576        6738 :         if (!(bn = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT))) {
    1577           0 :                 msg = createException(MAL, "algebra.likeselect",
    1578             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1579           0 :                 goto bailout;
    1580             :         }
    1581             : 
    1582        6732 :         if (!s || BATtdense(s)) {
    1583        1666 :                 if (s) {
    1584        5012 :                         assert(BATtdense(s));
    1585        5012 :                         p = (BUN) s->tseqbase;
    1586        5012 :                         q = p + BATcount(s);
    1587        5012 :                         if ((oid) p < b->hseqbase)
    1588             :                                 p = b->hseqbase;
    1589        5012 :                         if ((oid) q > b->hseqbase + BATcount(b))
    1590             :                                 q = b->hseqbase + BATcount(b);
    1591             :                 } else {
    1592        1666 :                         p = b->hseqbase;
    1593        1666 :                         q = BATcount(b) + b->hseqbase;
    1594             :                 }
    1595             :         }
    1596             : 
    1597        6732 :         msg = mnre_likeselect(bn, b, s, &ci, p, q, &rcnt, *pat, *caseignore, *anti
    1598        1878 :                                                 && !with_strimps_anti, use_strcmp,
    1599        6732 :                                                 (unsigned char) **esc, with_strimps_anti);
    1600             : 
    1601        6737 :         if (!msg) {                                     /* set some properties */
    1602        6739 :                 BATsetcount(bn, rcnt);
    1603        6726 :                 bn->tsorted = true;
    1604        6726 :                 bn->trevsorted = bn->batCount <= 1;
    1605        6726 :                 bn->tkey = true;
    1606        6726 :                 bn->tnil = false;
    1607        6726 :                 bn->tnonil = true;
    1608        6726 :                 bn->tseqbase = rcnt == 0 ? 0 : rcnt == 1 ? *(const oid *) Tloc(bn, 0) : rcnt == b->batCount ? b->hseqbase : oid_nil;
    1609        6726 :                 if (with_strimps_anti) {
    1610             :                         /* Reverse the result taking into account the original candidate list. */
    1611             :                         // BAT *rev = BATdiffcand(BATdense(b->hseqbase, 0, b->batCount), bn);
    1612           0 :                         BAT *rev;
    1613           0 :                         if (old_s) {
    1614           0 :                                 rev = BATdiffcand(old_s, bn);
    1615             : #ifndef NDEBUG
    1616           0 :                                 BAT *is = BATintersectcand(old_s, bn);
    1617           0 :                                 if (is) {
    1618           0 :                                         assert(is->batCount == bn->batCount);
    1619           0 :                                         BBPreclaim(is);
    1620             :                                 }
    1621           0 :                                 assert(rev->batCount == old_s->batCount - bn->batCount);
    1622             : #endif
    1623             :                         }
    1624             : 
    1625             :                         else
    1626           0 :                                 rev = BATnegcands(0, b->batCount, bn);
    1627             :                         /* BAT *rev = BATnegcands(0, b->batCount, bn); */
    1628           0 :                         BBPunfix(bn->batCacheid);
    1629           0 :                         bn = rev;
    1630             :                 }
    1631             :         }
    1632             : 
    1633             : 
    1634        6724 :   bailout:
    1635        6724 :         BBPreclaim(b);
    1636        6737 :         BBPreclaim(s);
    1637        6733 :         BBPreclaim(old_s);
    1638        6734 :         if (bn && !msg) {
    1639        6734 :                 *ret = bn->batCacheid;
    1640        6734 :                 BBPkeepref(bn);
    1641           0 :         } else if (bn)
    1642           0 :                 BBPreclaim(bn);
    1643        6737 :         return msg;
    1644             : }
    1645             : 
    1646             : #define APPEND(b, o)    (((oid *) b->theap->base)[b->batCount++] = (o))
    1647             : #define VALUE(s, x)             (s##vars + VarHeapVal(s##vals, (x), s##i.width))
    1648             : 
    1649             : /* nested loop implementation for PCRE join */
    1650             : #define pcre_join_loop(STRCMP, MNRE_MATCH)                                                              \
    1651             :         do {                                                                                                                            \
    1652             :                 for (BUN ridx = 0; ridx < rci.ncand; ridx++) {                                       \
    1653             :                         ro = canditer_next(&rci);                                                                   \
    1654             :                         vr = VALUE(r, ro - rbase);                                                                      \
    1655             :                         nl = 0;                                                                                                         \
    1656             :                         use_re = use_strcmp = empty = false;                                            \
    1657             :                         if ((msg = choose_like_path(&use_re, &use_strcmp, &empty, vr, esc))) \
    1658             :                                 goto bailout;                                                                                   \
    1659             :                         if (!empty) {                                                                                           \
    1660             :                                 if ((msg = mnre_like_build(&re, vr, false, use_strcmp, (unsigned char) *esc)) != MAL_SUCCEED) \
    1661             :                                         goto bailout;                                                                           \
    1662             :                                 canditer_reset(&lci);                                                                       \
    1663             :                                 TIMEOUT_LOOP_IDX_DECL(lidx, lci.ncand, qry_ctx) {               \
    1664             :                                         lo = canditer_next(&lci);                                                   \
    1665             :                                         vl = VALUE(l, lo - lbase);                                                      \
    1666             :                                         if (strNil(vl)) {                                                                       \
    1667             :                                                 continue;                                                                               \
    1668             :                                         } else {                                                                                        \
    1669             :                                                 if (use_strcmp) {                                                               \
    1670             :                                                         if (STRCMP)                                                                     \
    1671             :                                                                 continue;                                                               \
    1672             :                                                 } else {                                                                                \
    1673             :                                                         assert(re);                                                                     \
    1674             :                                                         if (MNRE_MATCH)                                                         \
    1675             :                                                                 continue;                                                               \
    1676             :                                                 }                                                                                               \
    1677             :                                         }                                                                                                       \
    1678             :                                         if (BATcount(r1) == BATcapacity(r1)) {                          \
    1679             :                                                 newcap = BATgrows(r1);                                                  \
    1680             :                                                 BATsetcount(r1, BATcount(r1));                                  \
    1681             :                                                 if (r2)                                                                                 \
    1682             :                                                         BATsetcount(r2, BATcount(r2));                          \
    1683             :                                                 if (BATextend(r1, newcap) != GDK_SUCCEED || (r2 && BATextend(r2, newcap) != GDK_SUCCEED)) { \
    1684             :                                                         msg = createException(MAL, "pcre.join", SQLSTATE(HY013) MAL_MALLOC_FAIL); \
    1685             :                                                         goto bailout;                                                           \
    1686             :                                                 }                                                                                               \
    1687             :                                                 assert(!r2 || BATcapacity(r1) == BATcapacity(r2)); \
    1688             :                                         }                                                                                                       \
    1689             :                                         if (BATcount(r1) > 0) {                                                              \
    1690             :                                                 if (lastl + 1 != lo)                                                    \
    1691             :                                                         r1->tseqbase = oid_nil;                                              \
    1692             :                                                 if (nl == 0) {                                                                  \
    1693             :                                                         if (r2)                                                                         \
    1694             :                                                                 r2->trevsorted = false;                                      \
    1695             :                                                         if (lastl > lo) {                                                    \
    1696             :                                                                 r1->tsorted = false;                                 \
    1697             :                                                                 r1->tkey = false;                                            \
    1698             :                                                         } else if (lastl < lo) {                                     \
    1699             :                                                                 r1->trevsorted = false;                                      \
    1700             :                                                         } else {                                                                        \
    1701             :                                                                 r1->tkey = false;                                            \
    1702             :                                                         }                                                                                       \
    1703             :                                                 }                                                                                               \
    1704             :                                         }                                                                                                       \
    1705             :                                         APPEND(r1, lo);                                                                         \
    1706             :                                         if (r2)                                                                                         \
    1707             :                                                 APPEND(r2, ro);                                                                 \
    1708             :                                         lastl = lo;                                                                                     \
    1709             :                                         nl++;                                                                                           \
    1710             :                                 }                                                                                                               \
    1711             :                                 mnre_like_clean(&re);                                                                               \
    1712             :                                 TIMEOUT_CHECK(qry_ctx,                                                                  \
    1713             :                                                           GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); \
    1714             :                         }                                                                                                                       \
    1715             :                         if (r2) {                                                                                                       \
    1716             :                                 if (nl > 1) {                                                                                        \
    1717             :                                         r2->tkey = false;                                                                    \
    1718             :                                         r2->tseqbase = oid_nil;                                                              \
    1719             :                                         r1->trevsorted = false;                                                              \
    1720             :                                 } else if (nl == 0) {                                                                   \
    1721             :                                         rskipped = BATcount(r2) > 0;                                         \
    1722             :                                 } else if (rskipped) {                                                                  \
    1723             :                                         r2->tseqbase = oid_nil;                                                              \
    1724             :                                 }                                                                                                               \
    1725             :                         } else if (nl > 1) {                                                                         \
    1726             :                                 r1->trevsorted = false;                                                                      \
    1727             :                         }                                                                                                                       \
    1728             :                 }                                                                                                                               \
    1729             :         } while (0)
    1730             : 
    1731             : static char *
    1732          59 : pcrejoin(BAT *r1, BAT *r2, BAT *l, BAT *r, BAT *sl, BAT *sr, const char *esc,
    1733             :                  bit caseignore, bit anti)
    1734             : {
    1735          59 :         struct canditer lci, rci;
    1736          59 :         const char *lvals, *rvals, *lvars, *rvars, *vl, *vr;
    1737          59 :         int rskipped = 0;                       /* whether we skipped values in r */
    1738          59 :         oid lbase, rbase, lo, ro, lastl = 0;    /* last value inserted into r1 */
    1739          59 :         BUN nl, newcap;
    1740          59 :         char *msg = MAL_SUCCEED;
    1741          59 :         struct RE *re = NULL;
    1742          59 :         bool use_re = false,
    1743          59 :                 use_strcmp = false,
    1744          59 :                 empty = false;
    1745          59 :         lng t0 = 0;
    1746             : 
    1747          59 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    1748             : 
    1749          59 :         TRC_DEBUG_IF(ALGO) t0 = GDKusec();
    1750             : 
    1751         177 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
    1752          59 :         assert(ATOMtype(l->ttype) == TYPE_str);
    1753             : 
    1754          59 :         BAT *ol = NULL, *or = NULL;
    1755          59 :         if (caseignore) {
    1756          11 :                 ol = l;
    1757          11 :                 or = r;
    1758          11 :                 l = BATcasefold(l, NULL);
    1759          10 :                 r = BATcasefold(r, NULL);
    1760          11 :                 if (l == NULL || r == NULL) {
    1761           0 :                         BBPreclaim(l);
    1762           0 :                         BBPreclaim(r);
    1763           0 :                         throw(MAL, "pcre.join", GDK_EXCEPTION);
    1764             :                 }
    1765             :         }
    1766             : 
    1767          59 :         canditer_init(&lci, l, sl);
    1768          59 :         canditer_init(&rci, r, sr);
    1769             : 
    1770          59 :         BATiter li = bat_iterator(l);
    1771          59 :         BATiter ri = bat_iterator(r);
    1772          59 :         lbase = l->hseqbase;
    1773          59 :         rbase = r->hseqbase;
    1774          59 :         lvals = (const char *) li.base;
    1775          59 :         rvals = (const char *) ri.base;
    1776          59 :         assert(ri.vh && r->ttype);
    1777          59 :         lvars = li.vh->base;
    1778          59 :         rvars = ri.vh->base;
    1779             : 
    1780          59 :         r1->tkey = true;
    1781          59 :         r1->tsorted = true;
    1782          59 :         r1->trevsorted = true;
    1783          59 :         r1->tnil = false;
    1784          59 :         r1->tnonil = true;
    1785          59 :         if (r2) {
    1786          42 :                 r2->tkey = true;
    1787          42 :                 r2->tsorted = true;
    1788          42 :                 r2->trevsorted = true;
    1789          42 :                 r2->tnil = false;
    1790          42 :                 r2->tnonil = true;
    1791             :         }
    1792             : 
    1793          59 :         if (anti) {
    1794         654 :                 pcre_join_loop(strcmp(vl, vr) == 0, mnre_match(vl, re));
    1795             :         } else {
    1796         476 :                 pcre_join_loop(strcmp(vl, vr) != 0, !mnre_match(vl, re));
    1797             :         }
    1798          59 :         bat_iterator_end(&li);
    1799          58 :         bat_iterator_end(&ri);
    1800          58 :         if (ol) {
    1801          10 :                 BBPreclaim(l);
    1802          11 :                 BBPreclaim(r);
    1803          11 :                 l = ol;
    1804          11 :                 r = or;
    1805             :         }
    1806             : 
    1807          59 :         assert(!r2 || BATcount(r1) == BATcount(r2));
    1808             :         /* also set other bits of heap to correct value to indicate size */
    1809          59 :         BATsetcount(r1, BATcount(r1));
    1810          58 :         if (r2)
    1811          41 :                 BATsetcount(r2, BATcount(r2));
    1812          58 :         if (BATcount(r1) > 0) {
    1813          43 :                 if (BATtdense(r1))
    1814          14 :                         r1->tseqbase = ((oid *) r1->theap->base)[0];
    1815          43 :                 if (r2 && BATtdense(r2))
    1816          32 :                         r2->tseqbase = ((oid *) r2->theap->base)[0];
    1817             :         } else {
    1818          15 :                 r1->tseqbase = 0;
    1819          15 :                 if (r2)
    1820           8 :                         r2->tseqbase = 0;
    1821             :         }
    1822             : 
    1823          40 :         if (r2)
    1824          41 :                 TRC_DEBUG(ALGO,
    1825             :                                   "l=%s#" BUNFMT "[%s]%s%s,"
    1826             :                                   "r=%s#" BUNFMT "[%s]%s%s,sl=%s#" BUNFMT "%s%s,"
    1827             :                                   "sr=%s#" BUNFMT "%s%s -> "
    1828             :                                   "%s#" BUNFMT "%s%s,%s#" BUNFMT "%s%s (" LLFMT " usec)\n",
    1829             :                                   BATgetId(l), BATcount(l), ATOMname(l->ttype),
    1830             :                                   l->tsorted ? "-sorted" : "",
    1831             :                                   l->trevsorted ? "-revsorted" : "",
    1832             :                                   BATgetId(r), BATcount(r), ATOMname(r->ttype),
    1833             :                                   r->tsorted ? "-sorted" : "",
    1834             :                                   r->trevsorted ? "-revsorted" : "",
    1835             :                                   sl ? BATgetId(sl) : "NULL", sl ? BATcount(sl) : 0,
    1836             :                                   sl && sl->tsorted ? "-sorted" : "",
    1837             :                                   sl && sl->trevsorted ? "-revsorted" : "",
    1838             :                                   sr ? BATgetId(sr) : "NULL", sr ? BATcount(sr) : 0,
    1839             :                                   sr && sr->tsorted ? "-sorted" : "",
    1840             :                                   sr && sr->trevsorted ? "-revsorted" : "",
    1841             :                                   BATgetId(r1), BATcount(r1),
    1842             :                                   r1->tsorted ? "-sorted" : "",
    1843             :                                   r1->trevsorted ? "-revsorted" : "",
    1844             :                                   BATgetId(r2), BATcount(r2),
    1845             :                                   r2->tsorted ? "-sorted" : "",
    1846             :                                   r2->trevsorted ? "-revsorted" : "", GDKusec() - t0);
    1847             :         else
    1848          17 :                 TRC_DEBUG(ALGO,
    1849             :                                   "l=%s#" BUNFMT "[%s]%s%s,"
    1850             :                                   "r=%s#" BUNFMT "[%s]%s%s,sl=%s#" BUNFMT "%s%s,"
    1851             :                                   "sr=%s#" BUNFMT "%s%s -> "
    1852             :                                   "%s#" BUNFMT "%s%s (" LLFMT " usec)\n",
    1853             :                                   BATgetId(l), BATcount(l), ATOMname(l->ttype),
    1854             :                                   l->tsorted ? "-sorted" : "",
    1855             :                                   l->trevsorted ? "-revsorted" : "",
    1856             :                                   BATgetId(r), BATcount(r), ATOMname(r->ttype),
    1857             :                                   r->tsorted ? "-sorted" : "",
    1858             :                                   r->trevsorted ? "-revsorted" : "",
    1859             :                                   sl ? BATgetId(sl) : "NULL", sl ? BATcount(sl) : 0,
    1860             :                                   sl && sl->tsorted ? "-sorted" : "",
    1861             :                                   sl && sl->trevsorted ? "-revsorted" : "",
    1862             :                                   sr ? BATgetId(sr) : "NULL", sr ? BATcount(sr) : 0,
    1863             :                                   sr && sr->tsorted ? "-sorted" : "",
    1864             :                                   sr && sr->trevsorted ? "-revsorted" : "",
    1865             :                                   BATgetId(r1), BATcount(r1),
    1866             :                                   r1->tsorted ? "-sorted" : "",
    1867             :                                   r1->trevsorted ? "-revsorted" : "", GDKusec() - t0);
    1868             :         return MAL_SUCCEED;
    1869             : 
    1870           0 :   bailout:
    1871           0 :         bat_iterator_end(&li);
    1872           0 :         bat_iterator_end(&ri);
    1873           0 :         mnre_like_clean(&re);
    1874           0 :         assert(msg != MAL_SUCCEED);
    1875             :         return msg;
    1876             : }
    1877             : 
    1878             : static str /* PCREjoin: fetch the bats for the given ids, check the escape/caseignore inputs, and run pcrejoin() on them. */
    1879          55 : PCREjoin(bat *r1, bat *r2, bat lid, bat rid, bat slid, bat srid, bat elid, /* r1 (and r2 when non-NULL) receive the result bat ids */
    1880             :                  bat ciid, bit anti) /* slid/srid are only looked up when they are not bat nil */
    1881             : { /* returns MAL_SUCCEED, the message set on a failure path, or the RUNTIME_OBJECT_MISSING exception thrown at fail: */
    1882          55 :         BAT *left = NULL, *right = NULL, *escape = NULL, *caseignore = NULL,
    1883          55 :                 *candleft = NULL, *candright = NULL;
    1884          55 :         BAT *result1 = NULL, *result2 = NULL;
    1885          55 :         char *msg = MAL_SUCCEED;
    1886          55 :         const char *esc = "";
    1887          55 :         bit ci;
    1888          55 :         BATiter bi;
    1889             : 
    1890          55 :         if ((left = BATdescriptor(lid)) == NULL) /* a failed lookup jumps to fail: with msg still unset */
    1891           0 :                 goto fail;
    1892          59 :         if ((right = BATdescriptor(rid)) == NULL)
    1893           0 :                 goto fail;
    1894          59 :         if ((escape = BATdescriptor(elid)) == NULL)
    1895           0 :                 goto fail;
    1896          59 :         if ((caseignore = BATdescriptor(ciid)) == NULL)
    1897           0 :                 goto fail;
    1898          59 :         if (!is_bat_nil(slid) && (candleft = BATdescriptor(slid)) == NULL) /* optional: a nil id leaves candleft NULL */
    1899           0 :                 goto fail;
    1900          59 :         if (!is_bat_nil(srid) && (candright = BATdescriptor(srid)) == NULL) /* optional: a nil id leaves candright NULL */
    1901           0 :                 goto fail;
    1902          59 :         result1 = COLnew(0, TYPE_oid, BATcount(left), TRANSIENT); /* new oid bat; BATcount(left) is presumably the initial capacity - TODO confirm */
    1903          59 :         if (r2) /* the second result is only created when the caller passed r2 */
    1904          42 :                 result2 = COLnew(0, TYPE_oid, BATcount(left), TRANSIENT);
    1905          59 :         if (!result1 || (r2 && !result2)) {
    1906           0 :                 msg = createException(MAL, "pcre.join",
    1907             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1908           0 :                 goto fail;
    1909             :         }
    1910          59 :         result1->tnil = false; /* set the result properties before the bat is handed to pcrejoin */
    1911          59 :         result1->tnonil = true;
    1912          59 :         result1->tkey = true;
    1913          59 :         result1->tsorted = true;
    1914          59 :         result1->trevsorted = true;
    1915          59 :         result1->tseqbase = 0;
    1916          59 :         if (r2) { /* same properties for the second result */
    1917          42 :                 result2->tnil = false;
    1918          42 :                 result2->tnonil = true;
    1919          42 :                 result2->tkey = true;
    1920          42 :                 result2->tsorted = true;
    1921          42 :                 result2->trevsorted = true;
    1922          42 :                 result2->tseqbase = 0;
    1923             :         }
    1924          59 :         if (BATcount(escape) != 1) { /* the escape bat must hold exactly one value */
    1925           0 :                 msg = createException(MAL, "pcre.join",
    1926             :                                                           SQLSTATE(42000)
    1927             :                                                           "At the moment, only one value is allowed for the escape input at pcre join");
    1928           0 :                 goto fail;
    1929             :         }
    1930          59 :         if (BATcount(caseignore) != 1) { /* the caseignore bat must hold exactly one value */
    1931           0 :                 msg = createException(MAL, "pcre.join",
    1932             :                                                           SQLSTATE(42000)
    1933             :                                                           "At the moment, only one value is allowed for the case ignore input at pcre join");
    1934           0 :                 goto fail;
    1935             :         }
    1936          59 :         bi = bat_iterator(caseignore);
    1937          59 :         ci = *(bit *) BUNtloc(bi, 0); /* copy the single case-ignore flag, then the iterator can be closed */
    1938          59 :         bat_iterator_end(&bi);
    1939          59 :         bi = bat_iterator(escape);
    1940          59 :         esc = BUNtvar(bi, 0); /* this iterator stays open across the pcrejoin call; esc presumably points into the bat's storage - TODO confirm */
    1941          59 :         msg = pcrejoin(result1, result2, left, right, candleft, candright, esc, ci,
    1942             :                                    anti);
    1943          57 :         bat_iterator_end(&bi); /* closed before the error check, so the fail: path needs no iterator cleanup */
    1944          59 :         if (msg)
    1945           0 :                 goto fail;
    1946          59 :         *r1 = result1->batCacheid; /* hand the result ids to the caller and keep the references */
    1947          59 :         BBPkeepref(result1);
    1948          59 :         if (r2) {
    1949          42 :                 *r2 = result2->batCacheid;
    1950          42 :                 BBPkeepref(result2);
    1951             :         }
    1952          59 :         BBPunfix(left->batCacheid); /* release the input bats */
    1953          59 :         BBPunfix(right->batCacheid);
    1954          59 :         BBPreclaim(escape);
    1955          59 :         BBPreclaim(caseignore);
    1956          59 :         BBPreclaim(candleft); /* candleft/candright are still NULL here when no candidate list was given */
    1957          59 :         BBPreclaim(candright);
    1958             :         return MAL_SUCCEED;
    1959             : 
    1960           0 :   fail: /* common error exit: release whatever was acquired; pointers not yet set are still NULL */
    1961           0 :         BBPreclaim(left);
    1962           0 :         BBPreclaim(right);
    1963           0 :         BBPreclaim(escape);
    1964           0 :         BBPreclaim(caseignore);
    1965           0 :         BBPreclaim(candleft);
    1966           0 :         BBPreclaim(candright);
    1967           0 :         BBPreclaim(result1);
    1968           0 :         BBPreclaim(result2);
    1969           0 :         if (msg) /* a specific exception was created above */
    1970             :                 return msg;
    1971           0 :         throw(MAL, "pcre.join", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); /* no message set: one of the BATdescriptor lookups failed */
    1972             : }
    1973             : 
    1974             : static str /* LIKEjoin: two-result entry point; dereferences the pointer arguments and forwards them to PCREjoin. */
    1975          38 : LIKEjoin(bat *r1, bat *r2, const bat *lid, const bat *rid, const bat *elid,
    1976             :                  const bat *cid, const bat *slid, const bat *srid,
    1977             :                  const bit *nil_matches, const lng *estimate, const bit *anti)
    1978             : {
    1979          38 :         (void) nil_matches; /* accepted but not used */
    1980          38 :         (void) estimate; /* accepted but not used */
    1981          38 :         return PCREjoin(r1, r2, *lid, *rid, slid ? *slid : 0, srid ? *srid : 0, /* a NULL slid/srid pointer is passed on as bat id 0 */
    1982          38 :                                         *elid, *cid, *anti);
    1983             : }
    1984             : 
    1985             : static str /* LIKEjoin1: single-result entry point; same forwarding as LIKEjoin but passes NULL for the second result. */
    1986          17 : LIKEjoin1(bat *r1, const bat *lid, const bat *rid, const bat *elid,
    1987             :                   const bat *cid, const bat *slid, const bat *srid,
    1988             :                   const bit *nil_matches, const lng *estimate, const bit *anti)
    1989             : {
    1990          17 :         (void) nil_matches; /* accepted but not used */
    1991          17 :         (void) estimate; /* accepted but not used */
    1992          17 :         return PCREjoin(r1, NULL, *lid, *rid, slid ? *slid : 0, srid ? *srid : 0, /* a NULL slid/srid pointer is passed on as bat id 0 */
    1993          17 :                                         *elid, *cid, *anti);
    1994             : }
    1995             : 
    1996             : #include "mel.h"
    1997             : mel_atom pcre_init_atoms[] = { /* atom table passed to mal_module() by init_pcre_mal */
    1998             :  { .name="pcre", },  { .cmp=NULL } /* one atom named "pcre", followed by a .cmp=NULL entry that closes the list */
    1999             : };
    2000             : mel_func pcre_init_funcs[] = { /* function table passed to mal_module() by init_pcre_mal: module, name, implementation, flag, help text, signature */
    2001             :  command("pcre", "index", PCREindex, false, "match a pattern, return matched position (or 0 when not found)", args(1,3, arg("",int),arg("pat",pcre),arg("s",str))),
    2002             :  command("pcre", "match", PCREmatch, false, "Perl Compatible Regular Expression pattern matching against a string", args(1,3, arg("",bit),arg("s",str),arg("pat",str))),
    2003             :  command("pcre", "imatch", PCREimatch, false, "Caseless Perl Compatible Regular Expression pattern matching against a string", args(1,3, arg("",bit),arg("s",str),arg("pat",str))),
    2004             :  command("pcre", "patindex", PCREpatindex, false, "Location of the first POSIX pattern matching against a string", args(1,3, arg("",int),arg("pat",str),arg("s",str))),
    2005             :  command("pcre", "replace", PCREreplace_wrap, false, "Replace _all_ matches of \"pattern\" in \"origin_str\" with \"replacement\".\nParameter \"flags\" accept these flags: 'i', 'm', 's', and 'x'.\n'e': if present, an empty string is considered to be a valid match\n'i': if present, the match operates in case-insensitive mode.\nOtherwise, in case-sensitive mode.\n'm': if present, the match operates in multi-line mode.\n's': if present, the match operates in \"dot-all\"\nThe specifications of the flags can be found in \"man pcreapi\"\nThe flag letters may be repeated.\nNo other letters than 'e', 'i', 'm', 's' and 'x' are allowed in \"flags\".\nReturns the replaced string, or if no matches found, the original string.", args(1,5, arg("",str),arg("origin",str),arg("pat",str),arg("repl",str),arg("flags",str))), /* NOTE(review): the help text first lists flags 'i','m','s','x' but also describes 'e' */
    2006             :  command("pcre", "replace_first", PCREreplacefirst_wrap, false, "Replace _the first_ match of \"pattern\" in \"origin_str\" with \"replacement\".\nParameter \"flags\" accept these flags: 'i', 'm', 's', and 'x'.\n'e': if present, an empty string is considered to be a valid match\n'i': if present, the match operates in case-insensitive mode.\nOtherwise, in case-sensitive mode.\n'm': if present, the match operates in multi-line mode.\n's': if present, the match operates in \"dot-all\"\nThe specifications of the flags can be found in \"man pcreapi\"\nThe flag letters may be repeated.\nNo other letters than 'e', 'i', 'm', 's' and 'x' are allowed in \"flags\".\nReturns the replaced string, or if no matches found, the original string.", args(1,5, arg("",str),arg("origin",str),arg("pat",str),arg("repl",str),arg("flags",str))),
    2007             :  command("pcre", "pcre_quote", PCREquote, false, "Return a PCRE pattern string that matches the argument exactly.", args(1,2, arg("",str),arg("s",str))),
    2008             :  command("pcre", "sql2pcre", PCREsql2pcre, false, "Convert a SQL like pattern with the given escape character into a PCRE pattern.", args(1,3, arg("",str),arg("pat",str),arg("esc",str))),
    2009             :  command("str", "replace", PCREreplace_wrap, false, "", args(1,5, arg("",str),arg("origin",str),arg("pat",str),arg("repl",str),arg("flags",str))), /* same implementation as pcre.replace, listed under module "str" */
    2010             :  command("batpcre", "replace", PCREreplace_bat_wrap, false, "", args(1,5, batarg("",str),batarg("orig",str),arg("pat",str),arg("repl",str),arg("flag",str))),
    2011             :  command("batpcre", "replace_first", PCREreplacefirst_bat_wrap, false, "", args(1,5, batarg("",str),batarg("orig",str),arg("pat",str),arg("repl",str),arg("flag",str))),
    2012             :  command("algebra", "like", PCRElike, false, "", args(1,5, arg("",bit),arg("s",str),arg("pat",str),arg("esc",str),arg("caseignore",bit))),
    2013             :  command("algebra", "not_like", PCREnotlike, false, "", args(1,5, arg("",bit),arg("s",str),arg("pat",str),arg("esc",str),arg("caseignore",bit))),
    2014             :  pattern("batalgebra", "like", BATPCRElike, false, "", args(1,5, batarg("",bit),batarg("s",str),arg("pat",str),arg("esc",str),arg("caseignore",bit))), /* three signatures share BATPCRElike: bat/scalar, scalar/bat and bat/bat for s and pat */
    2015             :  pattern("batalgebra", "like", BATPCRElike, false, "", args(1,5, batarg("",bit),arg("s",str),batarg("pat",str),arg("esc",str),arg("caseignore",bit))),
    2016             :  pattern("batalgebra", "like", BATPCRElike, false, "", args(1,5, batarg("",bit),batarg("s",str),batarg("pat",str),arg("esc",str),arg("caseignore",bit))),
    2017             :  pattern("batalgebra", "not_like", BATPCREnotlike, false, "", args(1,5, batarg("",bit),batarg("s",str),arg("pat",str),arg("esc",str),arg("caseignore",bit))), /* same three signature variants for not_like */
    2018             :  pattern("batalgebra", "not_like", BATPCREnotlike, false, "", args(1,5, batarg("",bit),arg("s",str),batarg("pat",str),arg("esc",str),arg("caseignore",bit))),
    2019             :  pattern("batalgebra", "not_like", BATPCREnotlike, false, "", args(1,5, batarg("",bit),batarg("s",str),batarg("pat",str),arg("esc",str),arg("caseignore",bit))),
    2020             :  command("algebra", "likeselect", PCRElikeselect, false, "Select all head values of the first input BAT for which the\ntail value is \"like\" the given (SQL-style) pattern and for\nwhich the head value occurs in the tail of the second input\nBAT.\nInput is a dense-headed BAT, output is a dense-headed BAT with in\nthe tail the head value of the input BAT for which the\nrelationship holds.  The output BAT is sorted on the tail value.", args(1,7, batarg("",oid),batarg("b",str),batarg("s",oid),arg("pat",str),arg("esc",str),arg("caseignore",bit),arg("anti",bit))),
    2021             :  command("algebra", "likejoin", LIKEjoin, false, "Join the string bat L with the pattern bat R\nwith optional candidate lists SL and SR using pattern escape string ESC\nand doing a case sensitive match.\nThe result is two aligned bats with oids of matching rows.", args(2,11, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("esc",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))), /* NOTE(review): the help text says "case sensitive" although the signature has a caseignore argument */
    2022             :  command("algebra", "likejoin", LIKEjoin1, false, "The same as LIKEjoin_esc, but only produce one output", args(1,10,batarg("",oid),batarg("l",str),batarg("r",str),batarg("esc",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))), /* NOTE(review): the help text names LIKEjoin_esc, which is not an entry in this table */
    2023             :  { .imp=NULL } /* entry with .imp=NULL closes the list */
    2024             : };
    2025             : #include "mal_import.h"
    2026             : #ifdef _MSC_VER
    2027             : #undef read
    2028             : #pragma section(".CRT$XCU",read)
    2029             : #endif
    2030         345 : LIB_STARTUP_FUNC(init_pcre_mal) /* startup hook declared through the LIB_STARTUP_FUNC macro */
    2031         345 : { mal_module("pcre", pcre_init_atoms, pcre_init_funcs); } /* hands the atom and function tables to mal_module under the name "pcre" */

Generated by: LCOV version 1.14