LCOV - code coverage report
Current view: top level - monetdb5/modules/atoms - str.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 1111 1444 76.9 %
Date: 2024-10-07 21:21:43 Functions: 91 102 89.2 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : /*
      14             :  *  N.J. Nes, M.L. Kersten
      15             :  * The String Module
      16             :  * Strings can be created in many ways. Already in the built-in
      17             :  * operations each atom can be cast to a string using the str(atom)
      18             :  * mil command.  The string module gives the possibility of
      19             :  * constructing a string as a substring of a given string (s). There
      20             :  * are two such construction functions.  The first is the substring
      21             :  * from some position (offset) until the end of the string. The second
      22             :  * starts again at the given offset position but only copies count
      23             :  * Unicode codepoints. The functions fail when the position and count
      24             :  * fall out of bounds. A negative position indicates that the position
      25             :  * is computed from the end of the source string.
      26             :  *
      27             :  * The strings can be compared using the "=" and "!=" operators.
      28             :  *
      29             :  * The operator "+" concatenates a string and an atom. The atom will
      30             :  * be converted to a string using the atom to string c function. The
      31             :  * string and the result of the conversion are concatenated to form a
      32             :  * new string. This string is returned.
      33             :  *
      34             :  * The length function returns the length of the string. The length is
      35             :  * the number of characters in the string. The maximum string length
      36             :  * handled by the kernel is a 32-bit number.
      37             :  *
      38             :  * chrAt() returns the character at position index in the string
      39             :  * s. The function will fail when the index is out of range. The range
      40             :  * is from 0 to length(s)-1.
      41             :  *
      42             :  * The startsWith and endsWith functions test if the string s starts
      43             :  * with or ends with the given prefix or suffix.
      44             :  *
      45             :  * The toLower and toUpper functions cast the string to lower or upper
      46             :  * case characters.
      47             :  *
      48             :  * The search(str,chr) function searches for the first occurrence of a
      49             :  * character from the beginning of the string. The search(chr,str)
      50             :  * searches for the last occurrence (or first from the end of the
      51             :  * string). The last search function locates the position of the first
      52             :  * occurrence of the string s2 in string s. All search functions
      53             :  * return -1 when the search failed.  Otherwise the position is
      54             :  * returned.
      55             :  *
      56             :  * All string functions fail when an incorrect string (NULL pointer)
      57             :  * is given.  In the current implementation, a failure is signaled by
      58             :  * returning nil, since this facilitates the use of the string module
      59             :  * in bulk operations.
      60             :  *
      61             :  * All functions in the module have now been converted to
      62             :  * Unicode. Internally, we use UTF-8 to store strings as Unicode in
      63             :  * zero-terminated byte-sequences.
      64             :  */
      65             : #include "monetdb_config.h"
      66             : #include "str.h"
      67             : #include <string.h>
      68             : #include "mal_interpreter.h"
      69             : #include "mutf8.h"
      70             : 
      71             : #define UTF8_assert(s)          assert(checkUTF8(s))
      72             : 
      73             : /* return the number of codepoints in `s' before `end'. */
      74             : static inline int
      75         737 : UTF8_strpos(const char *s, const char *end)
      76             : {
      77         737 :         int pos = 0;
      78             : 
      79         737 :         UTF8_assert(s);
      80             : 
      81         734 :         if (s > end) {
      82             :                 return -1;
      83             :         }
      84       56183 :         while (s < end) {
      85             :                 /* just count leading bytes of encoded code points; only works
      86             :                  * for correctly encoded UTF-8 */
      87       55449 :                 pos += (*s++ & 0xC0) != 0x80;
      88             :         }
      89             :         return pos;
      90             : }
      91             : 
      92             : /* return a pointer to the byte that starts the pos'th (0-based)
      93             :  * codepoint in s */
      94             : static inline char *
      95     5828113 : UTF8_strtail(const char *s, int pos)
      96             : {
      97     5828113 :         UTF8_assert(s);
      98    86735728 :         while (*s) {
      99    86014540 :                 if ((*s & 0xC0) != 0x80) {
     100    86014470 :                         if (pos <= 0)
     101             :                                 break;
     102    80346177 :                         pos--;
     103             :                 }
     104    80346247 :                 s++;
     105             :         }
     106     6389481 :         return (char *) s;
     107             : }
     108             : 
     109             : /* copy n Unicode codepoints from s to dst, return pointer to new end */
     110             : static inline str
     111         215 : UTF8_strncpy(char *restrict dst, const char *restrict s, int n)
     112             : {
     113         215 :         UTF8_assert(s);
     114        1449 :         while (*s && n) {
     115        1234 :                 if ((*s & 0xF8) == 0xF0) {
     116             :                         /* 4 byte UTF-8 sequence */
     117           0 :                         *dst++ = *s++;
     118           0 :                         *dst++ = *s++;
     119           0 :                         *dst++ = *s++;
     120           0 :                         *dst++ = *s++;
     121        1234 :                 } else if ((*s & 0xF0) == 0xE0) {
     122             :                         /* 3 byte UTF-8 sequence */
     123           6 :                         *dst++ = *s++;
     124           6 :                         *dst++ = *s++;
     125           6 :                         *dst++ = *s++;
     126        1228 :                 } else if ((*s & 0xE0) == 0xC0) {
     127             :                         /* 2 byte UTF-8 sequence */
     128           0 :                         *dst++ = *s++;
     129           0 :                         *dst++ = *s++;
     130             :                 } else {
     131             :                         /* 1 byte UTF-8 "sequence" */
     132        1228 :                         *dst++ = *s++;
     133             :                 }
     134        1234 :                 n--;
     135             :         }
     136         215 :         *dst = '\0';
     137         215 :         return dst;
     138             : }
     139             : 
     140             : /* return number of Unicode codepoints in s; s is not nil */
     141             : int
     142    25117864 : UTF8_strlen(const char *s)
     143             : {                                                               /* This function assumes, s is never nil */
     144    25117864 :         size_t pos = 0;
     145             : 
     146    25117864 :         UTF8_assert(s);
     147    50249758 :         assert(!strNil(s));
     148             : 
     149   926414304 :         while (*s) {
     150             :                 /* just count leading bytes of encoded code points; only works
     151             :                  * for correctly encoded UTF-8 */
     152   901289425 :                 pos += (*s++ & 0xC0) != 0x80;
     153             :         }
     154    25124879 :         assert(pos < INT_MAX);
     155    25124879 :         return (int) pos;
     156             : }
     157             : 
     158             : /* return (int) strlen(s); s is not nil */
     159             : int
     160     2998144 : str_strlen(const char *s)
     161             : {                                                               /* This function assumes s is never nil */
     162     2998144 :         UTF8_assert(s);
     163     5998620 :         assert(!strNil(s));
     164             : 
     165     2999310 :         return (int) strlen(s);
     166             : }
     167             : 
     168             : /* return the display width of s */
     169             : int
     170     4650138 : UTF8_strwidth(const char *S)
     171             : {
     172     4650138 :         if (strNil(S))
     173      504986 :                 return int_nil;
     174             : 
     175     4145152 :         const uint8_t *s = (const uint8_t *) S;
     176     4145152 :         int len = 0;
     177             : 
     178   142418231 :         for (uint32_t state = 0, codepoint = 0; *s; s++) {
     179   138273079 :                 switch (decode(&state, &codepoint, (uint8_t) *s)) {
     180   138263852 :                 case UTF8_ACCEPT: {
     181   138263852 :                         int n = charwidth(codepoint);
     182   138263852 :                         if (n >= 0)
     183   138260028 :                                 len += n;
     184             :                         else
     185        3824 :                                 len++;                  /* assume width 1 if unprintable */
     186             :                         break;
     187             :                 }
     188             :                 default:
     189             :                         break;
     190             :                 case UTF8_REJECT:
     191           0 :                         assert(0);
     192             :                 }
     193             :         }
     194     4145152 :         return len;
     195             : }
     196             : 
     197             : /*
     198             :  * Here you find the wrappers around the version 4 library code.
     199             :  * It also contains the direct implementation of the string
     200             :  * matching support routines.
     201             :  */
     202             : #include "mal_exception.h"
     203             : 
     204             : /*
     205             :  * The SQL LIKE function returns a boolean
     206             :  */
     207             : static bool
     208           0 : STRlike(const char *s, const char *pat, const char *esc)
     209             : {
     210           0 :         const char *t, *p;
     211             : 
     212           0 :         t = s;
     213           0 :         for (p = pat; *p && *t; p++) {
     214           0 :                 if (esc && *p == *esc) {
     215           0 :                         p++;
     216           0 :                         if (*p != *t)
     217             :                                 return false;
     218           0 :                         t++;
     219           0 :                 } else if (*p == '_')
     220           0 :                         t++;
     221           0 :                 else if (*p == '%') {
     222           0 :                         p++;
     223           0 :                         while (*p == '%')
     224           0 :                                 p++;
     225           0 :                         if (*p == 0)
     226             :                                 return true;    /* tail is acceptable */
     227           0 :                         for (; *p && *t; t++)
     228           0 :                                 if (STRlike(t, p, esc))
     229             :                                         return true;
     230           0 :                         if (*p == 0 && *t == 0)
     231             :                                 return true;
     232             :                         return false;
     233           0 :                 } else if (*p == *t)
     234           0 :                         t++;
     235             :                 else
     236             :                         return false;
     237             :         }
     238           0 :         if (*p == '%' && *(p + 1) == 0)
     239             :                 return true;
     240           0 :         return *t == 0 && *p == 0;
     241             : }
     242             : 
     243             : static str
     244           0 : STRlikewrap3(bit *ret, const char *const *s, const char *const *pat, const char *const *esc)
     245             : {
     246           0 :         if (strNil(*s) || strNil(*pat) || strNil(*esc))
     247           0 :                 *ret = bit_nil;
     248             :         else
     249           0 :                 *ret = (bit) STRlike(*s, *pat, *esc);
     250           0 :         return MAL_SUCCEED;
     251             : }
     252             : 
     253             : static str
     254           0 : STRlikewrap(bit *ret, const char *const *s, const char *const *pat)
     255             : {
     256           0 :         if (strNil(*s) || strNil(*pat))
     257           0 :                 *ret = bit_nil;
     258             :         else
     259           0 :                 *ret = (bit) STRlike(*s, *pat, NULL);
     260           0 :         return MAL_SUCCEED;
     261             : }
     262             : 
     263             : static str
     264           0 : STRtostr(str *res, const char *const *src)
     265             : {
     266           0 :         if (*src == 0)
     267           0 :                 *res = GDKstrdup(str_nil);
     268             :         else
     269           0 :                 *res = GDKstrdup(*src);
     270           0 :         if (*res == NULL)
     271           0 :                 throw(MAL, "str.str", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     272             :         return MAL_SUCCEED;
     273             : }
     274             : 
     275             : static str
     276          97 : STRLength(int *res, const char *const *arg1)
     277             : {
     278          97 :         const char *s = *arg1;
     279             : 
     280         194 :         *res = strNil(s) ? int_nil : UTF8_strlen(s);
     281          97 :         return MAL_SUCCEED;
     282             : }
     283             : 
     284             : static str
     285           3 : STRBytes(int *res, const char *const *arg1)
     286             : {
     287           3 :         const char *s = *arg1;
     288             : 
     289           6 :         *res = strNil(s) ? int_nil : str_strlen(s);
     290           3 :         return MAL_SUCCEED;
     291             : }
     292             : 
     293             : str
     294        3787 : str_tail(str *buf, size_t *buflen, const char *s, int off)
     295             : {
     296        3787 :         if (off < 0) {
     297           1 :                 off += UTF8_strlen(s);
     298           1 :                 if (off < 0)
     299             :                         off = 0;
     300             :         }
     301        3787 :         const char *tail = UTF8_strtail(s, off);
     302        3786 :         size_t nextlen = strlen(tail) + 1;
     303        3786 :         CHECK_STR_BUFFER_LENGTH(buf, buflen, nextlen, "str.tail");
     304        3786 :         strcpy(*buf, tail);
     305        3786 :         return MAL_SUCCEED;
     306             : }
     307             : 
     308             : static str
     309           1 : STRTail(str *res, const char *const *arg1, const int *offset)
     310             : {
     311           1 :         str buf = NULL, msg = MAL_SUCCEED;
     312           1 :         const char *s = *arg1;
     313           1 :         int off = *offset;
     314             : 
     315           2 :         if (strNil(s) || is_int_nil(off)) {
     316           0 :                 *res = GDKstrdup(str_nil);
     317             :         } else {
     318           1 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
     319             : 
     320           1 :                 *res = NULL;
     321           1 :                 if (!(buf = GDKmalloc(buflen)))
     322           0 :                         throw(MAL, "str.tail", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     323           1 :                 if ((msg = str_tail(&buf, &buflen, s, off)) != MAL_SUCCEED) {
     324           0 :                         GDKfree(buf);
     325           0 :                         return msg;
     326             :                 }
     327           1 :                 *res = GDKstrdup(buf);
     328             :         }
     329             : 
     330           1 :         GDKfree(buf);
     331           1 :         if (!*res)
     332           0 :                 msg = createException(MAL, "str.tail", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     333             :         return msg;
     334             : }
     335             : 
     336             : /* copy the substring s[off:off+l] into *buf, replacing *buf with a
     337             :  * freshly allocated buffer if the substring doesn't fit; off is 0
     338             :  * based, and both off and l count in Unicode codepoints (i.e. not
     339             :  * bytes); if off < 0, off counts from the end of the string */
     340             : str
     341     3422088 : str_Sub_String(str *buf, size_t *buflen, const char *s, int off, int l)
     342             : {
     343     3422088 :         size_t len;
     344             : 
     345     3422088 :         if (off < 0) {
     346           4 :                 off += UTF8_strlen(s);
     347           4 :                 if (off < 0) {
     348           3 :                         l += off;
     349           3 :                         off = 0;
     350             :                 }
     351             :         }
     352             :         /* here, off >= 0 */
     353     3422088 :         if (l < 0) {
     354        1040 :                 strcpy(*buf, "");
     355        1040 :                 return MAL_SUCCEED;
     356             :         }
     357     3421048 :         s = UTF8_strtail(s, off);
     358     3563372 :         len = (size_t) (UTF8_strtail(s, l) - s + 1);
     359     3563925 :         CHECK_STR_BUFFER_LENGTH(buf, buflen, len, "str.substring");
     360     3563925 :         strcpy_len(*buf, s, len);
     361     3563925 :         return MAL_SUCCEED;
     362             : }
     363             : 
     364             : static str
     365           4 : STRSubString(str *res, const char *const *arg1, const int *offset, const int *length)
     366             : {
     367           4 :         str buf = NULL, msg = MAL_SUCCEED;
     368           4 :         const char *s = *arg1;
     369           4 :         int off = *offset, len = *length;
     370             : 
     371           7 :         if (strNil(s) || is_int_nil(off) || is_int_nil(len)) {
     372           1 :                 *res = GDKstrdup(str_nil);
     373             :         } else {
     374           3 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
     375             : 
     376           3 :                 *res = NULL;
     377           3 :                 if (!(buf = GDKmalloc(buflen)))
     378           0 :                         throw(MAL, "str.substring", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     379           3 :                 if ((msg = str_Sub_String(&buf, &buflen, s, off, len)) != MAL_SUCCEED) {
     380           0 :                         GDKfree(buf);
     381           0 :                         return msg;
     382             :                 }
     383           3 :                 *res = GDKstrdup(buf);
     384             :         }
     385             : 
     386           4 :         GDKfree(buf);
     387           4 :         if (!*res)
     388           0 :                 msg = createException(MAL, "str.substring",
     389             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     390             :         return msg;
     391             : }
     392             : 
     393             : str
     394           4 : str_from_wchr(str *buf, size_t *buflen, int c)
     395             : {
     396           4 :         CHECK_STR_BUFFER_LENGTH(buf, buflen, 5, "str.unicode");
     397           4 :         str s = *buf;
     398           4 :         UTF8_PUTCHAR(c, s);
     399           4 :         *s = 0;
     400           4 :         return MAL_SUCCEED;
     401           0 :   illegal:
     402           0 :         throw(MAL, "str.unicode", SQLSTATE(42000) "Illegal Unicode code point");
     403             : }
     404             : 
     405             : static str
     406           2 : STRFromWChr(str *res, const int *c)
     407             : {
     408           2 :         str buf = NULL, msg = MAL_SUCCEED;
     409           2 :         int cc = *c;
     410             : 
     411           2 :         if (is_int_nil(cc)) {
     412           0 :                 *res = GDKstrdup(str_nil);
     413             :         } else {
     414           2 :                 size_t buflen = MAX(strlen(str_nil) + 1, 8);
     415             : 
     416           2 :                 *res = NULL;
     417           2 :                 if (!(buf = GDKmalloc(buflen)))
     418           0 :                         throw(MAL, "str.unicode", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     419           2 :                 if ((msg = str_from_wchr(&buf, &buflen, cc)) != MAL_SUCCEED) {
     420           0 :                         GDKfree(buf);
     421           0 :                         return msg;
     422             :                 }
     423           2 :                 *res = GDKstrdup(buf);
     424             :         }
     425             : 
     426           2 :         GDKfree(buf);
     427           2 :         if (!*res)
     428           0 :                 msg = createException(MAL, "str.unicode",
     429             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     430             :         return msg;
     431             : }
     432             : 
     433             : /* return the Unicode code point of s at position at */
     434             : str
     435          31 : str_wchr_at(int *res, const char *s, int at)
     436             : {
     437             :         /* 64bit: should have lng arg */
     438          60 :         if (strNil(s) || is_int_nil(at) || at < 0) {
     439           2 :                 *res = int_nil;
     440           2 :                 return MAL_SUCCEED;
     441             :         }
     442          29 :         s = UTF8_strtail(s, at);
     443          29 :         if (s == NULL || *s == 0) {
     444           6 :                 *res = int_nil;
     445           6 :                 return MAL_SUCCEED;
     446             :         }
     447          23 :         uint32_t state = 0, codepoint;
     448          25 :         while (*s) {
     449          25 :                 if (decode(&state, &codepoint, (uint8_t) *s) == UTF8_ACCEPT) {
     450          23 :                         *res = codepoint;
     451          23 :                         return MAL_SUCCEED;
     452             :                 }
     453           2 :                 s++;
     454             :         }
     455           0 :         throw(MAL, "str.unicodeAt", SQLSTATE(42000) "Illegal Unicode code point");
     456             : }
     457             : 
     458             : static str
     459           0 : STRWChrAt(int *res, const char *const *arg1, const int *at)
     460             : {
     461           0 :         return str_wchr_at(res, *arg1, *at);
     462             : }
     463             : 
     464             : static inline str
     465      104013 : doStrConvert(str *res, const char *arg1, gdk_return (*func)(char **restrict, size_t *restrict, const char *restrict))
     466             : {
     467      104013 :         str buf = NULL, msg = MAL_SUCCEED;
     468             : 
     469      104013 :         if (strNil(arg1)) {
     470         647 :                 *res = GDKstrdup(str_nil);
     471             :         } else {
     472      103366 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
     473             : 
     474      103366 :                 *res = NULL;
     475      103366 :                 if (!(buf = GDKmalloc(buflen)))
     476           0 :                         throw(MAL, "str.lower", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     477      103368 :                 if ((*func)(&buf, &buflen, arg1) != GDK_SUCCEED) {
     478           0 :                         GDKfree(buf);
     479           0 :                         throw(MAL, "str.lower", GDK_EXCEPTION);
     480             :                 }
     481      103367 :                 *res = GDKstrdup(buf);
     482             :         }
     483             : 
     484      104016 :         GDKfree(buf);
     485      104016 :         if (!*res)
     486           0 :                 msg = createException(MAL, "str.lower",
     487             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     488             :         return msg;
     489             : }
     490             : 
     491             : static inline str
     492        2071 : STRlower(str *res, const char *const *arg1)
     493             : {
     494        1947 :         return doStrConvert(res, *arg1, GDKtolower);
     495             : }
     496             : 
     497             : static inline str
     498      101942 : STRupper(str *res, const char *const *arg1)
     499             : {
     500      101942 :         return doStrConvert(res, *arg1, GDKtoupper);
     501             : }
     502             : 
     503             : static inline str
     504           0 : STRcasefold(str *res, const char *const *arg1)
     505             : {
     506           0 :         return doStrConvert(res, *arg1, GDKcasefold);
     507             : }
     508             : 
     509             : /* returns 0 if the first plen bytes of s and prefix are equal, non-zero otherwise */
     510             : int
     511        2000 : str_is_prefix(const char *s, const char *prefix, int plen)
     512             : {
     513        2000 :         return strncmp(s, prefix, plen);
     514             : }
     515             : 
     516             : int
     517          73 : str_is_iprefix(const char *s, const char *prefix, int plen)
     518             : {
     519          73 :         return GDKstrncasecmp(s, prefix, SIZE_MAX, plen);
     520             : }
     521             : 
     522             : int
     523        2106 : str_is_suffix(const char *s, const char *suffix, int sul)
     524             : {
     525        2106 :         int sl = str_strlen(s);
     526             : 
     527        2106 :         if (sl < sul)
     528             :                 return -1;
     529             :         else
     530        2097 :                 return strcmp(s + sl - sul, suffix);
     531             : }
     532             : 
     533             : /* case insensitive endswith check */
     534             : int
     535          69 : str_is_isuffix(const char *s, const char *suffix, int sul)
     536             : {
     537          69 :         const char *e = s + strlen(s);
     538          69 :         const char *sf;
     539             : 
     540          69 :         (void) sul;
     541             :         /* note that the uppercase and lowercase forms of a character aren't
     542             :          * necessarily the same length in their UTF-8 encodings */
     543         478 :         for (sf = suffix; *sf && e > s; sf++) {
     544         409 :                 if ((*sf & 0xC0) != 0x80) {
     545         353 :                         while ((*--e & 0xC0) == 0x80)
     546             :                                 ;
     547             :                 }
     548             :         }
     549          71 :         while ((*sf & 0xC0) == 0x80)
     550           2 :                 sf++;
     551          69 :         return *sf != 0 || GDKstrcasecmp(e, suffix) != 0;
     552             : }
     553             : 
     554             : int
     555        8052 : str_contains(const char *h, const char *n, int nlen)
     556             : {
     557        8052 :         (void) nlen;
     558        8052 :         return strstr(h, n) == NULL;
     559             : }
     560             : 
     561             : int
     562        1268 : str_icontains(const char *h, const char *n, int nlen)
     563             : {
     564        1268 :         (void) nlen;
     565        1268 :         return GDKstrcasestr(h, n) == NULL;
     566             : }
     567             : 
     568             : static str
     569           4 : STRstartswith(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
     570             : {
     571           4 :         (void) cntxt;
     572           4 :         (void) mb;
     573             : 
     574           4 :         bit *r = getArgReference_bit(stk, pci, 0);
     575           4 :         const char *s1 = *getArgReference_str(stk, pci, 1);
     576           4 :         const char *s2 = *getArgReference_str(stk, pci, 2);
     577           4 :         bit icase = pci->argc == 4 && *getArgReference_bit(stk, pci, 3);
     578             : 
     579           8 :         if (strNil(s1) || strNil(s2)) {
     580           0 :                 *r = bit_nil;
     581             :         } else {
     582           4 :                 int s2_len = str_strlen(s2);
     583           8 :                 *r = icase ?
     584           2 :                         str_is_iprefix(s1, s2, s2_len) == 0 :
     585           2 :                         str_is_prefix(s1, s2, s2_len) == 0;
     586             :         }
     587           4 :         return MAL_SUCCEED;
     588             : }
     589             : /* STRendswith: MAL pattern; stores in result arg 0 (bit) whether the suffix helper returns 0 for (arg 1, arg 2). */
     590             : static str
     591           2 : STRendswith(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
     592             : {
     593           2 :         (void) cntxt;
     594           2 :         (void) mb;
     595             : 
     596           2 :         bit *r = getArgReference_bit(stk, pci, 0);
     597           2 :         const char *s1 = *getArgReference_str(stk, pci, 1);
     598           2 :         const char *s2 = *getArgReference_str(stk, pci, 2);
     599           2 :         bit icase = pci->argc == 4 && *getArgReference_bit(stk, pci, 3);  /* optional 4th argument selects str_is_isuffix */
     600             : 
     601           4 :         if (strNil(s1) || strNil(s2)) {
     602           0 :                 *r = bit_nil;  /* a nil operand gives a nil result */
     603             :         } else {
     604           2 :                 int s2_len = str_strlen(s2);
     605           4 :                 *r = icase ?
     606           2 :                         str_is_isuffix(s1, s2, s2_len) == 0 :  /* result is true exactly when the helper returns 0 */
     607           0 :                         str_is_suffix(s1, s2, s2_len) == 0;
     608             :         }
     609           2 :         return MAL_SUCCEED;
     610             : }
     611             : /* STRcontains: MAL pattern; result arg 0 (bit) is true when str_contains()/str_icontains() returns 0 for (arg 1, arg 2). */
     612             : /* returns whether haystack contains needle */
     613             : static str
     614           2 : STRcontains(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
     615             : {
     616           2 :         (void) cntxt;
     617           2 :         (void) mb;
     618             : 
     619           2 :         bit *r = getArgReference_bit(stk, pci, 0);
     620           2 :         const char *s1 = *getArgReference_str(stk, pci, 1);  /* haystack */
     621           2 :         const char *s2 = *getArgReference_str(stk, pci, 2);  /* needle */
     622           2 :         bit icase = pci->argc == 4 && *getArgReference_bit(stk, pci, 3);  /* optional 4th argument selects str_icontains */
     623             : 
     624           4 :         if (strNil(s1) || strNil(s2)) {
     625           0 :                 *r = bit_nil;  /* a nil operand gives a nil result */
     626             :         } else {
     627           2 :                 int s2_len = str_strlen(s2);
     628           4 :                 *r = icase ?
     629           2 :                         str_icontains(s1, s2, s2_len) == 0 :  /* the helpers return 0 when the needle is found */
     630           0 :                         str_contains(s1, s2, s2_len) == 0;
     631             :         }
     632           2 :         return MAL_SUCCEED;
     633             : }
     634             : /* str_search: returns -1 when strstr() finds no match, otherwise UTF8_strpos() of the first match within haystack. */
     635             : int
     636        3436 : str_search(const char *haystack, const char *needle)
     637             : {
     638        3436 :         needle = strstr(haystack, needle);  /* needle is reused to hold the match pointer */
     639        3436 :         if (needle == NULL)
     640             :                 return -1;
     641             : 
     642         737 :         return UTF8_strpos(haystack, needle);  /* NOTE(review): presumably the offset counted in UTF-8 characters -- confirm */
     643             : }
     644             : /* str_isearch: like str_search() but locates the match with GDKstrcasestr(); returns -1 when that returns NULL. */
     645             : int
     646           0 : str_isearch(const char *haystack, const char *needle)
     647             : {
     648           0 :         needle = GDKstrcasestr(haystack, needle);  /* needle is reused to hold the match pointer */
     649           0 :         if (needle == NULL)
     650             :                 return -1;
     651             : 
     652           0 :         return UTF8_strpos(haystack, needle);  /* NOTE(review): presumably the offset counted in UTF-8 characters -- confirm */
     653             : }
     654             : /* STRstr_search: MAL pattern; stores the int position from str_search()/str_isearch() in result arg 0, int_nil on nil input. */
     655             : /* find first occurrence of needle in haystack */
     656             : static str
     657           0 : STRstr_search(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
     658             : {
     659           0 :         (void) cntxt;
     660           0 :         (void) mb;
     661           0 :         int *res = getArgReference_int(stk, pci, 0);  /* the helpers return int; a bit-sized store would truncate the position */
     662           0 :         const char *haystack = *getArgReference_str(stk, pci, 1);
     663           0 :         const char *needle = *getArgReference_str(stk, pci, 2);
     664           0 :         bit icase = pci->argc == 4 && *getArgReference_bit(stk, pci, 3);  /* optional 4th argument selects str_isearch */
     665             : 
     666           0 :         if (strNil(haystack) || strNil(needle)) {
     667           0 :                 *res = int_nil;  /* a nil operand gives a nil result of the result's own type */
     668             :         } else {
     669           0 :                 *res = icase ?
     670           0 :                         str_isearch(haystack, needle) :
     671           0 :                         str_search(haystack, needle);
     672             :         }
     673           0 :         return MAL_SUCCEED;
     674             : }
     675             : /* str_reverse_str_search: position (via UTF8_strpos, as in str_search) of the last occurrence of needle in haystack, or -1. */
     676             : int
     677           0 : str_reverse_str_search(const char *haystack, const char *needle)
     678             : {
     679           0 :         int nulen = UTF8_strlen(needle);
     680           0 :         size_t nlen = strlen(needle);
     681             : 
     682           0 :         for (int pos = str_strlen(haystack) - 1; pos >= 0; pos--) {  /* walk backwards from the last byte */
     683           0 :                 if ((haystack[pos] & 0xC0) != 0x80) {  /* only consider the first byte of a UTF-8 sequence */
     684           0 :                         if (nulen > 1)  /* skip only nulen - 1 trailing characters so a needle at the very end is still tested */
     685           0 :                                 nulen--;
     686           0 :                         else if (strncmp(haystack + pos, needle, nlen) == 0)
     687           0 :                                 return UTF8_strpos(haystack, haystack + pos);  /* same position unit as str_search(), not a raw byte offset */
     688             :                 }
     689             :         }
     690             :         return -1;
     691             : }
     692             : /* str_reverse_str_isearch: as str_reverse_str_search() but compares with GDKstrncasecmp(); returns -1 when nothing matches. */
     693             : int
     694           0 : str_reverse_str_isearch(const char *haystack, const char *needle)
     695             : {
     696           0 :         int nulen = UTF8_strlen(needle);
     697           0 :         size_t nlen = strlen(needle);
     698             : 
     699           0 :         for (int pos = str_strlen(haystack) - 1; pos >= 0; pos--) {  /* walk backwards from the last byte */
     700           0 :                 if ((haystack[pos] & 0xC0) != 0x80) {  /* only consider the first byte of a UTF-8 sequence */
     701           0 :                         if (nulen > 1)  /* skip only nulen - 1 trailing characters so a needle at the very end is still tested */
     702           0 :                                 nulen--;
     703           0 :                         else if (GDKstrncasecmp(haystack + pos, needle, SIZE_MAX, nlen) == 0)
     704           0 :                                 return UTF8_strpos(haystack, haystack + pos);  /* same position unit as str_isearch(), not a raw byte offset */
     705             :                 }
     706             :         }
     707             :         return -1;
     708             : }
     709             : /* STRrevstr_search: MAL pattern; stores the int result of the reverse-search helpers in result arg 0, int_nil on nil input. */
     710             : /* find last occurrence of arg2 in arg1 */
     711             : static str
     712           0 : STRrevstr_search(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
     713             : {
     714           0 :         (void) cntxt;
     715           0 :         (void) mb;
     716           0 :         int *res = getArgReference_int(stk, pci, 0);
     717           0 :         const str haystack = *getArgReference_str(stk, pci, 1);
     718           0 :         const str needle = *getArgReference_str(stk, pci, 2);
     719           0 :         bit icase = pci->argc == 4 && *getArgReference_bit(stk, pci, 3);  /* optional 4th argument selects the isearch variant */
     720             : 
     721           0 :         if (strNil(haystack) || strNil(needle)) {
     722           0 :                 *res = int_nil;  /* res is an int; bit_nil stored here would not be the int nil value */
     723             :         } else {
     724           0 :                 *res = icase ?
     725           0 :                         str_reverse_str_isearch(haystack, needle) :
     726           0 :                         str_reverse_str_search(haystack, needle);
     727             :         }
     728           0 :         return MAL_SUCCEED;
     729             : }
     730             : /* str_splitpart: copies into *buf the f-th (1-based) field of s split on s2; "" if there is no such field; error if f <= 0. */
     731             : str
     732          37 : str_splitpart(str *buf, size_t *buflen, const char *s, const char *s2, int f)
     733             : {
     734          37 :         size_t len;
     735          37 :         char *p = NULL;
     736             : 
     737          37 :         if (f <= 0)
     738           4 :                 throw(MAL, "str.splitpart",
     739             :                           SQLSTATE(42000) "field position must be greater than zero");
     740             : 
     741          33 :         len = strlen(s2);
     742          33 :         if (len) {  /* with an empty delimiter nothing is split: p stays NULL and f is unchanged */
     743          42 :                 while ((p = strstr(s, s2)) != NULL && f > 1) {
     744          13 :                         s = p + len;  /* step over one field and its delimiter */
     745          13 :                         f--;
     746             :                 }
     747             :         }
     748             : 
     749          33 :         if (f != 1) {  /* ran out of delimiters before reaching the requested field */
     750          12 :                 strcpy(*buf, "");
     751          12 :                 return MAL_SUCCEED;
     752             :         }
     753             : 
     754          21 :         if (p == NULL) {
     755          10 :                 len = strlen(s);  /* last field: runs to the end of the string */
     756             :         } else {
     757          11 :                 len = (size_t) (p - s);  /* field ends where the next delimiter starts */
     758             :         }
     759             : 
     760          21 :         len++;  /* room for the terminating NUL */
     761          21 :         CHECK_STR_BUFFER_LENGTH(buf, buflen, len, "str.splitpart");  /* NOTE(review): presumably grows *buf to len bytes or throws -- confirm macro */
     762          21 :         strcpy_len(*buf, s, len);
     763          21 :         return MAL_SUCCEED;
     764             : }
     765             : /* STRsplitpart: MAL wrapper; *res gets a GDKstrdup'ed copy of the str_splitpart() output, or of str_nil if any input is nil. */
     766             : static str
     767          23 : STRsplitpart(str *res, const char *const *haystack, const char *const *needle, const int *field)
     768             : {
     769          23 :         str buf = NULL, msg = MAL_SUCCEED;
     770          23 :         const char *s = *haystack, *s2 = *needle;
     771          23 :         int f = *field;
     772             : 
     773          69 :         if (strNil(s) || strNil(s2) || is_int_nil(f)) {
     774           0 :                 *res = GDKstrdup(str_nil);
     775             :         } else {
     776          23 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
     777             : 
     778          23 :                 *res = NULL;
     779          23 :                 if (!(buf = GDKmalloc(buflen)))
     780           4 :                         throw(MAL, "str.splitpart", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     781          23 :                 if ((msg = str_splitpart(&buf, &buflen, s, s2, f)) != MAL_SUCCEED) {
     782           4 :                         GDKfree(buf);  /* release the scratch buffer before propagating the error */
     783           4 :                         return msg;
     784             :                 }
     785          19 :                 *res = GDKstrdup(buf);
     786             :         }
     787             : 
     788          19 :         GDKfree(buf);  /* buf is still NULL on the nil path */
     789          19 :         if (!*res)  /* GDKstrdup failed on either path */
     790           0 :                 msg = createException(MAL, "str.splitpart",
     791             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     792             :         return msg;
     793             : }
     794             : /* lstrip: decodes s byte by byte and stops at the first complete codepoint that is not listed in rm[0..nrm). */
     795             : /* returns number of bytes to remove from left to strip the codepoints in rm */
     796             : static size_t
     797         342 : lstrip(const char *s, size_t len, const uint32_t *rm, size_t nrm)
     798             : {
     799         342 :         uint32_t state = 0, codepoint;
     800         342 :         size_t skip = 0;
     801             : 
     802         508 :         for (size_t n = 0; n < len;) {
     803         493 :                 if (decode(&state, &codepoint, (uint8_t) s[n++]) == UTF8_ACCEPT) {  /* a full codepoint has been read */
     804             :                         size_t i;
     805        7739 :                         for (i = 0; i < nrm; i++) {
     806        7412 :                                 if (rm[i] == codepoint) {
     807             :                                         break;
     808             :                                 }
     809             :                         }
     810         469 :                         if (i == nrm)  /* codepoint is not in rm: stripping ends here */
     811         327 :                                 return skip;
     812             :                         skip = n;  /* codepoint is in rm: everything up to n can be dropped */
     813             :                 }
     814             :         }
     815             :         return skip;
     816             : }
     817             : /* rstrip: repeatedly takes the last codepoint of s[0..len) and shortens len while that codepoint is listed in rm[0..nrm). */
     818             : /* returns the resulting length of s after stripping codepoints in rm
     819             :  * from the right */
     820             : static size_t
     821         411 : rstrip(const char *s, size_t len, const uint32_t *rm, size_t nrm)
     822             : {
     823         411 :         uint32_t c;
     824         411 :         size_t i, n;
     825             : 
     826         578 :         while (len > 0) {
     827         567 :                 UTF8_LASTCHAR(c, n, s, len);  /* NOTE(review): presumably sets c to the last codepoint and n to its byte length -- confirm macro */
     828         567 :                 assert(n > 0 && n <= len);
     829        9713 :                 for (i = 0; i < nrm; i++) {
     830        9314 :                         if (rm[i] == c) {
     831         168 :                                 len -= n;  /* drop the trailing codepoint */
     832         168 :                                 break;
     833             :                         }
     834             :                 }
     835         567 :                 if (i == nrm)  /* last codepoint is not in rm: stripping ends here */
     836             :                         break;
     837             :         }
     838         411 :         return len;
     839             : }
     840             : /* whitespace: codepoints that str_strip(), str_ltrim() and str_rtrim() pass to lstrip()/rstrip() as the set to remove. */
     841             : const uint32_t whitespace[] = {
     842             :         ' ',                                            /* space */
     843             :         '\t',                                           /* tab (character tabulation) */
     844             :         '\n',                                           /* line feed */
     845             :         '\r',                                           /* carriage return */
     846             :         '\f',                                           /* form feed */
     847             :         '\v',                                           /* vertical tab (line tabulation) */
     848             : /* below the code points that have the Unicode Zs (space separator) property */
     849             :         0x00A0,                                         /* no-break space */
     850             :         0x1680,                                         /* ogham space mark */
     851             :         0x2000,                                         /* en quad */
     852             :         0x2001,                                         /* em quad */
     853             :         0x2002,                                         /* en space */
     854             :         0x2003,                                         /* em space */
     855             :         0x2004,                                         /* three-per-em space */
     856             :         0x2005,                                         /* four-per-em space */
     857             :         0x2006,                                         /* six-per-em space */
     858             :         0x2007,                                         /* figure space */
     859             :         0x2008,                                         /* punctuation space */
     860             :         0x2009,                                         /* thin space */
     861             :         0x200A,                                         /* hair space */
     862             :         0x202F,                                         /* narrow no-break space */
     863             :         0x205F,                                         /* medium mathematical space */
     864             :         0x3000,                                         /* ideographic space */
     865             : /* below the code points that have the Unicode Zl (line separator) property */
     866             :         0x2028,                                         /* line separator */
     867             : /* below the code points that have the Unicode Zp (paragraph separator)
     868             :  * property */
     869             :         0x2029,                                         /* paragraph separator */
     870             : };
     871             : 
     872             : #define NSPACES         (sizeof(whitespace) / sizeof(whitespace[0]))  /* number of entries in whitespace[] */
     873             : /* str_strip: copies s into *buf with the whitespace[] codepoints removed from both ends. */
     874             : str
     875         279 : str_strip(str *buf, size_t *buflen, const char *s)
     876             : {
     877         279 :         size_t len = strlen(s);
     878         279 :         size_t n = lstrip(s, len, whitespace, NSPACES);  /* bytes to drop on the left */
     879         279 :         s += n;
     880         279 :         len -= n;
     881         279 :         n = rstrip(s, len, whitespace, NSPACES);  /* length that remains after right-stripping */
     882             : 
     883         279 :         n++;  /* room for the terminating NUL */
     884         279 :         CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.strip");  /* NOTE(review): presumably grows *buf to n bytes or throws -- confirm macro */
     885         279 :         strcpy_len(*buf, s, n);
     886         279 :         return MAL_SUCCEED;
     887             : }
     888             : /* STRStrip: MAL wrapper; *res gets a GDKstrdup'ed copy of the str_strip() output, or of str_nil if the input is nil. */
     889             : /* remove all whitespace from either side of arg1 */
     890             : static str
     891           8 : STRStrip(str *res, const char *const *arg1)
     892             : {
     893           8 :         str buf = NULL, msg = MAL_SUCCEED;
     894           8 :         const char *s = *arg1;
     895             : 
     896           8 :         if (strNil(s)) {
     897           0 :                 *res = GDKstrdup(str_nil);
     898             :         } else {
     899           8 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
     900             : 
     901           8 :                 *res = NULL;
     902           8 :                 if (!(buf = GDKmalloc(buflen)))
     903           0 :                         throw(MAL, "str.strip", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     904           8 :                 if ((msg = str_strip(&buf, &buflen, s)) != MAL_SUCCEED) {
     905           0 :                         GDKfree(buf);  /* release the scratch buffer before propagating the error */
     906           0 :                         return msg;
     907             :                 }
     908           8 :                 *res = GDKstrdup(buf);
     909             :         }
     910             : 
     911           8 :         GDKfree(buf);  /* buf is still NULL on the nil path */
     912           8 :         if (!*res)  /* GDKstrdup failed on either path */
     913           0 :                 msg = createException(MAL, "str.strip",
     914             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     915             :         return msg;
     916             : }
     917             : /* str_ltrim: copies s into *buf with the whitespace[] codepoints removed from the left end. */
     918             : str
     919          27 : str_ltrim(str *buf, size_t *buflen, const char *s)
     920             : {
     921          27 :         size_t len = strlen(s);
     922          27 :         size_t n = lstrip(s, len, whitespace, NSPACES);  /* bytes to drop on the left */
     923          27 :         size_t nallocate = len - n + 1;  /* remaining bytes plus the terminating NUL */
     924             : 
     925          27 :         CHECK_STR_BUFFER_LENGTH(buf, buflen, nallocate, "str.ltrim");  /* NOTE(review): presumably grows *buf or throws -- confirm macro */
     926          27 :         strcpy_len(*buf, s + n, nallocate);
     927          27 :         return MAL_SUCCEED;
     928             : }
     929             : /* STRLtrim: MAL wrapper; *res gets a GDKstrdup'ed copy of the str_ltrim() output, or of str_nil if the input is nil. */
     930             : /* remove all whitespace from the start (left) of arg1 */
     931             : static str
     932          19 : STRLtrim(str *res, const char *const *arg1)
     933             : {
     934          19 :         str buf = NULL, msg = MAL_SUCCEED;
     935          19 :         const char *s = *arg1;
     936             : 
     937          19 :         if (strNil(s)) {
     938           0 :                 *res = GDKstrdup(str_nil);
     939             :         } else {
     940          19 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
     941             : 
     942          19 :                 *res = NULL;
     943          19 :                 if (!(buf = GDKmalloc(buflen)))
     944           0 :                         throw(MAL, "str.ltrim", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     945          19 :                 if ((msg = str_ltrim(&buf, &buflen, s)) != MAL_SUCCEED) {
     946           0 :                         GDKfree(buf);  /* release the scratch buffer before propagating the error */
     947           0 :                         return msg;
     948             :                 }
     949          19 :                 *res = GDKstrdup(buf);
     950             :         }
     951             : 
     952          19 :         GDKfree(buf);  /* buf is still NULL on the nil path */
     953          19 :         if (!*res)  /* GDKstrdup failed on either path */
     954           0 :                 msg = createException(MAL, "str.ltrim",
     955             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     956             :         return msg;
     957             : }
     958             : /* str_rtrim: copies s into *buf with the whitespace[] codepoints removed from the right end. */
     959             : str
     960          96 : str_rtrim(str *buf, size_t *buflen, const char *s)
     961             : {
     962          96 :         size_t len = strlen(s);
     963          96 :         size_t n = rstrip(s, len, whitespace, NSPACES);  /* length that remains after right-stripping */
     964             : 
     965          96 :         n++;  /* room for the terminating NUL */
     966          96 :         CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.rtrim");  /* NOTE(review): presumably grows *buf or throws -- confirm macro */
     967          96 :         strcpy_len(*buf, s, n);
     968          96 :         return MAL_SUCCEED;
     969             : }
     970             : /* STRRtrim: MAL wrapper; *res gets a GDKstrdup'ed copy of the str_rtrim() output, or of str_nil if the input is nil. */
     971             : /* remove all whitespace from the end (right) of arg1 */
     972             : static str
     973           6 : STRRtrim(str *res, const char *const *arg1)
     974             : {
     975           6 :         str buf = NULL, msg = MAL_SUCCEED;
     976           6 :         const char *s = *arg1;
     977             : 
     978           6 :         if (strNil(s)) {
     979           0 :                 *res = GDKstrdup(str_nil);
     980             :         } else {
     981           6 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
     982             : 
     983           6 :                 *res = NULL;
     984           6 :                 if (!(buf = GDKmalloc(buflen)))
     985           0 :                         throw(MAL, "str.rtrim", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     986           6 :                 if ((msg = str_rtrim(&buf, &buflen, s)) != MAL_SUCCEED) {
     987           0 :                         GDKfree(buf);  /* release the scratch buffer before propagating the error */
     988           0 :                         return msg;
     989             :                 }
     990           6 :                 *res = GDKstrdup(buf);
     991             :         }
     992             : 
     993           6 :         GDKfree(buf);  /* buf is still NULL on the nil path */
     994           6 :         if (!*res)  /* GDKstrdup failed on either path */
     995           0 :                 msg = createException(MAL, "str.rtrim",
     996             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     997             :         return msg;
     998             : }
     999             : /* trimchars: decodes s into uint32_t codepoints written into *buf (used as a uint32_t array) and stores their count in *n. */
    1000             : /* return a list of codepoints in s */
    1001             : static str
    1002          49 : trimchars(str *buf, size_t *buflen, size_t *n, const char *s, size_t len_s,
    1003             :                   const char *malfunc)
    1004             : {
    1005          49 :         size_t len = 0, nlen = len_s * sizeof(int);  /* one slot per input byte: each codepoint consumes at least one byte */
    1006          49 :         uint32_t *cbuf;
    1007             : 
    1008          49 :         assert(s);
    1009          49 :         CHECK_STR_BUFFER_LENGTH(buf, buflen, nlen, malfunc);  /* NOTE(review): presumably grows *buf or throws under malfunc -- confirm macro */
    1010          49 :         cbuf = *(uint32_t **) buf;
    1011             : 
    1012          49 :         uint32_t state = 0;
    1013          49 :         uint32_t codepoint;
    1014         288 :         while (*s) {
    1015         239 :                 if (decode(&state, &codepoint, (uint8_t) *s) == UTF8_ACCEPT) {
    1016         180 :                         cbuf[len++] = codepoint;  /* a complete codepoint has been decoded */
    1017             :                 }
    1018         239 :                 s++;
    1019             :         }
    1020          49 :         if (state != UTF8_ACCEPT)  /* input ended without the decoder being in the accept state */
    1021           0 :                 throw(MAL, malfunc, SQLSTATE(42000) "Illegal Unicode code point");
    1022          49 :         *n = len;
    1023          49 :         return MAL_SUCCEED;
    1024             : }
    1025             : /* str_strip2: copies s into *buf with the codepoints occurring in s2 removed from both ends; empty s2 copies s unchanged. */
    1026             : str
    1027          23 : str_strip2(str *buf, size_t *buflen, const char *s, const char *s2)
    1028             : {
    1029          23 :         str msg = MAL_SUCCEED;
    1030          23 :         size_t len, n, n2, n3;
    1031             : 
    1032          23 :         if ((n2 = strlen(s2)) == 0) {
    1033           0 :                 len = strlen(s) + 1;
    1034           0 :                 CHECK_STR_BUFFER_LENGTH(buf, buflen, len, "str.strip2");
    1035           0 :                 strcpy(*buf, s);
    1036           0 :                 return MAL_SUCCEED;
    1037             :         } else {
    1038          23 :                 if ((msg = trimchars(buf, buflen, &n3, s2, n2, "str.strip2")) != MAL_SUCCEED)  /* *buf now holds n3 codepoints */
    1039             :                         return msg;
    1040          23 :                 len = strlen(s);
    1041          23 :                 n = lstrip(s, len, *(uint32_t **) buf, n3);
    1042          23 :                 s += n;
    1043          23 :                 len -= n;
    1044          23 :                 n = rstrip(s, len, *(uint32_t **) buf, n3);  /* last read of the codepoint list; *buf is reused for output below */
    1045             : 
    1046          23 :                 n++;  /* room for the terminating NUL */
    1047          23 :                 CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.strip2");
    1048          23 :                 strcpy_len(*buf, s, n);
    1049          23 :                 return MAL_SUCCEED;
    1050             :         }
    1051             : }
    1052             : /* STRStrip2: MAL wrapper; *res gets a GDKstrdup'ed copy of the str_strip2() output, or of str_nil if either input is nil. */
    1053             : /* remove the longest string containing only characters from arg2 from
    1054             :  * either side of arg1 */
    1055             : static str
    1056          20 : STRStrip2(str *res, const char *const *arg1, const char *const *arg2)
    1057             : {
    1058          20 :         str buf = NULL, msg = MAL_SUCCEED;
    1059          20 :         const char *s = *arg1, *s2 = *arg2;
    1060             : 
    1061          38 :         if (strNil(s) || strNil(s2)) {
    1062           3 :                 *res = GDKstrdup(str_nil);
    1063             :         } else {
    1064          17 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH * sizeof(int);  /* sized in int units; str_strip2 also stores codepoints in it */
    1065             : 
    1066          17 :                 *res = NULL;
    1067          17 :                 if (!(buf = GDKmalloc(buflen)))
    1068           0 :                         throw(MAL, "str.strip2", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1069          17 :                 if ((msg = str_strip2(&buf, &buflen, s, s2)) != MAL_SUCCEED) {
    1070           0 :                         GDKfree(buf);  /* release the scratch buffer before propagating the error */
    1071           0 :                         return msg;
    1072             :                 }
    1073          17 :                 *res = GDKstrdup(buf);
    1074             :         }
    1075             : 
    1076          20 :         GDKfree(buf);  /* buf is still NULL on the nil path */
    1077          20 :         if (!*res)  /* GDKstrdup failed on either path */
    1078           0 :                 msg = createException(MAL, "str.strip2",
    1079             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1080             :         return msg;
    1081             : }
    1082             : /* str_ltrim2: copies s into *buf with the codepoints occurring in s2 removed from the left end; empty s2 copies s unchanged. */
    1083             : str
    1084          14 : str_ltrim2(str *buf, size_t *buflen, const char *s, const char *s2)
    1085             : {
    1086          14 :         str msg = MAL_SUCCEED;
    1087          14 :         size_t len, n, n2, n3, nallocate;
    1088             : 
    1089          14 :         if ((n2 = strlen(s2)) == 0) {
    1090           1 :                 len = strlen(s) + 1;
    1091           1 :                 CHECK_STR_BUFFER_LENGTH(buf, buflen, len, "str.ltrim2");
    1092           1 :                 strcpy(*buf, s);
    1093           1 :                 return MAL_SUCCEED;
    1094             :         } else {
    1095          13 :                 if ((msg = trimchars(buf, buflen, &n3, s2, n2, "str.ltrim2")) != MAL_SUCCEED)  /* *buf now holds n3 codepoints */
    1096             :                         return msg;
    1097          13 :                 len = strlen(s);
    1098          13 :                 n = lstrip(s, len, *(uint32_t **) buf, n3);  /* last read of the codepoint list; *buf is reused for output below */
    1099          13 :                 nallocate = len - n + 1;  /* remaining bytes plus the terminating NUL */
    1100             : 
    1101          13 :                 CHECK_STR_BUFFER_LENGTH(buf, buflen, nallocate, "str.ltrim2");
    1102          13 :                 strcpy_len(*buf, s + n, nallocate);
    1103          13 :                 return MAL_SUCCEED;
    1104             :         }
    1105             : }
    1106             : /* STRLtrim2: MAL wrapper; *res gets a GDKstrdup'ed copy of the str_ltrim2() output, or of str_nil if either input is nil. */
    1107             : /* remove the longest string containing only characters from arg2 from
    1108             :  * the start (left) of arg1 */
    1109             : static str
    1110           8 : STRLtrim2(str *res, const char *const *arg1, const char *const *arg2)
    1111             : {
    1112           8 :         str buf = NULL, msg = MAL_SUCCEED;
    1113           8 :         const char *s = *arg1, *s2 = *arg2;
    1114             : 
    1115          16 :         if (strNil(s) || strNil(s2)) {
    1116           0 :                 *res = GDKstrdup(str_nil);
    1117             :         } else {
    1118           8 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH * sizeof(int);  /* sized in int units; str_ltrim2 also stores codepoints in it */
    1119             : 
    1120           8 :                 *res = NULL;
    1121           8 :                 if (!(buf = GDKmalloc(buflen)))
    1122           0 :                         throw(MAL, "str.ltrim2", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1123           8 :                 if ((msg = str_ltrim2(&buf, &buflen, s, s2)) != MAL_SUCCEED) {
    1124           0 :                         GDKfree(buf);  /* release the scratch buffer before propagating the error */
    1125           0 :                         return msg;
    1126             :                 }
    1127           8 :                 *res = GDKstrdup(buf);
    1128             :         }
    1129             : 
    1130           8 :         GDKfree(buf);  /* buf is still NULL on the nil path */
    1131           8 :         if (!*res)  /* GDKstrdup failed on either path */
    1132           0 :                 msg = createException(MAL, "str.ltrim2",
    1133             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1134             :         return msg;
    1135             : }
    1136             : /* str_rtrim2: copies s into *buf with the codepoints occurring in s2 removed from the right end; empty s2 copies s unchanged. */
    1137             : str
    1138          15 : str_rtrim2(str *buf, size_t *buflen, const char *s, const char *s2)
    1139             : {
    1140          15 :         str msg = MAL_SUCCEED;
    1141          15 :         size_t len, n, n2, n3;
    1142             : 
    1143          15 :         if ((n2 = strlen(s2)) == 0) {
    1144           2 :                 len = strlen(s) + 1;
    1145           2 :                 CHECK_STR_BUFFER_LENGTH(buf, buflen, len, "str.rtrim2");
    1146           2 :                 strcpy(*buf, s);
    1147           2 :                 return MAL_SUCCEED;
    1148             :         } else {
    1149          13 :                 if ((msg = trimchars(buf, buflen, &n3, s2, n2, "str.rtrim2")) != MAL_SUCCEED)  /* errors are reported under this function's own name */
    1150             :                         return msg;
    1151          13 :                 len = strlen(s);
    1152          13 :                 n = rstrip(s, len, *(uint32_t **) buf, n3);  /* last read of the codepoint list; *buf is reused for output below */
    1153          13 :                 n++;  /* room for the terminating NUL */
    1154             : 
    1155          13 :                 CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.rtrim2");
    1156          13 :                 strcpy_len(*buf, s, n);
    1157          13 :                 return MAL_SUCCEED;
    1158             :         }
    1159             : }
    1160             : /* STRRtrim2: MAL wrapper; *res gets a GDKstrdup'ed copy of the str_rtrim2() output, or of str_nil if either input is nil. */
    1161             : /* remove the longest string containing only characters from arg2 from
    1162             :  * the end (right) of arg1 */
    1163             : static str
    1164           9 : STRRtrim2(str *res, const char *const *arg1, const char *const *arg2)
    1165             : {
    1166           9 :         str buf = NULL, msg = MAL_SUCCEED;
    1167           9 :         const char *s = *arg1, *s2 = *arg2;
    1168             : 
    1169          18 :         if (strNil(s) || strNil(s2)) {
    1170           0 :                 *res = GDKstrdup(str_nil);
    1171             :         } else {
    1172           9 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH * sizeof(int);  /* sized in int units; str_rtrim2 also stores codepoints in it */
    1173             : 
    1174           9 :                 *res = NULL;
    1175           9 :                 if (!(buf = GDKmalloc(buflen)))
    1176           0 :                         throw(MAL, "str.rtrim2", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1177           9 :                 if ((msg = str_rtrim2(&buf, &buflen, s, s2)) != MAL_SUCCEED) {
    1178           0 :                         GDKfree(buf);  /* release the scratch buffer before propagating the error */
    1179           0 :                         return msg;
    1180             :                 }
    1181           9 :                 *res = GDKstrdup(buf);
    1182             :         }
    1183             : 
    1184           9 :         GDKfree(buf);  /* buf is still NULL on the nil path */
    1185           9 :         if (!*res)  /* GDKstrdup failed on either path */
    1186           0 :                 msg = createException(MAL, "str.rtrim2",
    1187             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1188             :         return msg;
    1189             : }
    1190             : 
    1191             : static str
    1192          60 : pad(str *buf, size_t *buflen, const char *s, const char *pad, int len, int left,
    1193             :         const char *malfunc)
    1194             : {
    1195          60 :         size_t slen, padlen, repeats, residual, i, nlen;
    1196          60 :         char *res;
    1197             : 
    1198          60 :         if (len < 0)
    1199             :                 len = 0;
    1200             : 
    1201          60 :         slen = (size_t) UTF8_strlen(s);
    1202          60 :         if (slen > (size_t) len) {
    1203             :                 /* truncate */
    1204          20 :                 pad = UTF8_strtail(s, len);
    1205          20 :                 slen = pad - s + 1;
    1206             : 
    1207          20 :                 CHECK_STR_BUFFER_LENGTH(buf, buflen, slen, malfunc);
    1208          20 :                 strcpy_len(*buf, s, slen);
    1209          20 :                 return MAL_SUCCEED;
    1210             :         }
    1211             : 
    1212          40 :         padlen = (size_t) UTF8_strlen(pad);
    1213          40 :         if (slen == (size_t) len || padlen == 0) {
    1214             :                 /* nothing to do (no padding if there is no pad string) */
    1215           0 :                 slen = strlen(s) + 1;
    1216           0 :                 CHECK_STR_BUFFER_LENGTH(buf, buflen, slen, malfunc);
    1217           0 :                 strcpy(*buf, s);
    1218           0 :                 return MAL_SUCCEED;
    1219             :         }
    1220             : 
    1221          40 :         repeats = ((size_t) len - slen) / padlen;
    1222          40 :         residual = ((size_t) len - slen) % padlen;
    1223          40 :         if (residual > 0)
    1224          20 :                 residual = (size_t) (UTF8_strtail(pad, (int) residual) - pad);
    1225          40 :         padlen = strlen(pad);
    1226          40 :         slen = strlen(s);
    1227             : 
    1228          40 :         nlen = slen + repeats * padlen + residual + 1;
    1229          40 :         CHECK_STR_BUFFER_LENGTH(buf, buflen, nlen, malfunc);
    1230          40 :         res = *buf;
    1231          40 :         if (left) {
    1232          87 :                 for (i = 0; i < repeats; i++)
    1233          67 :                         memcpy(res + i * padlen, pad, padlen);
    1234          20 :                 if (residual > 0)
    1235          10 :                         memcpy(res + repeats * padlen, pad, residual);
    1236          20 :                 if (slen > 0)
    1237          20 :                         memcpy(res + repeats * padlen + residual, s, slen);
    1238             :         } else {
    1239          20 :                 if (slen > 0)
    1240          20 :                         memcpy(res, s, slen);
    1241          87 :                 for (i = 0; i < repeats; i++)
    1242          67 :                         memcpy(res + slen + i * padlen, pad, padlen);
    1243          20 :                 if (residual > 0)
    1244          10 :                         memcpy(res + slen + repeats * padlen, pad, residual);
    1245             :         }
    1246          40 :         res[repeats * padlen + residual + slen] = 0;
    1247          40 :         return MAL_SUCCEED;
    1248             : }
    1249             : 
    1250             : str
    1251           8 : str_lpad(str *buf, size_t *buflen, const char *s, int len)
    1252             : {
    1253           4 :         return pad(buf, buflen, s, " ", len, 1, "str.lpad");
    1254             : }
    1255             : 
    1256             : /* Fill up 'arg1' to length 'len' by prepending whitespaces.
    1257             :  * If 'arg1' is already longer than 'len', then it's truncated on the right
    1258             :  * (NB: this is the PostgreSQL definition).
    1259             :  *
    1260             :  * Example: lpad('hi', 5)
    1261             :  * Result: '   hi'
    1262             :  */
    1263             : static str
    1264           4 : STRLpad(str *res, const char *const *arg1, const int *len)
    1265             : {
    1266           4 :         str buf = NULL, msg = MAL_SUCCEED;
    1267           4 :         const char *s = *arg1;
    1268           4 :         int l = *len;
    1269             : 
    1270           8 :         if (strNil(s) || is_int_nil(l)) {
    1271           0 :                 *res = GDKstrdup(str_nil);
    1272             :         } else {
    1273           4 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    1274             : 
    1275           4 :                 *res = NULL;
    1276           4 :                 if (!(buf = GDKmalloc(buflen)))
    1277           0 :                         throw(MAL, "str.lpad", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1278           4 :                 if ((msg = str_lpad(&buf, &buflen, s, l)) != MAL_SUCCEED) {
    1279           0 :                         GDKfree(buf);
    1280           0 :                         return msg;
    1281             :                 }
    1282           4 :                 *res = GDKstrdup(buf);
    1283             :         }
    1284             : 
    1285           4 :         GDKfree(buf);
    1286           4 :         if (!*res)
    1287           0 :                 msg = createException(MAL, "str.lpad", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1288             :         return msg;
    1289             : }
    1290             : 
    1291             : str
    1292           8 : str_rpad(str *buf, size_t *buflen, const char *s, int len)
    1293             : {
    1294           4 :         return pad(buf, buflen, s, " ", len, 0, "str.lpad");
    1295             : }
    1296             : 
    1297             : /* Fill up 'arg1' to length 'len' by appending whitespaces.
    1298             :  * If 'arg1' is already longer than 'len', then it's truncated (on the right)
    1299             :  * (NB: this is the PostgreSQL definition).
    1300             :  *
    1301             :  * Example: rpad('hi', 5)
    1302             :  * Result: 'hi   '
    1303             :  */
    1304             : static str
    1305           4 : STRRpad(str *res, const char *const *arg1, const int *len)
    1306             : {
    1307           4 :         str buf = NULL, msg = MAL_SUCCEED;
    1308           4 :         const char *s = *arg1;
    1309           4 :         int l = *len;
    1310             : 
    1311           8 :         if (strNil(s) || is_int_nil(l)) {
    1312           0 :                 *res = GDKstrdup(str_nil);
    1313             :         } else {
    1314           4 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    1315             : 
    1316           4 :                 *res = NULL;
    1317           4 :                 if (!(buf = GDKmalloc(buflen)))
    1318           0 :                         throw(MAL, "str.rpad", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1319           4 :                 if ((msg = str_rpad(&buf, &buflen, s, l)) != MAL_SUCCEED) {
    1320           0 :                         GDKfree(buf);
    1321           0 :                         return msg;
    1322             :                 }
    1323           4 :                 *res = GDKstrdup(buf);
    1324             :         }
    1325             : 
    1326           4 :         GDKfree(buf);
    1327           4 :         if (!*res)
    1328           0 :                 msg = createException(MAL, "str.rpad", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1329             :         return msg;
    1330             : }
    1331             : 
    1332             : str
    1333          22 : str_lpad3(str *buf, size_t *buflen, const char *s, int len, const char *s2)
    1334             : {
    1335          16 :         return pad(buf, buflen, s, s2, len, 1, "str.lpad2");
    1336             : }
    1337             : 
    1338             : /* Fill up 'arg1' to length 'len' by prepending characters from 'arg2'
    1339             :  * If 'arg1' is already longer than 'len', then it's truncated on the right
    1340             :  * (NB: this is the PostgreSQL definition).
    1341             :  *
    1342             :  * Example: lpad('hi', 5, 'xy')
    1343             :  * Result: xyxhi
    1344             :  */
    1345             : static str
    1346           6 : STRLpad3(str *res, const char *const *arg1, const int *len, const char *const *arg2)
    1347             : {
    1348           6 :         str buf = NULL, msg = MAL_SUCCEED;
    1349           6 :         const char *s = *arg1, *s2 = *arg2;
    1350           6 :         int l = *len;
    1351             : 
    1352          18 :         if (strNil(s) || strNil(s2) || is_int_nil(l)) {
    1353           0 :                 *res = GDKstrdup(str_nil);
    1354             :         } else {
    1355           6 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    1356             : 
    1357           6 :                 *res = NULL;
    1358           6 :                 if (!(buf = GDKmalloc(buflen)))
    1359           0 :                         throw(MAL, "str.lpad2", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1360           6 :                 if ((msg = str_lpad3(&buf, &buflen, s, l, s2)) != MAL_SUCCEED) {
    1361           0 :                         GDKfree(buf);
    1362           0 :                         return msg;
    1363             :                 }
    1364           6 :                 *res = GDKstrdup(buf);
    1365             :         }
    1366             : 
    1367           6 :         GDKfree(buf);
    1368           6 :         if (!*res)
    1369           0 :                 msg = createException(MAL, "str.lpad2",
    1370             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1371             :         return msg;
    1372             : }
    1373             : 
    1374             : str
    1375          22 : str_rpad3(str *buf, size_t *buflen, const char *s, int len, const char *s2)
    1376             : {
    1377          16 :         return pad(buf, buflen, s, s2, len, 0, "str.rpad2");
    1378             : }
    1379             : 
    1380             : /* Fill up 'arg1' to length 'len' by appending characters from 'arg2'
    1381             :  * If 'arg1' is already longer than 'len', then it's truncated (on the right)
    1382             :  * (NB: this is the PostgreSQL definition).
    1383             :  *
    1384             :  * Example: rpad('hi', 5, 'xy')
    1385             :  * Result: hixyx
    1386             :  */
    1387             : static str
    1388           6 : STRRpad3(str *res, const char *const *arg1, const int *len, const char *const *arg2)
    1389             : {
    1390           6 :         str buf = NULL, msg = MAL_SUCCEED;
    1391           6 :         const char *s = *arg1, *s2 = *arg2;
    1392           6 :         int l = *len;
    1393             : 
    1394          18 :         if (strNil(s) || strNil(s2) || is_int_nil(l)) {
    1395           0 :                 *res = GDKstrdup(str_nil);
    1396             :         } else {
    1397           6 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    1398             : 
    1399           6 :                 *res = NULL;
    1400           6 :                 if (!(buf = GDKmalloc(buflen)))
    1401           0 :                         throw(MAL, "str.rpad2", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1402           6 :                 if ((msg = str_rpad3(&buf, &buflen, s, l, s2)) != MAL_SUCCEED) {
    1403           0 :                         GDKfree(buf);
    1404           0 :                         return msg;
    1405             :                 }
    1406           6 :                 *res = GDKstrdup(buf);
    1407             :         }
    1408             : 
    1409           6 :         GDKfree(buf);
    1410           6 :         if (!*res)
    1411           0 :                 msg = createException(MAL, "str.rpad2",
    1412             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1413             :         return msg;
    1414             : }
    1415             : 
    1416             : str
    1417      116171 : str_substitute(str *buf, size_t *buflen, const char *s, const char *src,
    1418             :                            const char *dst, bit repeat)
    1419             : {
    1420      116171 :         size_t lsrc = strlen(src), ldst = strlen(dst), n, l = strlen(s);
    1421      116171 :         char *b, *fnd;
    1422      116171 :         const char *pfnd;
    1423             : 
    1424      116171 :         if (!lsrc || !l) {                      /* s/src is an empty string, there's nothing to substitute */
    1425           7 :                 l++;
    1426           7 :                 CHECK_STR_BUFFER_LENGTH(buf, buflen, l, "str.substitute");
    1427           7 :                 strcpy(*buf, s);
    1428           7 :                 return MAL_SUCCEED;
    1429             :         }
    1430             : 
    1431      116164 :         n = l + ldst;
    1432      116164 :         if (repeat && ldst > lsrc)
    1433       76978 :                 n = (ldst * l) / lsrc;  /* max length */
    1434             : 
    1435      116164 :         n++;
    1436      116164 :         CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.substitute");
    1437      116164 :         b = *buf;
    1438      116164 :         pfnd = s;
    1439      124944 :         do {
    1440      124944 :                 fnd = strstr(pfnd, src);
    1441      124944 :                 if (fnd == NULL)
    1442             :                         break;
    1443        8749 :                 n = fnd - pfnd;
    1444        8749 :                 if (n > 0) {
    1445        7478 :                         strcpy_len(b, pfnd, n + 1);
    1446        7478 :                         b += n;
    1447             :                 }
    1448        8749 :                 if (ldst > 0) {
    1449         711 :                         strcpy_len(b, dst, ldst + 1);
    1450         742 :                         b += ldst;
    1451             :                 }
    1452        8780 :                 if (*fnd == 0)
    1453             :                         break;
    1454        8780 :                 pfnd = fnd + lsrc;
    1455        8780 :         } while (repeat);
    1456      116195 :         strcpy(b, pfnd);
    1457      116195 :         return MAL_SUCCEED;
    1458             : }
    1459             : 
    1460             : static str
    1461         196 : STRSubstitute(str *res, const char *const *arg1, const char *const *arg2, const char *const *arg3,
    1462             :                           const bit *g)
    1463             : {
    1464         196 :         str buf = NULL, msg = MAL_SUCCEED;
    1465         196 :         const char *s = *arg1, *s2 = *arg2, *s3 = *arg3;
    1466             : 
    1467         587 :         if (strNil(s) || strNil(s2) || strNil(s3)) {
    1468           2 :                 *res = GDKstrdup(str_nil);
    1469             :         } else {
    1470         194 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    1471             : 
    1472         194 :                 *res = NULL;
    1473         194 :                 if (!(buf = GDKmalloc(buflen)))
    1474           0 :                         throw(MAL, "str.substitute", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1475         194 :                 if ((msg = str_substitute(&buf, &buflen, s, s2, s3, *g)) != MAL_SUCCEED) {
    1476           0 :                         GDKfree(buf);
    1477           0 :                         return msg;
    1478             :                 }
    1479         194 :                 *res = GDKstrdup(buf);
    1480             :         }
    1481             : 
    1482         196 :         GDKfree(buf);
    1483         196 :         if (!*res)
    1484           0 :                 msg = createException(MAL, "str.substitute",
    1485             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1486             :         return msg;
    1487             : }
    1488             : 
    1489             : static str
    1490           9 : STRascii(int *ret, const char *const *s)
    1491             : {
    1492           9 :         return str_wchr_at(ret, *s, 0);
    1493             : }
    1494             : 
    1495             : str
    1496        3782 : str_substring_tail(str *buf, size_t *buflen, const char *s, int start)
    1497             : {
    1498        3782 :         if (start < 1)
    1499             :                 start = 1;
    1500        3782 :         start--;
    1501        3776 :         return str_tail(buf, buflen, s, start);
    1502             : }
    1503             : 
    1504             : static str
    1505           6 : STRsubstringTail(str *res, const char *const *arg1, const int *start)
    1506             : {
    1507           6 :         str buf = NULL, msg = MAL_SUCCEED;
    1508           6 :         const char *s = *arg1;
    1509           6 :         int st = *start;
    1510             : 
    1511          12 :         if (strNil(s) || is_int_nil(st)) {
    1512           0 :                 *res = GDKstrdup(str_nil);
    1513             :         } else {
    1514           6 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    1515             : 
    1516           6 :                 *res = NULL;
    1517           6 :                 if (!(buf = GDKmalloc(buflen)))
    1518           0 :                         throw(MAL, "str.substringTail", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1519           6 :                 if ((msg = str_substring_tail(&buf, &buflen, s, st)) != MAL_SUCCEED) {
    1520           0 :                         GDKfree(buf);
    1521           0 :                         return msg;
    1522             :                 }
    1523           6 :                 *res = GDKstrdup(buf);
    1524             :         }
    1525             : 
    1526           6 :         GDKfree(buf);
    1527           6 :         if (!*res)
    1528           0 :                 msg = createException(MAL, "str.substringTail",
    1529             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1530             :         return msg;
    1531             : }
    1532             : 
    1533             : str
    1534     3570321 : str_sub_string(str *buf, size_t *buflen, const char *s, int start, int l)
    1535             : {
    1536     3570321 :         if (start < 1)
    1537             :                 start = 1;
    1538     3570321 :         start--;
    1539     3570300 :         return str_Sub_String(buf, buflen, s, start, l);
    1540             : }
    1541             : 
    1542             : static str
    1543          24 : STRsubstring(str *res, const char *const *arg1, const int *start, const int *ll)
    1544             : {
    1545          24 :         str buf = NULL, msg = MAL_SUCCEED;
    1546          24 :         const char *s = *arg1;
    1547          24 :         int st = *start, l = *ll;
    1548             : 
    1549          48 :         if (strNil(s) || is_int_nil(st) || is_int_nil(l)) {
    1550           3 :                 *res = GDKstrdup(str_nil);
    1551             :         } else {
    1552          21 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    1553             : 
    1554          21 :                 *res = NULL;
    1555          21 :                 if (!(buf = GDKmalloc(buflen)))
    1556           0 :                         throw(MAL, "str.substring", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1557          21 :                 if ((msg = str_sub_string(&buf, &buflen, s, st, l)) != MAL_SUCCEED) {
    1558           0 :                         GDKfree(buf);
    1559           0 :                         return msg;
    1560             :                 }
    1561          21 :                 *res = GDKstrdup(buf);
    1562             :         }
    1563             : 
    1564          24 :         GDKfree(buf);
    1565          24 :         if (!*res)
    1566           0 :                 msg = createException(MAL, "str.substring",
    1567             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1568             :         return msg;
    1569             : }
    1570             : 
    1571             : static str
    1572          20 : STRprefix(str *res, const char *const *arg1, const int *ll)
    1573             : {
    1574          20 :         str buf = NULL, msg = MAL_SUCCEED;
    1575          20 :         const char *s = *arg1;
    1576          20 :         int l = *ll;
    1577             : 
    1578          40 :         if (strNil(s) || is_int_nil(l)) {
    1579           0 :                 *res = GDKstrdup(str_nil);
    1580             :         } else {
    1581          20 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    1582             : 
    1583          20 :                 *res = NULL;
    1584          20 :                 if (!(buf = GDKmalloc(buflen)))
    1585           0 :                         throw(MAL, "str.prefix", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1586          20 :                 if ((msg = str_Sub_String(&buf, &buflen, s, 0, l)) != MAL_SUCCEED) {
    1587           0 :                         GDKfree(buf);
    1588           0 :                         return msg;
    1589             :                 }
    1590          20 :                 *res = GDKstrdup(buf);
    1591             :         }
    1592             : 
    1593          20 :         GDKfree(buf);
    1594          20 :         if (!*res)
    1595           0 :                 msg = createException(MAL, "str.prefix",
    1596             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1597             :         return msg;
    1598             : }
    1599             : 
    1600             : str
    1601           9 : str_suffix(str *buf, size_t *buflen, const char *s, int l)
    1602             : {
    1603           9 :         int start = (int) (strlen(s) - l);
    1604           9 :         return str_Sub_String(buf, buflen, s, start, l);
    1605             : }
    1606             : 
    1607             : static str
    1608           5 : STRsuffix(str *res, const char *const *arg1, const int *ll)
    1609             : {
    1610           5 :         str buf = NULL, msg = MAL_SUCCEED;
    1611           5 :         const char *s = *arg1;
    1612           5 :         int l = *ll;
    1613             : 
    1614          10 :         if (strNil(s) || is_int_nil(l)) {
    1615           0 :                 *res = GDKstrdup(str_nil);
    1616             :         } else {
    1617           5 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    1618             : 
    1619           5 :                 *res = NULL;
    1620           5 :                 if (!(buf = GDKmalloc(buflen)))
    1621           0 :                         throw(MAL, "str.suffix", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1622           5 :                 if ((msg = str_suffix(&buf, &buflen, s, l)) != MAL_SUCCEED) {
    1623           0 :                         GDKfree(buf);
    1624           0 :                         return msg;
    1625             :                 }
    1626           5 :                 *res = GDKstrdup(buf);
    1627             :         }
    1628             : 
    1629           5 :         GDKfree(buf);
    1630           5 :         if (!*res)
    1631           0 :                 msg = createException(MAL, "str.suffix",
    1632             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1633             :         return msg;
    1634             : }
    1635             : 
    1636             : int
    1637        3358 : str_locate2(const char *needle, const char *haystack, int start)
    1638             : {
    1639        3358 :         int off, res;
    1640        3358 :         const char *s;
    1641             : 
    1642        3358 :         off = start <= 0 ? 1 : start;
    1643        3358 :         s = UTF8_strtail(haystack, off - 1);
    1644        3377 :         res = str_search(s, needle);
    1645        3415 :         return res >= 0 ? res + off : 0;
    1646             : }
    1647             : 
    1648             : static str
    1649       28758 : STRlocate3(int *ret, const char *const *needle, const char *const *haystack, const int *start)
    1650             : {
    1651       28758 :         const char *s = *needle, *s2 = *haystack;
    1652       28758 :         int st = *start;
    1653             : 
    1654       57583 :         *ret = (strNil(s) || strNil(s2) || is_int_nil(st)) ?
    1655       28758 :                 int_nil :
    1656          67 :                 str_locate2(s, s2, st);
    1657       28758 :         return MAL_SUCCEED;
    1658             : }
    1659             : 
    1660             : static str
    1661          16 : STRlocate(int *ret, const char *const *needle, const char *const *haystack)
    1662             : {
    1663          16 :         const char *s = *needle, *s2 = *haystack;
    1664             : 
    1665          45 :         *ret = (strNil(s) || strNil(s2)) ? int_nil : str_locate2(s, s2, 1);
    1666          16 :         return MAL_SUCCEED;
    1667             : }
    1668             : 
    1669             : str
    1670         222 : str_insert(str *buf, size_t *buflen, const char *s, int strt, int l,
    1671             :                    const char *s2)
    1672             : {
    1673         222 :         str v;
    1674         222 :         int l1 = UTF8_strlen(s);
    1675         222 :         size_t nextlen;
    1676             : 
    1677         222 :         if (l < 0)
    1678           0 :                 throw(MAL, "str.insert",
    1679             :                           SQLSTATE(42000)
    1680             :                           "The number of characters for insert function must be non negative");
    1681         222 :         if (strt < 0) {
    1682           0 :                 if (-strt <= l1)
    1683           0 :                         strt = l1 + strt;
    1684             :                 else
    1685             :                         strt = 0;
    1686             :         }
    1687         222 :         if (strt > l1)
    1688             :                 strt = l1;
    1689             : 
    1690         222 :         nextlen = strlen(s) + strlen(s2) + 1;
    1691         222 :         CHECK_STR_BUFFER_LENGTH(buf, buflen, nextlen, "str.insert");
    1692         222 :         v = *buf;
    1693         222 :         if (strt > 0)
    1694         215 :                 v = UTF8_strncpy(v, s, strt);
    1695         222 :         strcpy(v, s2);
    1696         222 :         if (strt + l < l1)
    1697          10 :                 strcat(v, UTF8_strtail(s, strt + l));
    1698             :         return MAL_SUCCEED;
    1699             : }
    1700             : 
    1701             : static str
    1702         224 : STRinsert(str *res, const char *const *input, const int *start, const int *nchars,
    1703             :                   const char *const *input2)
    1704             : {
    1705         224 :         str buf = NULL, msg = MAL_SUCCEED;
    1706         224 :         const char *s = *input, *s2 = *input2;
    1707         224 :         int st = *start, n = *nchars;
    1708             : 
    1709         447 :         if (strNil(s) || is_int_nil(st) || is_int_nil(n) || strNil(s2)) {
    1710           2 :                 *res = GDKstrdup(str_nil);
    1711             :         } else {
    1712         222 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    1713             : 
    1714         222 :                 *res = NULL;
    1715         222 :                 if (!(buf = GDKmalloc(buflen)))
    1716           0 :                         throw(MAL, "str.insert", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1717         222 :                 if ((msg = str_insert(&buf, &buflen, s, st, n, s2)) != MAL_SUCCEED) {
    1718           0 :                         GDKfree(buf);
    1719           0 :                         return msg;
    1720             :                 }
    1721         222 :                 *res = GDKstrdup(buf);
    1722             :         }
    1723             : 
    1724         224 :         GDKfree(buf);
    1725         224 :         if (!*res)
    1726           0 :                 msg = createException(MAL, "str.insert",
    1727             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1728             :         return msg;
    1729             : }
    1730             : 
    1731             : static str
    1732         196 : STRreplace(str *ret, const char *const *s1, const char *const *s2, const char *const *s3)
    1733             : {
    1734         196 :         bit flag = TRUE;
    1735         196 :         return STRSubstitute(ret, s1, s2, s3, &flag);
    1736             : }
    1737             : 
    1738             : str
    1739          15 : str_repeat(str *buf, size_t *buflen, const char *s, int c)
    1740             : {
    1741          15 :         size_t l = strlen(s), nextlen;
    1742             : 
    1743          15 :         if (l >= INT_MAX)
    1744           0 :                 throw(MAL, "str.repeat", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1745          15 :         nextlen = (size_t) c *l + 1;
    1746             : 
    1747          15 :         CHECK_STR_BUFFER_LENGTH(buf, buflen, nextlen, "str.repeat");
    1748          15 :         str t = *buf;
    1749          15 :         *t = 0;
    1750      160043 :         for (int i = c; i > 0; i--, t += l)
    1751      160028 :                 strcpy(t, s);
    1752             :         return MAL_SUCCEED;
    1753             : }
    1754             : 
    1755             : static str
    1756          11 : STRrepeat(str *res, const char *const *arg1, const int *c)
    1757             : {
    1758          11 :         str buf = NULL, msg = MAL_SUCCEED;
    1759          11 :         const char *s = *arg1;
    1760          11 :         int cc = *c;
    1761             : 
    1762          21 :         if (strNil(s) || is_int_nil(cc) || cc < 0) {
    1763           1 :                 *res = GDKstrdup(str_nil);
    1764             :         } else {
    1765          10 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    1766             : 
    1767          10 :                 *res = NULL;
    1768          10 :                 if (!(buf = GDKmalloc(buflen)))
    1769           0 :                         throw(MAL, "str.repeat", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1770          10 :                 if ((msg = str_repeat(&buf, &buflen, s, cc)) != MAL_SUCCEED) {
    1771           0 :                         GDKfree(buf);
    1772           0 :                         return msg;
    1773             :                 }
    1774          10 :                 *res = GDKstrdup(buf);
    1775             :         }
    1776             : 
    1777          11 :         GDKfree(buf);
    1778          11 :         if (!*res)
    1779           0 :                 msg = createException(MAL, "str.repeat",
    1780             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1781             :         return msg;
    1782             : }
    1783             : 
    1784             : static str
    1785           1 : STRspace(str *res, const int *ll)
    1786             : {
    1787           1 :         str buf = NULL, msg = MAL_SUCCEED;
    1788           1 :         int l = *ll;
    1789             : 
    1790           1 :         if (is_int_nil(l) || l < 0) {
    1791           0 :                 *res = GDKstrdup(str_nil);
    1792             :         } else {
    1793           1 :                 const char space[] = " ", *s = space;
    1794           1 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    1795             : 
    1796           1 :                 *res = NULL;
    1797           1 :                 if (!(buf = GDKmalloc(buflen)))
    1798           0 :                         throw(MAL, "str.space", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1799           1 :                 if ((msg = str_repeat(&buf, &buflen, s, l)) != MAL_SUCCEED) {
    1800           0 :                         GDKfree(buf);
    1801           0 :                         return msg;
    1802             :                 }
    1803           1 :                 *res = GDKstrdup(buf);
    1804             :         }
    1805             : 
    1806           1 :         GDKfree(buf);
    1807           1 :         if (!*res)
    1808           0 :                 msg = createException(MAL, "str.space",
    1809             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1810             :         return msg;
    1811             : }
    1812             : 
    1813             : static str
    1814           4 : STRasciify(str *r, const char *const *s)
    1815             : {
    1816           4 :         char *buf = NULL;
    1817           4 :         size_t buflen = 0;
    1818           4 :         if (GDKasciify(&buf, &buflen, *s) != GDK_SUCCEED)
    1819           0 :                 throw(MAL, "str.asciify", GDK_EXCEPTION);
    1820           4 :         *r = buf;
    1821           4 :         return MAL_SUCCEED;
    1822             : }
    1823             : 
    1824             : static inline void
    1825         327 : BBPnreclaim(int nargs, ...)
    1826             : {
    1827         327 :         va_list valist;
    1828         327 :         va_start(valist, nargs);
    1829        1436 :         for (int i = 0; i < nargs; i++) {
    1830        1098 :                 BAT *b = va_arg(valist, BAT *);
    1831        1753 :                 BBPreclaim(b);
    1832             :         }
    1833         338 :         va_end(valist);
    1834         338 : }
    1835             : 
    1836             : #define HANDLE_TIMEOUT(qc)                                                                      \
    1837             :         do {                                                                                                    \
    1838             :                 TIMEOUT_ERROR(qc, __FILE__, __func__, __LINE__);        \
    1839             :                 msg = createException(MAL, fname, GDK_EXCEPTION);       \
    1840             :         } while (0)
    1841             : 
    1842             : #define scanloop(TEST, canditer_next)                                           \
    1843             :         do {                                                                                                    \
    1844             :                 const oid off = b->hseqbase;                                         \
    1845             :                 TIMEOUT_LOOP(ci.ncand, qry_ctx) {                                       \
    1846             :                         oid o = canditer_next(&ci);                                         \
    1847             :                         const char *restrict v = BUNtvar(bi, o - off);  \
    1848             :                         assert(rcnt < BATcapacity(bn));                                      \
    1849             :                         if (TEST)                                                                               \
    1850             :                                 vals[rcnt++] = o;                                                       \
    1851             :                 }                                                                                                       \
    1852             :         } while (0)
    1853             : 
    1854             : static str
    1855          64 : STRselect(MalStkPtr stk, InstrPtr pci,
    1856             :                   int (*str_icmp)(const char *, const char *, int),
    1857             :                   int (*str_cmp)(const char *, const char *, int),
    1858             :                   const char *fname)
    1859             : {
    1860          64 :         str msg = MAL_SUCCEED;
    1861             : 
    1862          64 :         bat *r_id = getArgReference_bat(stk, pci, 0);
    1863          64 :         bat b_id = *getArgReference_bat(stk, pci, 1);
    1864          64 :         bat cb_id = *getArgReference_bat(stk, pci, 2);
    1865          64 :         const char *key = *getArgReference_str(stk, pci, 3);
    1866          64 :         bit icase = pci->argc != 5;
    1867          64 :         bit anti = pci->argc == 5 ? *getArgReference_bit(stk, pci, 4) :
    1868          26 :                 *getArgReference_bit(stk, pci, 5);
    1869             : 
    1870          64 :         BAT *b, *cb = NULL, *bn = NULL, *old_s = NULL;;
    1871          64 :         BUN rcnt = 0;
    1872          64 :         struct canditer ci;
    1873          64 :         bool with_strimps = false,
    1874          64 :                 with_strimps_anti = false;
    1875             : 
    1876          64 :         if (!(b = BATdescriptor(b_id)))
    1877           0 :                 throw(MAL, fname, SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
    1878             : 
    1879          64 :         if (!is_bat_nil(cb_id) && !(cb = BATdescriptor(cb_id))) {
    1880           0 :                 BBPreclaim(b);
    1881           0 :                 throw(MAL, fname, SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
    1882             :         }
    1883             : 
    1884          65 :         assert(ATOMstorage(b->ttype) == TYPE_str);
    1885             : 
    1886          65 :         if (BAThasstrimps(b)) {
    1887           8 :                 BAT *tmp_s;
    1888           8 :                 if (STRMPcreate(b, NULL) == GDK_SUCCEED && (tmp_s = STRMPfilter(b, cb, key, anti)) != NULL) {
    1889           8 :                         old_s = cb;
    1890           8 :                         cb = tmp_s;
    1891           8 :                         if (!anti)
    1892             :                                 with_strimps = true;
    1893             :                         else
    1894           0 :                                 with_strimps_anti = true;
    1895             :                 } else {
    1896             :                         /* strimps failed, continue without */
    1897           0 :                         GDKclrerr();
    1898             :                 }
    1899             :         }
    1900             : 
    1901         122 :         MT_thread_setalgorithm(with_strimps ?
    1902          57 :                                                    "string_select: strcmp function using strimps" :
    1903             :                                                    (with_strimps_anti ?
    1904             :                                                         "string_select: strcmp function using strimps anti"
    1905             :                                                         : "string_select: strcmp function with no accelerator"));
    1906             : 
    1907          65 :         canditer_init(&ci, b, cb);
    1908          65 :         if (!(bn = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT))) {
    1909           0 :                 BBPnreclaim(2, b, cb);
    1910           0 :                 throw(MAL, fname, SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1911             :         }
    1912             : 
    1913         128 :         if (!strNil(key)) {
    1914          64 :                 BATiter bi = bat_iterator(b);
    1915          65 :                 QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    1916          65 :                 if (icase)
    1917          27 :                         str_cmp = str_icmp;
    1918          65 :                 oid *vals = Tloc(bn, 0);
    1919          65 :                 const int klen = str_strlen(key);
    1920          65 :                 if (ci.tpe == cand_dense) {
    1921          65 :                         if (with_strimps_anti)
    1922           0 :                                 scanloop(strNil(v) || str_cmp(v, key, klen) == 0, canditer_next_dense);
    1923          65 :                         else if (anti)
    1924           0 :                                 scanloop(!strNil(v) && str_cmp(v, key, klen) != 0, canditer_next_dense);
    1925             :                         else
    1926        3197 :                                 scanloop(!strNil(v) && str_cmp(v, key, klen) == 0, canditer_next_dense);
    1927             :                 } else {
    1928           0 :                         if (with_strimps_anti)
    1929           0 :                                 scanloop(strNil(v) || str_cmp(v, key, klen) == 0, canditer_next);
    1930           0 :                         else if (anti)
    1931           0 :                                 scanloop(!strNil(v) && str_cmp(v, key, klen) != 0, canditer_next);
    1932             :                         else
    1933           0 :                                 scanloop(!strNil(v) && str_cmp(v, key, klen) == 0, canditer_next);
    1934             :                 }
    1935          65 :                 bat_iterator_end(&bi);
    1936          65 :                 TIMEOUT_CHECK(qry_ctx, HANDLE_TIMEOUT(qry_ctx));
    1937             : 
    1938           0 :                 if (!msg) {
    1939          65 :                         BATsetcount(bn, rcnt);
    1940          65 :                         bn->tsorted = true;
    1941          65 :                         bn->trevsorted = bn->batCount <= 1;
    1942          65 :                         bn->tkey = true;
    1943          65 :                         bn->tnil = false;
    1944          65 :                         bn->tnonil = true;
    1945         130 :                         bn->tseqbase = rcnt == 0 ?
    1946          65 :                                 0 : rcnt == 1 ?
    1947          16 :                                 *(const oid *) Tloc(bn, 0) : rcnt == ci.ncand && ci.tpe == cand_dense ? ci.hseq : oid_nil;
    1948             : 
    1949          65 :                         if (with_strimps_anti) {
    1950           0 :                                 BAT *rev;
    1951           0 :                                 if (old_s) {
    1952           0 :                                         rev = BATdiffcand(old_s, bn);
    1953             : #ifndef NDEBUG
    1954           0 :                                         BAT *is = BATintersectcand(old_s, bn);
    1955           0 :                                         if (is) {
    1956           0 :                                                 assert(is->batCount == bn->batCount);
    1957           0 :                                                 BBPreclaim(is);
    1958             :                                         }
    1959           0 :                                         assert(rev->batCount == old_s->batCount - bn->batCount);
    1960             : #endif
    1961             :                                 } else
    1962           0 :                                         rev = BATnegcands(0, b->batCount, bn);
    1963             : 
    1964           0 :                                 BBPreclaim(bn);
    1965           0 :                                 bn = rev;
    1966           0 :                                 if (bn == NULL)
    1967           0 :                                         msg = createException(MAL, fname, SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1968             :                         }
    1969             :                 }
    1970             :         }
    1971             : 
    1972          65 :         if (bn && !msg) {
    1973          65 :                 *r_id = bn->batCacheid;
    1974          65 :                 BBPkeepref(bn);
    1975             :         } else {
    1976           0 :                 BBPreclaim(bn);
    1977             :         }
    1978             : 
    1979          65 :         BBPnreclaim(3, b, cb, old_s);
    1980          65 :         return msg;
    1981             : }
    1982             : 
    1983             : /**
    1984             :  * @r_id: result oid
    1985             :  * @b_id: input bat oid
    1986             :  * @cb_id: input bat candidates oid
    1987             :  * @key: input string
    1988             :  * @icase: ignore case
    1989             :  * @anti: anti join
    1990             :  */
    1991             : static str
    1992          23 : STRstartswithselect(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    1993             : {
    1994          23 :         (void) cntxt;
    1995          23 :         (void) mb;
    1996          23 :         return STRselect(stk, pci,
    1997             :                                          str_is_iprefix, str_is_prefix, "str.startswithselect");
    1998             : }
    1999             : 
    2000             : /**
    2001             :  * @r_id: result oid
    2002             :  * @b_id: input bat oid
    2003             :  * @cb_id: input bat candidates oid
    2004             :  * @key: input string
    2005             :  * @icase: ignore case
    2006             :  * @anti: anti join
    2007             :  */
    2008             : static str
    2009          15 : STRendswithselect(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    2010             : {
    2011          15 :         (void) cntxt;
    2012          15 :         (void) mb;
    2013          15 :         return STRselect(stk, pci,
    2014             :                                          str_is_isuffix, str_is_suffix, "str.endswithselect");
    2015             : }
    2016             : 
    2017             : /**
    2018             :  * @r_id: result oid
    2019             :  * @b_id: input bat oid
    2020             :  * @cb_id: input bat candidates oid
    2021             :  * @key: input string
    2022             :  * @icase: ignore case
    2023             :  * @anti: anti join
    2024             :  */
    2025             : static str
    2026          26 : STRcontainsselect(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    2027             : {
    2028          26 :         (void) cntxt;
    2029          26 :         (void) mb;
    2030          26 :         return STRselect(stk, pci,
    2031             :                                          str_icontains, str_contains, "str.containsselect");
    2032             : }
    2033             : 
    2034             : #define APPEND(b, o) (((oid *) b->theap->base)[b->batCount++] = (o))
    2035             : #define VALUE(s, x)  (s##vars + VarHeapVal(s##vals, (x), s##i.width))
    2036             : 
    2037             : #define set_empty_bat_props(B)                                  \
    2038             :         do {                                                                            \
    2039             :                 B->tnil = false;                                             \
    2040             :                 B->tnonil = true;                                            \
    2041             :                 B->tkey = true;                                                      \
    2042             :                 B->tsorted = true;                                           \
    2043             :                 B->trevsorted = true;                                        \
    2044             :                 B->tseqbase = 0;                                             \
    2045             :         } while (0)
    2046             : 
    2047             : #define CONTAINS_JOIN_LOOP(STR_CMP, STR_LEN)                                                    \
    2048             :         do {                                                                                                                            \
    2049             :                 canditer_init(&rci, r, cr);                                                                         \
    2050             :                 for (BUN ridx = 0; ridx < rci.ncand; ridx++) {                                       \
    2051             :                         BAT *filtered_sl = NULL;                                                                        \
    2052             :                         GDK_CHECK_TIMEOUT(qry_ctx, counter, GOTO_LABEL_TIMEOUT_HANDLER(exit, qry_ctx)); \
    2053             :                         ro = canditer_next(&rci);                                                                   \
    2054             :                         vr = VALUE(r, ro - rbase);                                                                      \
    2055             :                         matches = 0;                                                                                            \
    2056             :                         if (!strNil(vr)) {                                                                                      \
    2057             :                                 vr_len = STR_LEN;                                                                               \
    2058             :                                 if (with_strimps)                                                                               \
    2059             :                                         filtered_sl = STRMPfilter(l, cl, vr, anti);                     \
    2060             :                                 if (filtered_sl)                                                                                \
    2061             :                                         canditer_init(&lci, l, filtered_sl);                                \
    2062             :                                 else                                                                                                    \
    2063             :                                         canditer_init(&lci, l, cl);                                                 \
    2064             :                                 for (BUN lidx = 0; lidx < lci.ncand; lidx++) {                       \
    2065             :                                         lo = canditer_next(&lci);                                                   \
    2066             :                                         vl = VALUE(l, lo - lbase);                                                      \
    2067             :                                         if (strNil(vl))                                                                         \
    2068             :                                                 continue;                                                                               \
    2069             :                                         if (STR_CMP)                                                                            \
    2070             :                                                 continue;                                                                               \
    2071             :                                         if (BATcount(rl) == BATcapacity(rl)) {                          \
    2072             :                                                 newcap = BATgrows(rl);                                                  \
    2073             :                                                 BATsetcount(rl, BATcount(rl));                                  \
    2074             :                                                 if (rr)                                                                                 \
    2075             :                                                         BATsetcount(rr, BATcount(rr));                          \
    2076             :                                                 if (BATextend(rl, newcap) != GDK_SUCCEED ||             \
    2077             :                                                         (rr && BATextend(rr, newcap) != GDK_SUCCEED)) { \
    2078             :                                                         msg = createException(MAL, fname, SQLSTATE(HY013) MAL_MALLOC_FAIL);     \
    2079             :                                                         goto exit;                                                                      \
    2080             :                                                 }                                                                                               \
    2081             :                                                 assert(!rr || BATcapacity(rl) == BATcapacity(rr)); \
    2082             :                                         }                                                                                                       \
    2083             :                                         if (BATcount(rl) > 0) {                                                              \
    2084             :                                                 if (lastl + 1 != lo)                                                    \
    2085             :                                                         rl->tseqbase = oid_nil;                                              \
    2086             :                                                 if (matches == 0) {                                                             \
    2087             :                                                         if (rr)                                                                         \
    2088             :                                                                 rr->trevsorted = false;                                      \
    2089             :                                                         if (lastl > lo) {                                                    \
    2090             :                                                                 rl->tsorted = false;                                 \
    2091             :                                                                 rl->tkey = false;                                            \
    2092             :                                                         } else if (lastl < lo) {                                     \
    2093             :                                                                 rl->trevsorted = false;                                      \
    2094             :                                                         } else {                                                                        \
    2095             :                                                                 rl->tkey = false;                                            \
    2096             :                                                         }                                                                                       \
    2097             :                                                 }                                                                                               \
    2098             :                                         }                                                                                                       \
    2099             :                                         APPEND(rl, lo);                                                                         \
    2100             :                                         if (rr)                                                                                         \
    2101             :                                                 APPEND(rr, ro);                                                                 \
    2102             :                                         lastl = lo;                                                                                     \
    2103             :                                         matches++;                                                                                      \
    2104             :                                 }                                                                                                               \
    2105             :                                 BBPreclaim(filtered_sl);                                                                \
    2106             :                         }                                                                                                                       \
    2107             :                         if (rr) {                                                                                                       \
    2108             :                                 if (matches > 1) {                                                                           \
    2109             :                                         rr->tkey = false;                                                                    \
    2110             :                                         rr->tseqbase = oid_nil;                                                              \
    2111             :                                         rl->trevsorted = false;                                                              \
    2112             :                                 } else if (matches == 0) {                                                              \
    2113             :                                         rskipped = BATcount(rr) > 0;                                         \
    2114             :                                 } else if (rskipped) {                                                                  \
    2115             :                                         rr->tseqbase = oid_nil;                                                              \
    2116             :                                 }                                                                                                               \
    2117             :                         } else if (matches > 1) {                                                                    \
    2118             :                                 rl->trevsorted = false;                                                                      \
    2119             :                         }                                                                                                                       \
    2120             :                 }                                                                                                                               \
    2121             :         } while (0)
    2122             : 
    2123             : #define STR_JOIN_NESTED_LOOP(STR_CMP, STR_LEN, FNAME)                                   \
    2124             :         do {                                                                                                                            \
    2125             :                 canditer_init(&rci, r, cr);                                                                         \
    2126             :                 for (BUN ridx = 0; ridx < rci.ncand; ridx++) {                                       \
    2127             :                         GDK_CHECK_TIMEOUT(qry_ctx, counter, GOTO_LABEL_TIMEOUT_HANDLER(exit, qry_ctx)); \
    2128             :                         ro = canditer_next(&rci);                                                                   \
    2129             :                         vr = VALUE(r, ro - rbase);                                                                      \
    2130             :                         matches = 0;                                                                                            \
    2131             :                         if (!strNil(vr)) {                                                                                      \
    2132             :                                 vr_len = STR_LEN;                                                                               \
    2133             :                                 canditer_init(&lci, l, cl);                                                         \
    2134             :                                 for (BUN lidx = 0; lidx < lci.ncand; lidx++) {                       \
    2135             :                                         lo = canditer_next(&lci);                                                   \
    2136             :                                         vl = VALUE(l, lo - lbase);                                                      \
    2137             :                                         if (strNil(vl))                                                                         \
    2138             :                                                 continue;                                                                               \
    2139             :                                         if (!(STR_CMP))                                                                         \
    2140             :                                                 continue;                                                                               \
    2141             :                                         if (BATcount(rl) == BATcapacity(rl)) {                          \
    2142             :                                                 newcap = BATgrows(rl);                                                  \
    2143             :                                                 BATsetcount(rl, BATcount(rl));                                  \
    2144             :                                                 if (rr)                                                                                 \
    2145             :                                                         BATsetcount(rr, BATcount(rr));                          \
    2146             :                                                 if (BATextend(rl, newcap) != GDK_SUCCEED ||             \
    2147             :                                                         (rr && BATextend(rr, newcap) != GDK_SUCCEED)) { \
    2148             :                                                         msg = createException(MAL, FNAME, SQLSTATE(HY013) MAL_MALLOC_FAIL); \
    2149             :                                                         goto exit;                                                                      \
    2150             :                                                 }                                                                                               \
    2151             :                                                 assert(!rr || BATcapacity(rl) == BATcapacity(rr)); \
    2152             :                                         }                                                                                                       \
    2153             :                                         if (BATcount(rl) > 0) {                                                              \
    2154             :                                                 if (last_lo + 1 != lo)                                                  \
    2155             :                                                         rl->tseqbase = oid_nil;                                              \
    2156             :                                                 if (matches == 0) {                                                             \
    2157             :                                                         if (rr)                                                                         \
    2158             :                                                                 rr->trevsorted = false;                                      \
    2159             :                                                         if (last_lo > lo) {                                                  \
    2160             :                                                                 rl->tsorted = false;                                 \
    2161             :                                                                 rl->tkey = false;                                            \
    2162             :                                                         } else if (last_lo < lo) {                                   \
    2163             :                                                                 rl->trevsorted = false;                                      \
    2164             :                                                         } else {                                                                        \
    2165             :                                                                 rl->tkey = false;                                            \
    2166             :                                                         }                                                                                       \
    2167             :                                                 }                                                                                               \
    2168             :                                         }                                                                                                       \
    2169             :                                         APPEND(rl, lo);                                                                         \
    2170             :                                         if (rr)                                                                                         \
    2171             :                                                 APPEND(rr, ro);                                                                 \
    2172             :                                         last_lo = lo;                                                                           \
    2173             :                                         matches++;                                                                                      \
    2174             :                                 }                                                                                                               \
    2175             :                         }                                                                                                                       \
    2176             :                         if (rr) {                                                                                                       \
    2177             :                                 if (matches > 1) {                                                                           \
    2178             :                                         rr->tkey = false;                                                                    \
    2179             :                                         rr->tseqbase = oid_nil;                                                              \
    2180             :                                         rl->trevsorted = false;                                                              \
    2181             :                                 } else if (matches == 0) {                                                              \
    2182             :                                         rskipped = BATcount(rr) > 0;                                         \
    2183             :                                 } else if (rskipped) {                                                                  \
    2184             :                                         rr->tseqbase = oid_nil;                                                              \
    2185             :                                 }                                                                                                               \
    2186             :                         } else if (matches > 1) {                                                                    \
    2187             :                                 rl->trevsorted = false;                                                                      \
    2188             :                         }                                                                                                                       \
    2189             :                 }                                                                                                                               \
    2190             :         } while (0)
    2191             : 
/*
 * STARTSWITH_SORTED_LOOP(STR_CMP, STR_LEN, FNAME)
 *
 * Merge-style join loop over two candidate iterators, appending matching
 * (lo, ro) oid pairs to rl (and to rr when rr is not NULL).
 *
 *   STR_CMP  expression evaluated for every (vl, vr) pair and stored in
 *            cmp: < 0 skips the left value for good, > 0 stops scanning
 *            the left side for the current right value, 0 is a match.
 *   STR_LEN  expression evaluated once per right value and stored in
 *            vr_len (available to STR_CMP).
 *   FNAME    function name used in the exception raised when growing
 *            the result BATs fails.
 *
 * The macro declares nothing itself.  The expanding function must provide
 * rci, lci, sorted_r, sorted_cr, sorted_l, sorted_cl, lx, rx, n, lo, ro,
 * lbase, rbase, vl, vr, vr_len, cmp, matches, newcap, last_lo, rskipped,
 * rl, rr, msg, qry_ctx, counter, an `exit:' label, and whatever the VALUE
 * and APPEND macros need.
 * NOTE(review): the early `break' on cmp > 0 and the permanent lx++ on
 * cmp < 0 presumably rely on both inputs being ordered consistently with
 * STR_CMP -- confirm against the caller.
 */
#define STARTSWITH_SORTED_LOOP(STR_CMP, STR_LEN, FNAME)					\
	do {																\
		canditer_init(&rci, sorted_r, sorted_cr);						\
		canditer_init(&lci, sorted_l, sorted_cl);						\
		/* lx := index of the first non-nil left candidate */			\
		for (lx = 0; lx < lci.ncand; lx++) {							\
			lo = canditer_next(&lci);									\
			vl = VALUE(l, lo - lbase);									\
			if (!strNil(vl))											\
				break;													\
		}																\
		/* rx := index of the first non-nil right candidate; the */		\
		/* iterator is put back on rx so the main loop fetches it again */ \
		for (rx = 0; rx < rci.ncand; rx++) {							\
			ro = canditer_next(&rci);									\
			vr = VALUE(r, ro - rbase);									\
			if (!strNil(vr)) {											\
				canditer_setidx(&rci, rx);								\
				break;													\
			}															\
		}																\
		/* main loop: one iteration per remaining right candidate */	\
		for (; rx < rci.ncand; rx++) {									\
			GDK_CHECK_TIMEOUT(qry_ctx, counter, GOTO_LABEL_TIMEOUT_HANDLER(exit, qry_ctx)); \
			ro = canditer_next(&rci);									\
			vr = VALUE(r, ro - rbase);									\
			vr_len = STR_LEN;											\
			matches = 0; /* matches found for this right value */		\
			/* restart the left scan at lx, the first left candidate */	\
			/* that has not been skipped by an earlier cmp < 0 */		\
			for (canditer_setidx(&lci, lx), n = lx; n < lci.ncand; n++) { \
				lo = canditer_next_dense(&lci);							\
				vl = VALUE(l, lo - lbase);								\
				cmp = STR_CMP;											\
				if (cmp < 0) {											\
					/* advance the restart point for later right values too */ \
					lx++;												\
					continue;											\
				}														\
				else if (cmp > 0)										\
					break;												\
				/* cmp == 0: a match; grow the result BATs when full */	\
				if (BATcount(rl) == BATcapacity(rl)) {					\
					newcap = BATgrows(rl);								\
					BATsetcount(rl, BATcount(rl));						\
					if (rr)												\
						BATsetcount(rr, BATcount(rr));					\
					if (BATextend(rl, newcap) != GDK_SUCCEED ||			\
						(rr && BATextend(rr, newcap) != GDK_SUCCEED)) { \
						msg = createException(MAL, FNAME, SQLSTATE(HY013) MAL_MALLOC_FAIL); \
						goto exit;										\
					}													\
					assert(!rr || BATcapacity(rl) == BATcapacity(rr));	\
				}														\
				/* keep the ordering/key/dense properties of the results */ \
				/* in step with the pair that is about to be appended */ \
				if (BATcount(rl) > 0) {									\
					if (last_lo + 1 != lo)								\
						rl->tseqbase = oid_nil; /* rl not dense */		\
					if (matches == 0) {									\
						/* first match for this right value */			\
						if (rr)											\
							rr->trevsorted = false;						\
						if (last_lo > lo) {								\
							rl->tsorted = false;						\
							rl->tkey = false;							\
						} else if (last_lo < lo) {						\
							rl->trevsorted = false;						\
						} else {										\
							rl->tkey = false;							\
						}												\
					}													\
				}														\
				APPEND(rl, lo);											\
				if (rr)													\
					APPEND(rr, ro);										\
				last_lo = lo;											\
				matches++;												\
			}															\
			/* per-right-value bookkeeping once the left scan is done */ \
			if (rr) {													\
				if (matches > 1) {										\
					/* the same ro was appended more than once */		\
					rr->tkey = false;									\
					rr->tseqbase = oid_nil;								\
					rl->trevsorted = false;								\
				} else if (matches == 0) {								\
					/* remember a gap if rr already holds results */	\
					rskipped = BATcount(rr) > 0;						\
				} else if (rskipped) {									\
					rr->tseqbase = oid_nil;								\
				}														\
			} else if (matches > 1) {									\
				rl->trevsorted = false;									\
			}															\
		}																\
	} while (0)
    2275             : 
    2276             : static void
    2277        1006 : do_strrev(char *dst, const char *src, size_t len)
    2278             : {
    2279        1006 :         dst[len] = 0;
    2280        1006 :         if (strNil(src)) {
    2281           8 :                 assert(len == strlen(str_nil));
    2282           8 :                 strcpy(dst, str_nil);
    2283           8 :                 return;
    2284             :         }
    2285        7493 :         while (*src) {
    2286        6495 :                 if ((*src & 0xF8) == 0xF0) {
    2287           0 :                         assert(len >= 4);
    2288           0 :                         dst[len - 4] = *src++;
    2289           0 :                         assert((*src & 0xC0) == 0x80);
    2290           0 :                         dst[len - 3] = *src++;
    2291           0 :                         assert((*src & 0xC0) == 0x80);
    2292           0 :                         dst[len - 2] = *src++;
    2293           0 :                         assert((*src & 0xC0) == 0x80);
    2294           0 :                         dst[len - 1] = *src++;
    2295           0 :                         len -= 4;
    2296        6495 :                 } else if ((*src & 0xF0) == 0xE0) {
    2297           0 :                         assert(len >= 3);
    2298           0 :                         dst[len - 3] = *src++;
    2299           0 :                         assert((*src & 0xC0) == 0x80);
    2300           0 :                         dst[len - 2] = *src++;
    2301           0 :                         assert((*src & 0xC0) == 0x80);
    2302           0 :                         dst[len - 1] = *src++;
    2303           0 :                         len -= 3;
    2304        6495 :                 } else if ((*src & 0xE0) == 0xC0) {
    2305          72 :                         assert(len >= 2);
    2306          72 :                         dst[len - 2] = *src++;
    2307          72 :                         assert((*src & 0xC0) == 0x80);
    2308          72 :                         dst[len - 1] = *src++;
    2309          72 :                         len -= 2;
    2310             :                 } else {
    2311        6423 :                         assert(len >= 1);
    2312        6423 :                         assert((*src & 0x80) == 0);
    2313        6423 :                         dst[--len] = *src++;
    2314             :                 }
    2315             :         }
    2316         998 :         assert(len == 0);
    2317             : }
    2318             : 
    2319             : static BAT *
    2320          52 : batstr_strrev(BAT *b)
    2321             : {
    2322          52 :         BAT *bn = NULL;
    2323          52 :         BATiter bi;
    2324          52 :         BUN p, q;
    2325          52 :         const char *src;
    2326          52 :         size_t len;
    2327          52 :         char *dst;
    2328          52 :         size_t dstlen;
    2329             : 
    2330          52 :         dstlen = 1024;
    2331          52 :         dst = GDKmalloc(dstlen);
    2332          54 :         if (dst == NULL)
    2333             :                 return NULL;
    2334             : 
    2335          54 :         assert(b->ttype == TYPE_str);
    2336             : 
    2337          54 :         bn = COLnew(b->hseqbase, TYPE_str, BATcount(b), TRANSIENT);
    2338          56 :         if (bn == NULL) {
    2339           0 :                 GDKfree(dst);
    2340           0 :                 return NULL;
    2341             :         }
    2342             : 
    2343          56 :         bi = bat_iterator(b);
    2344        1054 :         BATloop(b, p, q) {
    2345         998 :                 src = (const char *) BUNtail(bi, p);
    2346        1002 :                 len = strlen(src);
    2347        1002 :                 if (len >= dstlen) {
    2348           0 :                         char *ndst;
    2349           0 :                         dstlen = len + 1024;
    2350           0 :                         ndst = GDKrealloc(dst, dstlen);
    2351           0 :                         if (ndst == NULL) {
    2352           0 :                                 bat_iterator_end(&bi);
    2353           0 :                                 BBPreclaim(bn);
    2354           0 :                                 GDKfree(dst);
    2355           0 :                                 return NULL;
    2356             :                         }
    2357             :                         dst = ndst;
    2358             :                 }
    2359        1002 :                 do_strrev(dst, src, len);
    2360         996 :                 if (BUNappend(bn, dst, false) != GDK_SUCCEED) {
    2361           0 :                         bat_iterator_end(&bi);
    2362           0 :                         BBPreclaim(bn);
    2363           0 :                         GDKfree(dst);
    2364           0 :                         return NULL;
    2365             :                 }
    2366             :         }
    2367             : 
    2368          56 :         bat_iterator_end(&bi);
    2369          56 :         GDKfree(dst);
    2370          56 :         return bn;
    2371             : }
    2372             : 
    2373             : static BAT *
    2374          42 : batstr_strlower(BAT *b)
    2375             : {
    2376          42 :         BAT *bn = NULL;
    2377          42 :         BATiter bi;
    2378          42 :         BUN p, q;
    2379             : 
    2380          42 :         assert(b->ttype == TYPE_str);
    2381             : 
    2382          42 :         bn = COLnew(b->hseqbase, TYPE_str, BATcount(b), TRANSIENT);
    2383          44 :         if (bn == NULL)
    2384             :                 return NULL;
    2385             : 
    2386          44 :         bi = bat_iterator(b);
    2387         166 :         BATloop(b, p, q) {
    2388         124 :                 const char *vb = BUNtail(bi, p);
    2389         124 :                 char *vb_low = NULL;
    2390         124 :                 if (STRlower(&vb_low, &vb)) {
    2391           0 :                         bat_iterator_end(&bi);
    2392           0 :                         BBPreclaim(bn);
    2393           0 :                         return NULL;
    2394             :                 }
    2395         126 :                 if (BUNappend(bn, vb_low, false) != GDK_SUCCEED) {
    2396           0 :                         GDKfree(vb_low);
    2397           0 :                         bat_iterator_end(&bi);
    2398           0 :                         BBPreclaim(bn);
    2399           0 :                         return NULL;
    2400             :                 }
    2401         125 :                 GDKfree(vb_low);
    2402             :         }
    2403          42 :         bat_iterator_end(&bi);
    2404          42 :         return bn;
    2405             : }
    2406             : 
    2407             : static str
    2408          28 : str_join_nested(BAT *rl, BAT *rr, BAT *l, BAT *r, BAT *cl, BAT *cr,
    2409             :                                 bit anti, int (*str_cmp)(const char *, const char *, int),
    2410             :                                 const char *fname)
    2411             : {
    2412          28 :         str msg = MAL_SUCCEED;
    2413             : 
    2414          28 :         size_t counter = 0;
    2415          28 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    2416             : 
    2417          28 :         TRC_DEBUG(ALGO,
    2418             :                           "(%s, %s, l=%s#" BUNFMT "[%s]%s%s,"
    2419             :                           "r=%s#" BUNFMT "[%s]%s%s,sl=%s#" BUNFMT "%s%s,"
    2420             :                           "sr=%s#" BUNFMT "%s%s)\n",
    2421             :                           fname, "nested loop",
    2422             :                           BATgetId(l), BATcount(l), ATOMname(l->ttype),
    2423             :                           l->tsorted ? "-sorted" : "",
    2424             :                           l->trevsorted ? "-revsorted" : "",
    2425             :                           BATgetId(r), BATcount(r), ATOMname(r->ttype),
    2426             :                           r->tsorted ? "-sorted" : "",
    2427             :                           r->trevsorted ? "-revsorted" : "",
    2428             :                           cl ? BATgetId(cl) : "NULL", cl ? BATcount(cl) : 0,
    2429             :                           cl && cl->tsorted ? "-sorted" : "",
    2430             :                           cl && cl->trevsorted ? "-revsorted" : "",
    2431             :                           cr ? BATgetId(cr) : "NULL", cr ? BATcount(cr) : 0,
    2432             :                           cr && cr->tsorted ? "-sorted" : "",
    2433             :                           cr && cr->trevsorted ? "-revsorted" : "");
    2434             : 
    2435          84 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
    2436          28 :         assert(ATOMtype(l->ttype) == TYPE_str);
    2437             : 
    2438          28 :         BATiter li = bat_iterator(l);
    2439          28 :         BATiter ri = bat_iterator(r);
    2440          28 :         assert(ri.vh && r->ttype);
    2441             : 
    2442          28 :         struct canditer lci, rci;
    2443          28 :         oid lbase = l->hseqbase,
    2444          28 :                 rbase = r->hseqbase,
    2445          28 :                 lo, ro, last_lo = 0;
    2446          28 :         const char *lvals = (const char *) li.base,
    2447          28 :                 *rvals = (const char *) ri.base,
    2448          28 :                 *lvars = li.vh->base,
    2449          28 :                 *rvars = ri.vh->base,
    2450             :                 *vl, *vr;
    2451          28 :         BUN matches, newcap;
    2452          28 :         int rskipped = 0, vr_len = 0;
    2453             : 
    2454          28 :         if (anti)
    2455           0 :                 STR_JOIN_NESTED_LOOP((str_cmp(vl, vr, vr_len) != 0), str_strlen(vr), fname);
    2456             :         else
    2457         640 :                 STR_JOIN_NESTED_LOOP((str_cmp(vl, vr, vr_len) == 0), str_strlen(vr), fname);
    2458             : 
    2459          28 :         assert(!rr || BATcount(rl) == BATcount(rr));
    2460          28 :         BATsetcount(rl, BATcount(rl));
    2461          28 :         if (rr)
    2462          28 :                 BATsetcount(rr, BATcount(rr));
    2463             : 
    2464          28 :         if (BATcount(rl) > 0) {
    2465          16 :                 if (BATtdense(rl))
    2466          10 :                         rl->tseqbase = ((oid *) rl->theap->base)[0];
    2467          16 :                 if (rr && BATtdense(rr))
    2468          13 :                         rr->tseqbase = ((oid *) rr->theap->base)[0];
    2469             :         } else {
    2470          12 :                 rl->tseqbase = 0;
    2471          12 :                 if (rr)
    2472          12 :                         rr->tseqbase = 0;
    2473             :         }
    2474             : 
    2475          28 :         TRC_DEBUG(ALGO,
    2476             :                           "(%s, l=%s,r=%s)=(%s#" BUNFMT "%s%s,%s#" BUNFMT "%s%s\n",
    2477             :                           fname,
    2478             :                           BATgetId(l), BATgetId(r), BATgetId(rl), BATcount(rl),
    2479             :                           rl->tsorted ? "-sorted" : "",
    2480             :                           rl->trevsorted ? "-revsorted" : "",
    2481             :                           rr ? BATgetId(rr) : NULL, rr ? BATcount(rr) : 0,
    2482             :                           rr && rr->tsorted ? "-sorted" : "",
    2483             :                           rr && rr->trevsorted ? "-revsorted" : "");
    2484             : 
    2485          28 : exit:
    2486          28 :         bat_iterator_end(&li);
    2487          27 :         bat_iterator_end(&ri);
    2488          27 :         return msg;
    2489             : }
    2490             : 
    2491             : static str
    2492          60 : contains_join(BAT *rl, BAT *rr, BAT *l, BAT *r, BAT *cl, BAT *cr, bit anti,
    2493             :                           int (*str_cmp)(const char *, const char *, int),
    2494             :                           const char *fname)
    2495             : {
    2496          60 :         str msg = MAL_SUCCEED;
    2497             : 
    2498          60 :         size_t counter = 0;
    2499          60 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    2500             : 
    2501          60 :         TRC_DEBUG(ALGO,
    2502             :                           "(%s, l=%s#" BUNFMT "[%s]%s%s,"
    2503             :                           "r=%s#" BUNFMT "[%s]%s%s,sl=%s#" BUNFMT "%s%s,"
    2504             :                           "sr=%s#" BUNFMT "%s%s)\n",
    2505             :                           fname,
    2506             :                           BATgetId(l), BATcount(l), ATOMname(l->ttype),
    2507             :                           l->tsorted ? "-sorted" : "",
    2508             :                           l->trevsorted ? "-revsorted" : "",
    2509             :                           BATgetId(r), BATcount(r), ATOMname(r->ttype),
    2510             :                           r->tsorted ? "-sorted" : "",
    2511             :                           r->trevsorted ? "-revsorted" : "",
    2512             :                           cl ? BATgetId(cl) : "NULL", cl ? BATcount(cl) : 0,
    2513             :                           cl && cl->tsorted ? "-sorted" : "",
    2514             :                           cl && cl->trevsorted ? "-revsorted" : "",
    2515             :                           cr ? BATgetId(cr) : "NULL", cr ? BATcount(cr) : 0,
    2516             :                           cr && cr->tsorted ? "-sorted" : "",
    2517             :                           cr && cr->trevsorted ? "-revsorted" : "");
    2518             : 
    2519          60 :         bool with_strimps = false;
    2520             : 
    2521          60 :         if (BAThasstrimps(l)) {
    2522          16 :                 with_strimps = true;
    2523          16 :                 if (STRMPcreate(l, NULL) != GDK_SUCCEED) {
    2524           0 :                         GDKclrerr();
    2525           0 :                         with_strimps = false;
    2526             :                 }
    2527             :         }
    2528             : 
    2529         198 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
    2530          66 :         assert(ATOMtype(l->ttype) == TYPE_str);
    2531             : 
    2532          66 :         BATiter li = bat_iterator(l);
    2533          66 :         BATiter ri = bat_iterator(r);
    2534          66 :         assert(ri.vh && r->ttype);
    2535             : 
    2536          66 :         struct canditer lci, rci;
    2537          66 :         oid lbase = l->hseqbase,
    2538          66 :                 rbase = r->hseqbase,
    2539          66 :                 lo, ro, lastl = 0;
    2540          66 :         const char *lvals = (const char *) li.base,
    2541          66 :                 *rvals = (const char *) ri.base,
    2542          66 :                 *lvars = li.vh->base,
    2543          66 :                 *rvars = ri.vh->base,
    2544             :                 *vl, *vr;
    2545          66 :         int rskipped = 0, vr_len = 0;
    2546          66 :         BUN matches, newcap;
    2547             : 
    2548          66 :         if (anti)
    2549           0 :                 CONTAINS_JOIN_LOOP(str_cmp(vl, vr, vr_len) == 0, str_strlen(vr));
    2550             :         else
    2551       21752 :                 CONTAINS_JOIN_LOOP(str_cmp(vl, vr, vr_len) != 0, str_strlen(vr));
    2552             : 
    2553          66 :         assert(!rr || BATcount(rl) == BATcount(rr));
    2554          66 :         BATsetcount(rl, BATcount(rl));
    2555          66 :         if (rr)
    2556          66 :                 BATsetcount(rr, BATcount(rr));
    2557          66 :         if (BATcount(rl) > 0) {
    2558          53 :                 if (BATtdense(rl))
    2559          18 :                         rl->tseqbase = ((oid *) rl->theap->base)[0];
    2560          53 :                 if (rr && BATtdense(rr))
    2561          18 :                         rr->tseqbase = ((oid *) rr->theap->base)[0];
    2562             :         } else {
    2563          13 :                 rl->tseqbase = 0;
    2564          13 :                 if (rr)
    2565          13 :                         rr->tseqbase = 0;
    2566             :         }
    2567             : 
    2568          66 :         TRC_DEBUG(ALGO,
    2569             :                           "(%s, l=%s,r=%s)=(%s#" BUNFMT "%s%s,%s#" BUNFMT "%s%s\n",
    2570             :                           fname,
    2571             :                           BATgetId(l), BATgetId(r), BATgetId(rl), BATcount(rl),
    2572             :                           rl->tsorted ? "-sorted" : "",
    2573             :                           rl->trevsorted ? "-revsorted" : "",
    2574             :                           rr ? BATgetId(rr) : NULL, rr ? BATcount(rr) : 0,
    2575             :                           rr && rr->tsorted ? "-sorted" : "",
    2576             :                           rr && rr->trevsorted ? "-revsorted" : "");
    2577          66 : exit:
    2578          66 :         bat_iterator_end(&li);
    2579          66 :         bat_iterator_end(&ri);
    2580          66 :         return msg;
    2581             : }
    2582             : 
    2583             : static str
    2584          55 : startswith_join(BAT **rl_ptr, BAT **rr_ptr, BAT *l, BAT *r, BAT *cl, BAT *cr,
    2585             :                                 bit anti, int (*str_cmp)(const char *, const char *, int),
    2586             :                                 const char *fname)
    2587             : {
    2588          55 :         str msg = MAL_SUCCEED;
    2589          55 :         gdk_return rc;
    2590             : 
    2591          55 :         size_t counter = 0;
    2592          55 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    2593             : 
    2594          55 :         assert(*rl_ptr && *rr_ptr);
    2595             : 
    2596          55 :         BAT *sorted_l = NULL, *sorted_r = NULL,
    2597          55 :                 *sorted_cl = NULL, *sorted_cr = NULL,
    2598          55 :                 *ord_sorted_l = NULL, *ord_sorted_r = NULL,
    2599          55 :                 *proj_rl = NULL, *proj_rr = NULL,
    2600          55 :                 *rl = *rl_ptr, *rr = *rr_ptr;
    2601             : 
    2602          55 :         TRC_DEBUG(ALGO,
    2603             :                           "(%s, %s, l=%s#" BUNFMT "[%s]%s%s,"
    2604             :                           "r=%s#" BUNFMT "[%s]%s%s,sl=%s#" BUNFMT "%s%s,"
    2605             :                           "sr=%s#" BUNFMT "%s%s)\n",
    2606             :                           fname, "sorted inputs",
    2607             :                           BATgetId(l), BATcount(l), ATOMname(l->ttype),
    2608             :                           l->tsorted ? "-sorted" : "",
    2609             :                           l->trevsorted ? "-revsorted" : "",
    2610             :                           BATgetId(r), BATcount(r), ATOMname(r->ttype),
    2611             :                           r->tsorted ? "-sorted" : "",
    2612             :                           r->trevsorted ? "-revsorted" : "",
    2613             :                           cl ? BATgetId(cl) : "NULL", cl ? BATcount(cl) : 0,
    2614             :                           cl && cl->tsorted ? "-sorted" : "",
    2615             :                           cl && cl->trevsorted ? "-revsorted" : "",
    2616             :                           cr ? BATgetId(cr) : "NULL", cr ? BATcount(cr) : 0,
    2617             :                           cr && cr->tsorted ? "-sorted" : "",
    2618             :                           cr && cr->trevsorted ? "-revsorted" : "");
    2619             : 
    2620          55 :         bool l_sorted = BATordered(l);
    2621          56 :         bool r_sorted = BATordered(r);
    2622             : 
    2623          56 :         if (l_sorted == FALSE) {
    2624          38 :                 rc = BATsort(&sorted_l, &ord_sorted_l, NULL,
    2625             :                                          l, NULL, NULL, false, false, false);
    2626          38 :                 if (rc != GDK_SUCCEED) {
    2627           0 :                         throw(MAL, fname, "Sorting left input failed");
    2628             :                 } else {
    2629          38 :                         if (cl) {
    2630           0 :                                 rc = BATsort(&sorted_cl, NULL, NULL,
    2631             :                                                          cl, ord_sorted_l, NULL, false, false, false);
    2632           0 :                                 if (rc != GDK_SUCCEED) {
    2633           0 :                                         BBPnreclaim(2, sorted_l, ord_sorted_l);
    2634           0 :                                         throw(MAL, fname, "Sorting left candidates input failed");
    2635             :                                 }
    2636             :                         }
    2637             :                 }
    2638             :         } else {
    2639          18 :                 sorted_l = l;
    2640          18 :                 sorted_cl = cl;
    2641             :         }
    2642             : 
    2643          56 :         if (r_sorted == FALSE) {
    2644          36 :                 rc = BATsort(&sorted_r, &ord_sorted_r, NULL,
    2645             :                                          r, NULL, NULL, false, false, false);
    2646          35 :                 if (rc != GDK_SUCCEED) {
    2647           0 :                         BBPnreclaim(3, sorted_l, ord_sorted_l, sorted_cl);
    2648           0 :                         throw(MAL, fname, "Sorting right input failed");
    2649             :                 } else {
    2650          35 :                         if (cr) {
    2651           0 :                                 rc = BATsort(&sorted_cr, NULL, NULL,
    2652             :                                                          cr, ord_sorted_r, NULL, false, false, false);
    2653           0 :                                 if (rc != GDK_SUCCEED) {
    2654           0 :                                         BBPnreclaim(5, sorted_l, ord_sorted_l, sorted_cl, sorted_r, ord_sorted_r);
    2655           0 :                                         throw(MAL, fname, "Sorting right candidates input failed");
    2656             :                                 }
    2657             :                         }
    2658             :                 }
    2659             :         } else {
    2660          20 :                 sorted_r = r;
    2661          20 :                 sorted_cr = cr;
    2662             :         }
    2663             : 
    2664          55 :         assert(BATordered(sorted_l) && BATordered(sorted_r));
    2665             : 
    2666          56 :         BATiter li = bat_iterator(sorted_l);
    2667          56 :         BATiter ri = bat_iterator(sorted_r);
    2668          56 :         assert(ri.vh && r->ttype);
    2669             : 
    2670          56 :         struct canditer lci, rci;
    2671          56 :         oid lbase = sorted_l->hseqbase,
    2672          56 :                 rbase = sorted_r->hseqbase,
    2673          56 :                 lo, ro, last_lo = 0;
    2674          56 :         const char *lvals = (const char *) li.base,
    2675          56 :                 *rvals = (const char *) ri.base,
    2676          56 :                 *lvars = li.vh->base,
    2677          56 :                 *rvars = ri.vh->base,
    2678             :                 *vl, *vr;
    2679          56 :         BUN matches, newcap, n = 0, rx = 0, lx = 0;
    2680          56 :         int rskipped = 0, vr_len = 0, cmp = 0;
    2681             : 
    2682          56 :         if (anti)
    2683           0 :                 STR_JOIN_NESTED_LOOP(str_cmp(vl, vr, vr_len) != 0, str_strlen(vr), fname);
    2684             :         else
    2685        2104 :                 STARTSWITH_SORTED_LOOP(str_cmp(vl, vr, vr_len), str_strlen(vr), fname);
    2686             : 
    2687          56 :         assert(!rr || BATcount(rl) == BATcount(rr));
    2688          56 :         BATsetcount(rl, BATcount(rl));
    2689          56 :         if (rr)
    2690          56 :                 BATsetcount(rr, BATcount(rr));
    2691             : 
    2692          56 :         if (BATcount(rl) > 0) {
    2693          32 :                 if (BATtdense(rl))
    2694          20 :                         rl->tseqbase = ((oid *) rl->theap->base)[0];
    2695          32 :                 if (rr && BATtdense(rr))
    2696          18 :                         rr->tseqbase = ((oid *) rr->theap->base)[0];
    2697             :         } else {
    2698          24 :                 rl->tseqbase = 0;
    2699          24 :                 if (rr)
    2700          24 :                         rr->tseqbase = 0;
    2701             :         }
    2702             : 
    2703          56 :         if (l_sorted == FALSE) {
    2704          37 :                 proj_rl = BATproject(rl, ord_sorted_l);
    2705          35 :                 if (!proj_rl) {
    2706           0 :                         msg = createException(MAL, fname, "Project left pre-sort order failed");
    2707           0 :                         goto exit;
    2708             :                 } else {
    2709          35 :                         BBPreclaim(rl);
    2710          37 :                         *rl_ptr = proj_rl;
    2711             :                 }
    2712             :         }
    2713             : 
    2714          56 :         if (rr && r_sorted == FALSE) {
    2715          36 :                 proj_rr = BATproject(rr, ord_sorted_r);
    2716          34 :                 if (!proj_rr) {
    2717           0 :                         BBPreclaim(proj_rl);
    2718           0 :                         msg = createException(MAL, fname, "Project right pre-sort order failed");
    2719           0 :                         goto exit;
    2720             :                 } else {
    2721          34 :                         BBPreclaim(rr);
    2722          36 :                         *rr_ptr = proj_rr;
    2723             :                 }
    2724             :         }
    2725             : 
    2726          56 :         TRC_DEBUG(ALGO,
    2727             :                           "(%s, l=%s,r=%s)=(%s#" BUNFMT "%s%s,%s#" BUNFMT "%s%s\n",
    2728             :                           fname,
    2729             :                           BATgetId(l), BATgetId(r), BATgetId(rl), BATcount(rl),
    2730             :                           rl->tsorted ? "-sorted" : "",
    2731             :                           rl->trevsorted ? "-revsorted" : "",
    2732             :                           rr ? BATgetId(rr) : NULL, rr ? BATcount(rr) : 0,
    2733             :                           rr && rr->tsorted ? "-sorted" : "",
    2734             :                           rr && rr->trevsorted ? "-revsorted" : "");
    2735             : 
    2736          56 : exit:
    2737          56 :         if (l_sorted == FALSE)
    2738          38 :                 BBPnreclaim(3, sorted_l, ord_sorted_l, sorted_cl);
    2739             : 
    2740          56 :         if (r_sorted == FALSE)
    2741          35 :                 BBPnreclaim(3, sorted_r, ord_sorted_r, sorted_cr);
    2742             : 
    2743          54 :         bat_iterator_end(&li);
    2744          55 :         bat_iterator_end(&ri);
    2745          55 :         return msg;
    2746             : }
    2747             : 
    2748             : static str
    2749         143 : STRjoin(bat *rl_id, bat *rr_id, const bat l_id, const bat r_id,
    2750             :                 const bat cl_id, const bat cr_id, const bit anti, bool icase,
    2751             :                 int (*str_cmp)(const char *, const char *, int), const char *fname)
    2752             : {
    2753         143 :         str msg = MAL_SUCCEED;
    2754             : 
    2755         143 :         BAT *rl = NULL, *rr = NULL, *l = NULL, *r = NULL, *cl = NULL, *cr = NULL;
    2756             : 
    2757         143 :         if (!(l = BATdescriptor(l_id)) || !(r = BATdescriptor(r_id))) {
    2758           0 :                 BBPnreclaim(2, l, r);
    2759           0 :                 throw(MAL, fname, SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
    2760             :         }
    2761             : 
    2762         150 :         if ((cl_id && !is_bat_nil(cl_id) && (cl = BATdescriptor(cl_id)) == NULL) ||
    2763         150 :                 (cr_id && !is_bat_nil(cr_id) && (cr = BATdescriptor(cr_id)) == NULL)) {
    2764           0 :                 BBPnreclaim(4, l, r, cl, cr);
    2765           0 :                 throw(MAL, fname, SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
    2766             :         }
    2767             : 
    2768         150 :         rl = COLnew(0, TYPE_oid, BATcount(l), TRANSIENT);
    2769         140 :         if (rr_id)
    2770         148 :                 rr = COLnew(0, TYPE_oid, BATcount(l), TRANSIENT);
    2771             : 
    2772         142 :         if (!rl || (rr_id && !rr)) {
    2773           0 :                 BBPnreclaim(6, l, r, cl, cr, rl, rr);
    2774           0 :                 throw(MAL, fname, SQLSTATE(HY013) MAL_MALLOC_FAIL);
    2775             :         }
    2776             : 
    2777         142 :         set_empty_bat_props(rl);
    2778         142 :         if (rr_id)
    2779         142 :                 set_empty_bat_props(rr);
    2780             : 
    2781         436 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
    2782         142 :         assert(ATOMtype(l->ttype) == TYPE_str);
    2783             : 
    2784         142 :         BAT *nl = l, *nr = r;
    2785             : 
    2786         142 :         if (strcmp(fname, "str.containsjoin") == 0) {
    2787          63 :                 msg = contains_join(rl, rr, l, r, cl, cr, anti, str_cmp, fname);
    2788          66 :                 if (msg) {
    2789           0 :                         BBPnreclaim(6, rl, rr, l, r, cl, cr);
    2790           0 :                         return msg;
    2791             :                 }
    2792             :         } else {
    2793          79 :                 struct canditer lci, rci;
    2794          79 :                 canditer_init(&lci, l, cl);
    2795          84 :                 canditer_init(&rci, r, cr);
    2796          83 :                 BUN lcnt = lci.ncand, rcnt = rci.ncand;
    2797          83 :                 BUN nl_cost = lci.ncand * rci.ncand,
    2798          83 :                         sorted_cost =
    2799          83 :                         (BUN) floor(0.8 * (lcnt*log2((double)lcnt)
    2800          83 :                                                            + rcnt*log2((double)rcnt)));
    2801             : 
    2802          83 :                 if (nl_cost < sorted_cost) {
    2803          28 :                         msg = str_join_nested(rl, rr, nl, nr, cl, cr, anti, str_cmp, fname);
    2804             :                 } else {
    2805          55 :                         BAT *l_low = NULL, *r_low = NULL, *l_rev = NULL, *r_rev = NULL;
    2806          55 :                         if (icase) {
    2807          22 :                                 l_low = batstr_strlower(nl);
    2808          22 :                                 if (l_low == NULL) {
    2809           0 :                                         BBPnreclaim(6, rl, rr, nl, nr, cl, cr);
    2810           0 :                                         throw(MAL, fname, "Failed lowering strings of left input");
    2811             :                                 }
    2812          22 :                                 r_low = batstr_strlower(nr);
    2813          22 :                                 if (r_low == NULL) {
    2814           0 :                                         BBPnreclaim(7, rl, rr, nl, nr, cl, cr, l_low);
    2815           0 :                                         throw(MAL, fname, "Failed lowering strings of right input");
    2816             :                                 }
    2817          22 :                                 BBPnreclaim(2, nl, nr);
    2818          22 :                                 nl = l_low;
    2819          22 :                                 nr = r_low;
    2820             :                         }
    2821          55 :                         if (strcmp(fname, "str.endswithjoin") == 0) {
    2822          27 :                                 l_rev = batstr_strrev(nl);
    2823          28 :                                 if (l_rev == NULL) {
    2824           0 :                                         BBPnreclaim(6, rl, rr, nl, nr, cl, cr);
    2825           0 :                                         throw(MAL, fname, "Failed reversing strings of left input");
    2826             :                                 }
    2827          28 :                                 r_rev = batstr_strrev(nr);
    2828          28 :                                 if (r_rev == NULL) {
    2829           0 :                                         BBPnreclaim(7, rl, rr, nl, nr, cl, cr, l_rev);
    2830           0 :                                         throw(MAL, fname, "Failed reversing strings of right input");
    2831             :                                 }
    2832          28 :                                 BBPnreclaim(2, nl, nr);
    2833          28 :                                 nl = l_rev;
    2834          28 :                                 nr = r_rev;
    2835             :                         }
    2836          55 :                         msg = startswith_join(&rl, &rr, nl, nr, cl, cr, anti, str_is_prefix, fname);
    2837             :                 }
    2838             :         }
    2839             : 
    2840         145 :         if (!msg) {
    2841         145 :                 *rl_id = rl->batCacheid;
    2842         145 :                 BBPkeepref(rl);
    2843         148 :                 if (rr_id) {
    2844         148 :                         *rr_id = rr->batCacheid;
    2845         148 :                         BBPkeepref(rr);
    2846             :                 }
    2847             :         } else {
    2848           0 :                 BBPnreclaim(2, rl, rr);
    2849             :         }
    2850             : 
    2851         148 :         BBPnreclaim(4, nl, nr, cl, cr);
    2852         148 :         return msg;
    2853             : }
    2854             : 
    2855             : #define STRJOIN_MAPARGS(STK, PCI, RL_ID, RR_ID, L_ID, R_ID, CL_ID, CR_ID, IC_ID, ANTI) \
    2856             :         do {                                                                                                                            \
    2857             :                 RL_ID = getArgReference(STK, PCI, 0);                                                   \
    2858             :                 RR_ID = PCI->retc == 1 ? 0 : getArgReference(STK, PCI, 1);           \
    2859             :                 int i = PCI->retc == 1 ? 1 : 2;                                                                      \
    2860             :                 L_ID = getArgReference(STK, PCI, i++);                                                  \
    2861             :                 R_ID = getArgReference(STK, PCI, i++);                                                  \
    2862             :                 IC_ID = PCI->argc - PCI->retc == 7 ?                                                      \
    2863             :                         NULL : getArgReference(stk, pci, i++);                                          \
    2864             :                 CL_ID = getArgReference(STK, PCI, i++);                                                 \
    2865             :                 CR_ID = getArgReference(STK, PCI, i++);                                                 \
    2866             :                 ANTI = PCI->argc - PCI->retc == 7 ?                                                               \
    2867             :                         getArgReference(STK, PCI, 8) : getArgReference(STK, PCI, 9); \
    2868             :         } while (0)
    2869             : 
    2870             : static inline str
    2871          81 : ignorecase(const bat *ic_id, bool *icase, str fname)
    2872             : {
    2873          81 :         BAT *c = NULL;
    2874             : 
    2875          81 :         if ((c = BATdescriptor(*ic_id)) == NULL)
    2876           0 :                 throw(MAL, fname, SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
    2877             : 
    2878          94 :         BUN cnt = BATcount(c);
    2879          94 :         if (cnt < 1) {
    2880           1 :                 BBPreclaim(c);
    2881           1 :                 throw(MAL, fname, SQLSTATE(42000) "Missing ignore case value\n");
    2882             :         }
    2883             : 
    2884          93 :         BATiter bi = bat_iterator(c);
    2885          94 :         *icase = *(bit *) BUNtloc(bi, 0);
    2886          94 :         for(BUN i = 1; i<cnt; i++) {
    2887           0 :                 if (*icase != *(bit*)BUNtloc(bi, i)) {
    2888           0 :                         bat_iterator_end(&bi);
    2889           0 :                         BBPreclaim(c);
    2890           0 :                         throw(MAL, fname, SQLSTATE(42000) "Multiple ignore case values passed, only one expected\n");
    2891             :                 }
    2892             :         }
    2893          94 :         bat_iterator_end(&bi);
    2894          94 :         BBPreclaim(c);
    2895          94 :         return MAL_SUCCEED;
    2896             : }
    2897             : 
    2898             : /**
    2899             :  * @rl_id: result left oid
    2900             :  * @rr_id: result right oid
    2901             :  * @l_id: left oid
    2902             :  * @r_id: right oid
    2903             :  * @cl_id: candidates left oid
    2904             :  * @cr_id: candidates right oid
    2905             :  * @ic_id: ignore case oid
    2906             :  * @anti: anti join oid
    2907             :  */
    2908             : static str
    2909          35 : STRstartswithjoin(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    2910             : {
    2911          35 :         (void)cntxt;
    2912          35 :         (void)mb;
    2913             : 
    2914          35 :         str msg = MAL_SUCCEED;
    2915          35 :         bat *rl_id = NULL, *rr_id = NULL, *l_id = NULL, *r_id = NULL,
    2916          35 :                 *cl_id = NULL, *cr_id = NULL, *ic_id = NULL;
    2917          35 :         bit *anti = NULL;
    2918          35 :         bool icase = false;
    2919             : 
    2920          74 :         STRJOIN_MAPARGS(stk, pci, rl_id, rr_id, l_id, r_id, cl_id, cr_id, ic_id, anti);
    2921             : 
    2922          40 :         if (pci->argc - pci->retc == 8)
    2923          34 :                 msg = ignorecase(ic_id, &icase, "str.startswithjoin");
    2924             : 
    2925          75 :         return msg ? msg : STRjoin(rl_id, rr_id, *l_id, *r_id,
    2926             :                                                            cl_id ? *cl_id : 0,
    2927             :                                                            cr_id ? *cr_id : 0,
    2928          63 :                                                            *anti, icase, icase ? str_is_iprefix : str_is_prefix,
    2929             :                                                            "str.startswithjoin");
    2930             : }
    2931             : 
    2932             : /**
    2933             :  * @rl_id: result left oid
    2934             :  * @rr_id: result right oid
    2935             :  * @l_id: left oid
    2936             :  * @r_id: right oid
    2937             :  * @cl_id: candidates left oid
    2938             :  * @cr_id: candidates right oid
    2939             :  * @ic_id: ignore case oid
    2940             :  * @anti: anti join oid
    2941             :  */
    2942             : static str
    2943          39 : STRendswithjoin(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    2944             : {
    2945          39 :         (void) cntxt;
    2946          39 :         (void) mb;
    2947             : 
    2948          39 :         str msg = MAL_SUCCEED;
    2949          39 :         bat *rl_id = NULL, *rr_id = NULL, *l_id = NULL, *r_id = NULL,
    2950          39 :                 *cl_id = NULL, *cr_id = NULL, *ic_id = NULL;
    2951          39 :         bit *anti = NULL;
    2952          39 :         bool icase = false;
    2953             : 
    2954          79 :         STRJOIN_MAPARGS(stk, pci, rl_id, rr_id, l_id, r_id, cl_id, cr_id, ic_id, anti);
    2955             : 
    2956          42 :         if (pci->argc - pci->retc == 8)
    2957          34 :                 msg = ignorecase(ic_id, &icase, "str.endswithjoin");
    2958             : 
    2959          76 :         return msg ? msg : STRjoin(rl_id, rr_id, *l_id, *r_id,
    2960             :                                                            cl_id ? *cl_id : 0, cr_id ? *cr_id : 0,
    2961          66 :                                                            *anti, icase, icase ? str_is_isuffix : str_is_suffix,
    2962             :                                                            "str.endswithjoin");
    2963             : }
    2964             : 
    2965             : /**
    2966             :  * STRcontainsjoin - MAL pattern registered as "str.containsjoin". The entries below describe the locals that STRJOIN_MAPARGS fills in from stk/pci (macro defined elsewhere; the exact argument mapping is not visible here).
    2967             :  * @rl_id, @rr_id: pointers to the bat ids of the left and right result BATs
    2968             :  * @l_id, @r_id: pointers to the bat ids of the left and right input BATs; both are dereferenced unconditionally
    2969             :  * @cl_id, @cr_id: pointers to the bat ids of the left and right candidate BATs; a NULL pointer is forwarded to STRjoin as 0
    2970             :  * @ic_id: pointer to the bat id of the ignore-case argument; only consulted (through ignorecase()) when the call has 8 input arguments
    2971             :  * @anti: pointer to the anti-join flag (a bit, not a bat id); dereferenced unconditionally
    2972             :  * Matching is done with str_icontains when icase ends up true, and with str_contains otherwise.
    2973             :  * Returns the message produced by ignorecase() if it is non-NULL, otherwise whatever STRjoin returns.
    2974             :  */
    2975             : static str
    2976          55 : STRcontainsjoin(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    2977             : {
    2978          55 :         (void) cntxt; /* unused */
    2979          55 :         (void) mb; /* unused */
    2980             : 
    2981          55 :         str msg = MAL_SUCCEED;
    2982          55 :         bat *rl_id = NULL, *rr_id = NULL, *l_id = NULL, *r_id = NULL,
    2983          55 :                 *cl_id = NULL, *cr_id = NULL, *ic_id = NULL;
    2984          55 :         bit *anti = NULL;
    2985          55 :         bool icase = false; /* stays false unless ignorecase() below sets it */
    2986             : 
    2987         117 :         STRJOIN_MAPARGS(stk, pci, rl_id, rr_id, l_id, r_id, cl_id, cr_id, ic_id, anti);
    2988             : 
    2989          62 :         if (pci->argc - pci->retc == 8) /* 8 inputs: in str_init_funcs only the containsjoin variants with a "caseignore" argument have that many */
    2990          26 :                 msg = ignorecase(ic_id, &icase, "str.containsjoin");
    2991             : 
    2992          88 :         return msg ? msg : STRjoin(rl_id, rr_id, *l_id, *r_id, /* a non-NULL msg short-circuits the join */
    2993             :                                                            cl_id ? *cl_id : 0, cr_id ? *cr_id : 0,
    2994         106 :                                                            *anti, icase, icase ? str_icontains : str_contains,
    2995             :                                                            "str.containsjoin");
    2996             : }
    2997             : 
    2998             : #include "mel.h"
    2999             : mel_func str_init_funcs[] = { /* Table of the "str" module's MAL entries, handed to mal_module2() in init_str_mal below. Each entry is a command(...) or pattern(...) giving module name, MAL function name, C implementation, a flag (false for every entry here), help text and args(...). The two leading numbers in args() appear to be the number of results and the total number of arguments - confirm against mel.h. */
    3000             :  command("str", "str", STRtostr, false, "Noop routine.", args(1,2, arg("",str),arg("s",str))),
    3001             :  command("str", "string", STRTail, false, "Return the tail s[offset..n]\nof a string s[0..n].", args(1,3, arg("",str),arg("s",str),arg("offset",int))),
    3002             :  command("str", "string3", STRSubString, false, "Return substring s[offset..offset+count] of a string s[0..n]", args(1,4, arg("",str),arg("s",str),arg("offset",int),arg("count",int))),
    3003             :  command("str", "length", STRLength, false, "Return the length of a string.", args(1,2, arg("",int),arg("s",str))),
    3004             :  command("str", "nbytes", STRBytes, false, "Return the string length in bytes.", args(1,2, arg("",int),arg("s",str))),
    3005             :  command("str", "unicodeAt", STRWChrAt, false, "get a unicode character\n(as an int) from a string position.", args(1,3, arg("",int),arg("s",str),arg("index",int))),
    3006             :  command("str", "unicode", STRFromWChr, false, "convert a unicode to a character.", args(1,2, arg("",str),arg("wchar",int))),
    3007             :  pattern("str", "startswith", STRstartswith, false, "Check if string starts with substring.", args(1,3, arg("",bit),arg("s",str),arg("prefix",str))),
    3008             :  pattern("str", "startswith", STRstartswith, false, "Check if string starts with substring, icase flag.", args(1,4, arg("",bit),arg("s",str),arg("prefix",str),arg("icase",bit))),
    3009             :  pattern("str", "endswith", STRendswith, false, "Check if string ends with substring.", args(1,3, arg("",bit),arg("s",str),arg("suffix",str))),
    3010             :  pattern("str", "endswith", STRendswith, false, "Check if string ends with substring, icase flag.", args(1,4, arg("",bit),arg("s",str),arg("suffix",str),arg("icase",bit))),
    3011             :  pattern("str", "contains", STRcontains, false, "Check if string haystack contains string needle.", args(1,3, arg("",bit),arg("haystack",str),arg("needle",str))),
    3012             :  pattern("str", "contains", STRcontains, false, "Check if string haystack contains string needle, icase flag.", args(1,4, arg("",bit),arg("haystack",str),arg("needle",str),arg("icase",bit))),
    3013             :  command("str", "toLower", STRlower, false, "Convert a string to lower case.", args(1,2, arg("",str),arg("s",str))),
    3014             :  command("str", "toUpper", STRupper, false, "Convert a string to upper case.", args(1,2, arg("",str),arg("s",str))),
    3015             :  command("str", "caseFold", STRcasefold, false, "Fold the case of a string.", args(1,2, arg("",str),arg("s",str))),
    3016             :  pattern("str", "search", STRstr_search, false, "Search for a substring. Returns\nposition, -1 if not found.", args(1,3, arg("",int),arg("s",str),arg("c",str))),
    3017             :  pattern("str", "search", STRstr_search, false, "Search for a substring, icase flag. Returns\nposition, -1 if not found.", args(1,4, arg("",int),arg("s",str),arg("c",str),arg("icase",bit))),
    3018             :  pattern("str", "r_search", STRrevstr_search, false, "Reverse search for a substring. Returns\nposition, -1 if not found.", args(1,3, arg("",int),arg("s",str),arg("c",str))),
    3019             :  pattern("str", "r_search", STRrevstr_search, false, "Reverse search for a substring, icase flag. Returns\nposition, -1 if not found.", args(1,4, arg("",int),arg("s",str),arg("c",str),arg("icase",bit))),
    3020             :  command("str", "splitpart", STRsplitpart, false, "Split string on delimiter. Returns\ngiven field (counting from one.)", args(1,4, arg("",str),arg("s",str),arg("needle",str),arg("field",int))),
    3021             :  command("str", "trim", STRStrip, false, "Strip whitespaces around a string.", args(1,2, arg("",str),arg("s",str))),
    3022             :  command("str", "ltrim", STRLtrim, false, "Strip whitespaces from start of a string.", args(1,2, arg("",str),arg("s",str))),
    3023             :  command("str", "rtrim", STRRtrim, false, "Strip whitespaces from end of a string.", args(1,2, arg("",str),arg("s",str))),
    3024             :  command("str", "trim2", STRStrip2, false, "Remove the longest string containing only characters from the second string around the first string.", args(1,3, arg("",str),arg("s",str),arg("s2",str))),
    3025             :  command("str", "ltrim2", STRLtrim2, false, "Remove the longest string containing only characters from the second string from the start of the first string.", args(1,3, arg("",str),arg("s",str),arg("s2",str))),
    3026             :  command("str", "rtrim2", STRRtrim2, false, "Remove the longest string containing only characters from the second string from the end of the first string.", args(1,3, arg("",str),arg("s",str),arg("s2",str))),
    3027             :  command("str", "lpad", STRLpad, false, "Fill up a string to the given length prepending the whitespace character.", args(1,3, arg("",str),arg("s",str),arg("len",int))),
    3028             :  command("str", "rpad", STRRpad, false, "Fill up a string to the given length appending the whitespace character.", args(1,3, arg("",str),arg("s",str),arg("len",int))),
    3029             :  command("str", "lpad3", STRLpad3, false, "Fill up the first string to the given length prepending characters of the second string.", args(1,4, arg("",str),arg("s",str),arg("len",int),arg("s2",str))),
    3030             :  command("str", "rpad3", STRRpad3, false, "Fill up the first string to the given length appending characters of the second string.", args(1,4, arg("",str),arg("s",str),arg("len",int),arg("s2",str))),
    3031             :  command("str", "substitute", STRSubstitute, false, "Substitute first occurrence of 'src' by\n'dst'.  Iff repeated = true this is\nrepeated while 'src' can be found in the\nresult string. In order to prevent\nrecursion and result strings of unlimited\nsize, repeating is only done iff src is\nnot a substring of dst.", args(1,5, arg("",str),arg("s",str),arg("src",str),arg("dst",str),arg("rep",bit))),
    3032             :  command("str", "like", STRlikewrap, false, "SQL pattern match function", args(1,3, arg("",bit),arg("s",str),arg("pat",str))),
    3033             :  command("str", "like3", STRlikewrap3, false, "SQL pattern match function", args(1,4, arg("",bit),arg("s",str),arg("pat",str),arg("esc",str))),
    3034             :  command("str", "ascii", STRascii, false, "Return unicode of head of string", args(1,2, arg("",int),arg("s",str))),
    3035             :  command("str", "substring", STRsubstringTail, false, "Extract the tail of a string", args(1,3, arg("",str),arg("s",str),arg("start",int))),
    3036             :  command("str", "substring3", STRsubstring, false, "Extract a substring from str starting at start, for length len", args(1,4, arg("",str),arg("s",str),arg("start",int),arg("len",int))),
    3037             :  command("str", "prefix", STRprefix, false, "Extract the prefix of a given length", args(1,3, arg("",str),arg("s",str),arg("l",int))),
    3038             :  command("str", "suffix", STRsuffix, false, "Extract the suffix of a given length", args(1,3, arg("",str),arg("s",str),arg("l",int))),
    3039             :  command("str", "stringleft", STRprefix, false, "", args(1,3, arg("",str),arg("s",str),arg("l",int))), /* same implementation and signature as "prefix"; no help text */
    3040             :  command("str", "stringright", STRsuffix, false, "", args(1,3, arg("",str),arg("s",str),arg("l",int))), /* same implementation and signature as "suffix"; no help text */
    3041             :  command("str", "locate", STRlocate, false, "Locate the start position of a string", args(1,3, arg("",int),arg("s1",str),arg("s2",str))),
    3042             :  command("str", "locate3", STRlocate3, false, "Locate the start position of a string", args(1,4, arg("",int),arg("s1",str),arg("s2",str),arg("start",int))),
    3043             :  command("str", "insert", STRinsert, false, "Insert a string into another", args(1,5, arg("",str),arg("s",str),arg("start",int),arg("l",int),arg("s2",str))),
    3044             :  command("str", "replace", STRreplace, false, "Insert a string into another", args(1,4, arg("",str),arg("s",str),arg("pat",str),arg("s2",str))), /* NOTE(review): help text is identical to the "insert" entry above; looks copy-pasted - confirm and correct */
    3045             :  command("str", "repeat", STRrepeat, false, "", args(1,3, arg("",str),arg("s2",str),arg("c",int))),
    3046             :  command("str", "space", STRspace, false, "", args(1,2, arg("",str),arg("l",int))),
    3047             :  command("str", "asciify", STRasciify, false, "Transform string from UTF8 to ASCII", args(1, 2, arg("out",str), arg("in",str))),
    3048             :  pattern("str", "startswithselect", STRstartswithselect, false, "Select all head values of the first input BAT for which the\ntail value starts with the given prefix.", args(1,5, batarg("",oid),batarg("b",str),batarg("s",oid),arg("prefix",str),arg("anti",bit))),
    3049             :  pattern("str", "startswithselect", STRstartswithselect, false, "Select all head values of the first input BAT for which the\ntail value starts with the given prefix + icase.", args(1,6, batarg("",oid),batarg("b",str),batarg("s",oid),arg("prefix",str),arg("caseignore",bit),arg("anti",bit))),
    3050             :  pattern("str", "endswithselect", STRendswithselect, false, "Select all head values of the first input BAT for which the\ntail value end with the given suffix.", args(1,5, batarg("",oid),batarg("b",str),batarg("s",oid),arg("suffix",str),arg("anti",bit))),
    3051             :  pattern("str", "endswithselect", STRendswithselect, false, "Select all head values of the first input BAT for which the\ntail value end with the given suffix + icase.", args(1,6, batarg("",oid),batarg("b",str),batarg("s",oid),arg("suffix",str),arg("caseignore",bit),arg("anti",bit))),
    3052             :  pattern("str", "containsselect", STRcontainsselect, false, "Select all head values of the first input BAT for which the\ntail value contains the given needle.", args(1,5, batarg("",oid),batarg("b",str),batarg("s",oid),arg("needle",str),arg("anti",bit))),
    3053             :  pattern("str", "containsselect", STRcontainsselect, false, "Select all head values of the first input BAT for which the\ntail value contains the given needle + icase.", args(1,6, batarg("",oid),batarg("b",str),batarg("s",oid),arg("needle",str),arg("caseignore",bit),arg("anti",bit))),
    3054             :  pattern("str", "startswithjoin", STRstartswithjoin, false, "Join the string bat L with the prefix bat R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows.", args(2,9, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
    3055             :  pattern("str", "startswithjoin", STRstartswithjoin, false, "Join the string bat L with the prefix bat R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows + icase.", args(2,10, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
    3056             :  pattern("str", "startswithjoin", STRstartswithjoin, false, "The same as STRstartswithjoin, but only produce one output.", args(1,8,batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))),
    3057             :  pattern("str", "startswithjoin", STRstartswithjoin, false, "The same as STRstartswithjoin, but only produce one output + icase.", args(1,9,batarg("",oid),batarg("l",str),batarg("r",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))),
    3058             :  pattern("str", "endswithjoin", STRendswithjoin, false, "Join the string bat L with the suffix bat R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows.", args(2,9, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
    3059             :  pattern("str", "endswithjoin", STRendswithjoin, false, "Join the string bat L with the suffix bat R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows + icase.", args(2,10, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
    3060             :  pattern("str", "endswithjoin", STRendswithjoin, false, "The same as STRendswithjoin, but only produce one output.", args(1,8,batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))),
    3061             :  pattern("str", "endswithjoin", STRendswithjoin, false, "The same as STRendswithjoin, but only produce one output + icase.", args(1,9,batarg("",oid),batarg("l",str),batarg("r",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))),
    3062             :  pattern("str", "containsjoin", STRcontainsjoin, false, "Join the string bat L with the bat R if L contains the string of R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows.", args(2,9, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
    3063             :  pattern("str", "containsjoin", STRcontainsjoin, false, "Join the string bat L with the bat R if L contains the string of R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows + icase.", args(2,10, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
    3064             :  pattern("str", "containsjoin", STRcontainsjoin, false, "The same as STRcontainsjoin, but only produce one output.", args(1,8,batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))),
    3065             :  pattern("str", "containsjoin", STRcontainsjoin, false, "The same as STRcontainsjoin, but only produce one output + icase.", args(1,9,batarg("",oid),batarg("l",str),batarg("r",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))),
    3066             :  { .imp=NULL } /* last entry has a NULL imp; presumably the end-of-table marker - confirm against mel.h */
    3067             : };
    3068             : #include "mal_import.h"
    3069             : #ifdef _MSC_VER /* MSVC-only preparation; presumably needed by LIB_STARTUP_FUNC below - confirm against its definition */
    3070             : #undef read /* drop any macro named read so the bare word can be used as the section attribute on the next line (presumably it is #defined elsewhere in MSVC builds) */
    3071             : #pragma section(".CRT$XCU",read) /* declare the .CRT$XCU section with the read attribute */
    3072             : #endif
    3073         323 : LIB_STARTUP_FUNC(init_str_mal) /* defines init_str_mal through the LIB_STARTUP_FUNC macro (defined elsewhere; judging by its name it runs at library start-up - confirm) */
    3074         323 : { mal_module2("str", NULL, str_init_funcs, NULL, NULL); } /* passes the module name "str" and the str_init_funcs table to mal_module2; the other three arguments are NULL */

Generated by: LCOV version 1.14