LCOV - code coverage report
Current view: top level - monetdb5/modules/atoms - str.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 1276 1656 77.1 %
Date: 2024-04-25 20:03:45 Functions: 99 109 90.8 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : /*
      14             :  *  N.J. Nes, M.L. Kersten
      15             :  * The String Module
      16             :  * Strings can be created in many ways. Already in the built-in
      17             :  * operations each atom can be cast to a string using the str(atom)
      18             :  * mil command.  The string module makes it possible to construct
      19             :  * a string as a substring of a given string (s). There
      20             :  * are two such construction functions.  The first takes the substring
      21             :  * from some position (offset) until the end of the string. The second
      22             :  * also starts at the given offset position but only copies count
      23             :  * bytes. The functions fail when the position and count
      24             :  * fall out of bounds. A negative position indicates that the position
      25             :  * is computed from the end of the source string.
      26             :  *
      27             :  * The strings can be compared using the "=" and "!=" operators.
      28             :  *
      29             :  * The operator "+" concatenates a string and an atom. The atom will
      30             :  * be converted to a string using the atom to string c function. The
      31             :  * string and the result of the conversion are concatenated to form a
      32             :  * new string. This string is returned.
      33             :  *
      34             :  * The length function returns the length of the string. The length is
      35             :  * the number of characters in the string. The maximum string length
      36             :  * handled by the kernel is 32-bits long.
      37             :  *
      38             :  * chrAt() returns the character at position index in the string
      39             :  * s. The function will fail when the index is out of range. The range
      40             :  * is from 0 to length(s)-1.
      41             :  *
      42             :  * The startsWith and endsWith functions test if the string s starts
      43             :  * with or ends with the given prefix or suffix.
      44             :  *
      45             :  * The toLower and toUpper functions cast the string to lower or upper
      46             :  * case characters.
      47             :  *
      48             :  * The search(str,chr) function searches for the first occurrence of a
      49             :  * character from the beginning of the string. The search(chr,str)
      50             :  * searches for the last occurrence (or first from the end of the
      51             :  * string). The last search function locates the position of the first
      52             :  * occurrence of the string s2 in string s. All search functions
      53             :  * return -1 when the search failed.  Otherwise the position is
      54             :  * returned.
      55             :  *
      56             :  * All string functions fail when an incorrect string (NULL pointer)
      57             :  * is given.  In the current implementation, a fail is signaled by
      58             :  * returning nil, since this facilitates the use of the string module
      59             :  * in bulk operations.
      60             :  *
      61             :  * All functions in the module have now been converted to
      62             :  * Unicode. Internally, we use UTF-8 to store strings as Unicode in
      63             :  * zero-terminated byte-sequences.
      64             :  */
      65             : #include "monetdb_config.h"
      66             : #include "str.h"
      67             : #include <string.h>
      68             : #ifdef HAVE_ICONV
      69             : #include <iconv.h>
      70             : #include <locale.h>
      71             : #endif
      72             : #include "mal_interpreter.h"
      73             : 
      74             : #include "utf8.h"
      75             : 
      76             : /*
      77             :  * UTF-8 Handling
      78             :  * UTF-8 is a way to store Unicode strings in zero-terminated byte
      79             :  * sequences, which you can e.g. strcmp() with old 8-bit Latin-1
      80             :  * strcmp() functions and which then gives the same results as doing
      81             :  * the strcmp() on equivalent Latin-1 and ASCII character strings
      82             :  * stored in simple one-byte sequences.  These characteristics make
      83             :  * UTF-8 an attractive format for upgrading an ASCII-oriented computer
      84             :  * program towards one that supports Unicode. That is why we use UTF-8
      85             :  * in MonetDB.
      86             :  *
      87             :  * For MonetDB, UTF-8 mostly has no consequences, as strings stored in
      88             :  * BATs are regarded as data, and it does not matter for the database
      89             :  * kernel whether the zero-terminated byte sequence it is processing
      90             :  * has UTF-8 or Latin-1 semantics. This module is the only place where
      91             :  * explicit string functionality is located. We do have to adapt
      92             :  * the behavior of length(), search(), substring() and similar
      93             :  * commands to the fact that one (Unicode) character is now
      94             :  * stored in a variable number of bytes (possibly > 1).
      95             :  *
      96             :  * One of the things that becomes more complex in Unicode is
      97             :  * uppercase/lowercase conversion. The tables below are the simple
      98             :  * one-to-one Unicode case mappings. We do not support the special
      99             :  * casing mappings (e.g. from one to two letters).
     100             :  *
     101             :  * References:
     102             :  * simple casing:       http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
     103             :  * complex casing: http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt
     104             :  *
     105             :  * The Unicode case conversion implementation in MonetDB fills a
     106             :  * mapping BAT of int,int combinations, in which we perform
     107             :  * high-performance hash-lookup (all code inlined).
     108             :  */
     109             : 
     110             : /* These tables were generated from the Unicode 13.0.0 spec. */
     111             : static const struct UTF8_lower_upper {
     112             :         const unsigned int from, to;
     113             : } UTF8_toUpper[] = {                    /* code points with non-null uppercase conversion */
     114             :         {0x0061, 0x0041,},
     115             :         {0x0062, 0x0042,},
     116             :         {0x0063, 0x0043,},
     117             :         {0x0064, 0x0044,},
     118             :         {0x0065, 0x0045,},
     119             :         {0x0066, 0x0046,},
     120             :         {0x0067, 0x0047,},
     121             :         {0x0068, 0x0048,},
     122             :         {0x0069, 0x0049,},
     123             :         {0x006A, 0x004A,},
     124             :         {0x006B, 0x004B,},
     125             :         {0x006C, 0x004C,},
     126             :         {0x006D, 0x004D,},
     127             :         {0x006E, 0x004E,},
     128             :         {0x006F, 0x004F,},
     129             :         {0x0070, 0x0050,},
     130             :         {0x0071, 0x0051,},
     131             :         {0x0072, 0x0052,},
     132             :         {0x0073, 0x0053,},
     133             :         {0x0074, 0x0054,},
     134             :         {0x0075, 0x0055,},
     135             :         {0x0076, 0x0056,},
     136             :         {0x0077, 0x0057,},
     137             :         {0x0078, 0x0058,},
     138             :         {0x0079, 0x0059,},
     139             :         {0x007A, 0x005A,},
     140             :         {0x00B5, 0x039C,},
     141             :         {0x00E0, 0x00C0,},
     142             :         {0x00E1, 0x00C1,},
     143             :         {0x00E2, 0x00C2,},
     144             :         {0x00E3, 0x00C3,},
     145             :         {0x00E4, 0x00C4,},
     146             :         {0x00E5, 0x00C5,},
     147             :         {0x00E6, 0x00C6,},
     148             :         {0x00E7, 0x00C7,},
     149             :         {0x00E8, 0x00C8,},
     150             :         {0x00E9, 0x00C9,},
     151             :         {0x00EA, 0x00CA,},
     152             :         {0x00EB, 0x00CB,},
     153             :         {0x00EC, 0x00CC,},
     154             :         {0x00ED, 0x00CD,},
     155             :         {0x00EE, 0x00CE,},
     156             :         {0x00EF, 0x00CF,},
     157             :         {0x00F0, 0x00D0,},
     158             :         {0x00F1, 0x00D1,},
     159             :         {0x00F2, 0x00D2,},
     160             :         {0x00F3, 0x00D3,},
     161             :         {0x00F4, 0x00D4,},
     162             :         {0x00F5, 0x00D5,},
     163             :         {0x00F6, 0x00D6,},
     164             :         {0x00F8, 0x00D8,},
     165             :         {0x00F9, 0x00D9,},
     166             :         {0x00FA, 0x00DA,},
     167             :         {0x00FB, 0x00DB,},
     168             :         {0x00FC, 0x00DC,},
     169             :         {0x00FD, 0x00DD,},
     170             :         {0x00FE, 0x00DE,},
     171             :         {0x00FF, 0x0178,},
     172             :         {0x0101, 0x0100,},
     173             :         {0x0103, 0x0102,},
     174             :         {0x0105, 0x0104,},
     175             :         {0x0107, 0x0106,},
     176             :         {0x0109, 0x0108,},
     177             :         {0x010B, 0x010A,},
     178             :         {0x010D, 0x010C,},
     179             :         {0x010F, 0x010E,},
     180             :         {0x0111, 0x0110,},
     181             :         {0x0113, 0x0112,},
     182             :         {0x0115, 0x0114,},
     183             :         {0x0117, 0x0116,},
     184             :         {0x0119, 0x0118,},
     185             :         {0x011B, 0x011A,},
     186             :         {0x011D, 0x011C,},
     187             :         {0x011F, 0x011E,},
     188             :         {0x0121, 0x0120,},
     189             :         {0x0123, 0x0122,},
     190             :         {0x0125, 0x0124,},
     191             :         {0x0127, 0x0126,},
     192             :         {0x0129, 0x0128,},
     193             :         {0x012B, 0x012A,},
     194             :         {0x012D, 0x012C,},
     195             :         {0x012F, 0x012E,},
     196             :         {0x0131, 0x0049,},
     197             :         {0x0133, 0x0132,},
     198             :         {0x0135, 0x0134,},
     199             :         {0x0137, 0x0136,},
     200             :         {0x013A, 0x0139,},
     201             :         {0x013C, 0x013B,},
     202             :         {0x013E, 0x013D,},
     203             :         {0x0140, 0x013F,},
     204             :         {0x0142, 0x0141,},
     205             :         {0x0144, 0x0143,},
     206             :         {0x0146, 0x0145,},
     207             :         {0x0148, 0x0147,},
     208             :         {0x014B, 0x014A,},
     209             :         {0x014D, 0x014C,},
     210             :         {0x014F, 0x014E,},
     211             :         {0x0151, 0x0150,},
     212             :         {0x0153, 0x0152,},
     213             :         {0x0155, 0x0154,},
     214             :         {0x0157, 0x0156,},
     215             :         {0x0159, 0x0158,},
     216             :         {0x015B, 0x015A,},
     217             :         {0x015D, 0x015C,},
     218             :         {0x015F, 0x015E,},
     219             :         {0x0161, 0x0160,},
     220             :         {0x0163, 0x0162,},
     221             :         {0x0165, 0x0164,},
     222             :         {0x0167, 0x0166,},
     223             :         {0x0169, 0x0168,},
     224             :         {0x016B, 0x016A,},
     225             :         {0x016D, 0x016C,},
     226             :         {0x016F, 0x016E,},
     227             :         {0x0171, 0x0170,},
     228             :         {0x0173, 0x0172,},
     229             :         {0x0175, 0x0174,},
     230             :         {0x0177, 0x0176,},
     231             :         {0x017A, 0x0179,},
     232             :         {0x017C, 0x017B,},
     233             :         {0x017E, 0x017D,},
     234             :         {0x017F, 0x0053,},
     235             :         {0x0180, 0x0243,},
     236             :         {0x0183, 0x0182,},
     237             :         {0x0185, 0x0184,},
     238             :         {0x0188, 0x0187,},
     239             :         {0x018C, 0x018B,},
     240             :         {0x0192, 0x0191,},
     241             :         {0x0195, 0x01F6,},
     242             :         {0x0199, 0x0198,},
     243             :         {0x019A, 0x023D,},
     244             :         {0x019E, 0x0220,},
     245             :         {0x01A1, 0x01A0,},
     246             :         {0x01A3, 0x01A2,},
     247             :         {0x01A5, 0x01A4,},
     248             :         {0x01A8, 0x01A7,},
     249             :         {0x01AD, 0x01AC,},
     250             :         {0x01B0, 0x01AF,},
     251             :         {0x01B4, 0x01B3,},
     252             :         {0x01B6, 0x01B5,},
     253             :         {0x01B9, 0x01B8,},
     254             :         {0x01BD, 0x01BC,},
     255             :         {0x01BF, 0x01F7,},
     256             :         {0x01C5, 0x01C4,},
     257             :         {0x01C6, 0x01C4,},
     258             :         {0x01C8, 0x01C7,},
     259             :         {0x01C9, 0x01C7,},
     260             :         {0x01CB, 0x01CA,},
     261             :         {0x01CC, 0x01CA,},
     262             :         {0x01CE, 0x01CD,},
     263             :         {0x01D0, 0x01CF,},
     264             :         {0x01D2, 0x01D1,},
     265             :         {0x01D4, 0x01D3,},
     266             :         {0x01D6, 0x01D5,},
     267             :         {0x01D8, 0x01D7,},
     268             :         {0x01DA, 0x01D9,},
     269             :         {0x01DC, 0x01DB,},
     270             :         {0x01DD, 0x018E,},
     271             :         {0x01DF, 0x01DE,},
     272             :         {0x01E1, 0x01E0,},
     273             :         {0x01E3, 0x01E2,},
     274             :         {0x01E5, 0x01E4,},
     275             :         {0x01E7, 0x01E6,},
     276             :         {0x01E9, 0x01E8,},
     277             :         {0x01EB, 0x01EA,},
     278             :         {0x01ED, 0x01EC,},
     279             :         {0x01EF, 0x01EE,},
     280             :         {0x01F2, 0x01F1,},
     281             :         {0x01F3, 0x01F1,},
     282             :         {0x01F5, 0x01F4,},
     283             :         {0x01F9, 0x01F8,},
     284             :         {0x01FB, 0x01FA,},
     285             :         {0x01FD, 0x01FC,},
     286             :         {0x01FF, 0x01FE,},
     287             :         {0x0201, 0x0200,},
     288             :         {0x0203, 0x0202,},
     289             :         {0x0205, 0x0204,},
     290             :         {0x0207, 0x0206,},
     291             :         {0x0209, 0x0208,},
     292             :         {0x020B, 0x020A,},
     293             :         {0x020D, 0x020C,},
     294             :         {0x020F, 0x020E,},
     295             :         {0x0211, 0x0210,},
     296             :         {0x0213, 0x0212,},
     297             :         {0x0215, 0x0214,},
     298             :         {0x0217, 0x0216,},
     299             :         {0x0219, 0x0218,},
     300             :         {0x021B, 0x021A,},
     301             :         {0x021D, 0x021C,},
     302             :         {0x021F, 0x021E,},
     303             :         {0x0223, 0x0222,},
     304             :         {0x0225, 0x0224,},
     305             :         {0x0227, 0x0226,},
     306             :         {0x0229, 0x0228,},
     307             :         {0x022B, 0x022A,},
     308             :         {0x022D, 0x022C,},
     309             :         {0x022F, 0x022E,},
     310             :         {0x0231, 0x0230,},
     311             :         {0x0233, 0x0232,},
     312             :         {0x023C, 0x023B,},
     313             :         {0x023F, 0x2C7E,},
     314             :         {0x0240, 0x2C7F,},
     315             :         {0x0242, 0x0241,},
     316             :         {0x0247, 0x0246,},
     317             :         {0x0249, 0x0248,},
     318             :         {0x024B, 0x024A,},
     319             :         {0x024D, 0x024C,},
     320             :         {0x024F, 0x024E,},
     321             :         {0x0250, 0x2C6F,},
     322             :         {0x0251, 0x2C6D,},
     323             :         {0x0252, 0x2C70,},
     324             :         {0x0253, 0x0181,},
     325             :         {0x0254, 0x0186,},
     326             :         {0x0256, 0x0189,},
     327             :         {0x0257, 0x018A,},
     328             :         {0x0259, 0x018F,},
     329             :         {0x025B, 0x0190,},
     330             :         {0x025C, 0xA7AB,},
     331             :         {0x0260, 0x0193,},
     332             :         {0x0261, 0xA7AC,},
     333             :         {0x0263, 0x0194,},
     334             :         {0x0265, 0xA78D,},
     335             :         {0x0266, 0xA7AA,},
     336             :         {0x0268, 0x0197,},
     337             :         {0x0269, 0x0196,},
     338             :         {0x026A, 0xA7AE,},
     339             :         {0x026B, 0x2C62,},
     340             :         {0x026C, 0xA7AD,},
     341             :         {0x026F, 0x019C,},
     342             :         {0x0271, 0x2C6E,},
     343             :         {0x0272, 0x019D,},
     344             :         {0x0275, 0x019F,},
     345             :         {0x027D, 0x2C64,},
     346             :         {0x0280, 0x01A6,},
     347             :         {0x0282, 0xA7C5,},
     348             :         {0x0283, 0x01A9,},
     349             :         {0x0287, 0xA7B1,},
     350             :         {0x0288, 0x01AE,},
     351             :         {0x0289, 0x0244,},
     352             :         {0x028A, 0x01B1,},
     353             :         {0x028B, 0x01B2,},
     354             :         {0x028C, 0x0245,},
     355             :         {0x0292, 0x01B7,},
     356             :         {0x029D, 0xA7B2,},
     357             :         {0x029E, 0xA7B0,},
     358             :         {0x0345, 0x0399,},
     359             :         {0x0371, 0x0370,},
     360             :         {0x0373, 0x0372,},
     361             :         {0x0377, 0x0376,},
     362             :         {0x037B, 0x03FD,},
     363             :         {0x037C, 0x03FE,},
     364             :         {0x037D, 0x03FF,},
     365             :         {0x03AC, 0x0386,},
     366             :         {0x03AD, 0x0388,},
     367             :         {0x03AE, 0x0389,},
     368             :         {0x03AF, 0x038A,},
     369             :         {0x03B1, 0x0391,},
     370             :         {0x03B2, 0x0392,},
     371             :         {0x03B3, 0x0393,},
     372             :         {0x03B4, 0x0394,},
     373             :         {0x03B5, 0x0395,},
     374             :         {0x03B6, 0x0396,},
     375             :         {0x03B7, 0x0397,},
     376             :         {0x03B8, 0x0398,},
     377             :         {0x03B9, 0x0399,},
     378             :         {0x03BA, 0x039A,},
     379             :         {0x03BB, 0x039B,},
     380             :         {0x03BC, 0x039C,},
     381             :         {0x03BD, 0x039D,},
     382             :         {0x03BE, 0x039E,},
     383             :         {0x03BF, 0x039F,},
     384             :         {0x03C0, 0x03A0,},
     385             :         {0x03C1, 0x03A1,},
     386             :         {0x03C2, 0x03A3,},
     387             :         {0x03C3, 0x03A3,},
     388             :         {0x03C4, 0x03A4,},
     389             :         {0x03C5, 0x03A5,},
     390             :         {0x03C6, 0x03A6,},
     391             :         {0x03C7, 0x03A7,},
     392             :         {0x03C8, 0x03A8,},
     393             :         {0x03C9, 0x03A9,},
     394             :         {0x03CA, 0x03AA,},
     395             :         {0x03CB, 0x03AB,},
     396             :         {0x03CC, 0x038C,},
     397             :         {0x03CD, 0x038E,},
     398             :         {0x03CE, 0x038F,},
     399             :         {0x03D0, 0x0392,},
     400             :         {0x03D1, 0x0398,},
     401             :         {0x03D5, 0x03A6,},
     402             :         {0x03D6, 0x03A0,},
     403             :         {0x03D7, 0x03CF,},
     404             :         {0x03D9, 0x03D8,},
     405             :         {0x03DB, 0x03DA,},
     406             :         {0x03DD, 0x03DC,},
     407             :         {0x03DF, 0x03DE,},
     408             :         {0x03E1, 0x03E0,},
     409             :         {0x03E3, 0x03E2,},
     410             :         {0x03E5, 0x03E4,},
     411             :         {0x03E7, 0x03E6,},
     412             :         {0x03E9, 0x03E8,},
     413             :         {0x03EB, 0x03EA,},
     414             :         {0x03ED, 0x03EC,},
     415             :         {0x03EF, 0x03EE,},
     416             :         {0x03F0, 0x039A,},
     417             :         {0x03F1, 0x03A1,},
     418             :         {0x03F2, 0x03F9,},
     419             :         {0x03F3, 0x037F,},
     420             :         {0x03F5, 0x0395,},
     421             :         {0x03F8, 0x03F7,},
     422             :         {0x03FB, 0x03FA,},
     423             :         {0x0430, 0x0410,},
     424             :         {0x0431, 0x0411,},
     425             :         {0x0432, 0x0412,},
     426             :         {0x0433, 0x0413,},
     427             :         {0x0434, 0x0414,},
     428             :         {0x0435, 0x0415,},
     429             :         {0x0436, 0x0416,},
     430             :         {0x0437, 0x0417,},
     431             :         {0x0438, 0x0418,},
     432             :         {0x0439, 0x0419,},
     433             :         {0x043A, 0x041A,},
     434             :         {0x043B, 0x041B,},
     435             :         {0x043C, 0x041C,},
     436             :         {0x043D, 0x041D,},
     437             :         {0x043E, 0x041E,},
     438             :         {0x043F, 0x041F,},
     439             :         {0x0440, 0x0420,},
     440             :         {0x0441, 0x0421,},
     441             :         {0x0442, 0x0422,},
     442             :         {0x0443, 0x0423,},
     443             :         {0x0444, 0x0424,},
     444             :         {0x0445, 0x0425,},
     445             :         {0x0446, 0x0426,},
     446             :         {0x0447, 0x0427,},
     447             :         {0x0448, 0x0428,},
     448             :         {0x0449, 0x0429,},
     449             :         {0x044A, 0x042A,},
     450             :         {0x044B, 0x042B,},
     451             :         {0x044C, 0x042C,},
     452             :         {0x044D, 0x042D,},
     453             :         {0x044E, 0x042E,},
     454             :         {0x044F, 0x042F,},
     455             :         {0x0450, 0x0400,},
     456             :         {0x0451, 0x0401,},
     457             :         {0x0452, 0x0402,},
     458             :         {0x0453, 0x0403,},
     459             :         {0x0454, 0x0404,},
     460             :         {0x0455, 0x0405,},
     461             :         {0x0456, 0x0406,},
     462             :         {0x0457, 0x0407,},
     463             :         {0x0458, 0x0408,},
     464             :         {0x0459, 0x0409,},
     465             :         {0x045A, 0x040A,},
     466             :         {0x045B, 0x040B,},
     467             :         {0x045C, 0x040C,},
     468             :         {0x045D, 0x040D,},
     469             :         {0x045E, 0x040E,},
     470             :         {0x045F, 0x040F,},
     471             :         {0x0461, 0x0460,},
     472             :         {0x0463, 0x0462,},
     473             :         {0x0465, 0x0464,},
     474             :         {0x0467, 0x0466,},
     475             :         {0x0469, 0x0468,},
     476             :         {0x046B, 0x046A,},
     477             :         {0x046D, 0x046C,},
     478             :         {0x046F, 0x046E,},
     479             :         {0x0471, 0x0470,},
     480             :         {0x0473, 0x0472,},
     481             :         {0x0475, 0x0474,},
     482             :         {0x0477, 0x0476,},
     483             :         {0x0479, 0x0478,},
     484             :         {0x047B, 0x047A,},
     485             :         {0x047D, 0x047C,},
     486             :         {0x047F, 0x047E,},
     487             :         {0x0481, 0x0480,},
     488             :         {0x048B, 0x048A,},
     489             :         {0x048D, 0x048C,},
     490             :         {0x048F, 0x048E,},
     491             :         {0x0491, 0x0490,},
     492             :         {0x0493, 0x0492,},
     493             :         {0x0495, 0x0494,},
     494             :         {0x0497, 0x0496,},
     495             :         {0x0499, 0x0498,},
     496             :         {0x049B, 0x049A,},
     497             :         {0x049D, 0x049C,},
     498             :         {0x049F, 0x049E,},
     499             :         {0x04A1, 0x04A0,},
     500             :         {0x04A3, 0x04A2,},
     501             :         {0x04A5, 0x04A4,},
     502             :         {0x04A7, 0x04A6,},
     503             :         {0x04A9, 0x04A8,},
     504             :         {0x04AB, 0x04AA,},
     505             :         {0x04AD, 0x04AC,},
     506             :         {0x04AF, 0x04AE,},
     507             :         {0x04B1, 0x04B0,},
     508             :         {0x04B3, 0x04B2,},
     509             :         {0x04B5, 0x04B4,},
     510             :         {0x04B7, 0x04B6,},
     511             :         {0x04B9, 0x04B8,},
     512             :         {0x04BB, 0x04BA,},
     513             :         {0x04BD, 0x04BC,},
     514             :         {0x04BF, 0x04BE,},
     515             :         {0x04C2, 0x04C1,},
     516             :         {0x04C4, 0x04C3,},
     517             :         {0x04C6, 0x04C5,},
     518             :         {0x04C8, 0x04C7,},
     519             :         {0x04CA, 0x04C9,},
     520             :         {0x04CC, 0x04CB,},
     521             :         {0x04CE, 0x04CD,},
     522             :         {0x04CF, 0x04C0,},
     523             :         {0x04D1, 0x04D0,},
     524             :         {0x04D3, 0x04D2,},
     525             :         {0x04D5, 0x04D4,},
     526             :         {0x04D7, 0x04D6,},
     527             :         {0x04D9, 0x04D8,},
     528             :         {0x04DB, 0x04DA,},
     529             :         {0x04DD, 0x04DC,},
     530             :         {0x04DF, 0x04DE,},
     531             :         {0x04E1, 0x04E0,},
     532             :         {0x04E3, 0x04E2,},
     533             :         {0x04E5, 0x04E4,},
     534             :         {0x04E7, 0x04E6,},
     535             :         {0x04E9, 0x04E8,},
     536             :         {0x04EB, 0x04EA,},
     537             :         {0x04ED, 0x04EC,},
     538             :         {0x04EF, 0x04EE,},
     539             :         {0x04F1, 0x04F0,},
     540             :         {0x04F3, 0x04F2,},
     541             :         {0x04F5, 0x04F4,},
     542             :         {0x04F7, 0x04F6,},
     543             :         {0x04F9, 0x04F8,},
     544             :         {0x04FB, 0x04FA,},
     545             :         {0x04FD, 0x04FC,},
     546             :         {0x04FF, 0x04FE,},
     547             :         {0x0501, 0x0500,},
     548             :         {0x0503, 0x0502,},
     549             :         {0x0505, 0x0504,},
     550             :         {0x0507, 0x0506,},
     551             :         {0x0509, 0x0508,},
     552             :         {0x050B, 0x050A,},
     553             :         {0x050D, 0x050C,},
     554             :         {0x050F, 0x050E,},
     555             :         {0x0511, 0x0510,},
     556             :         {0x0513, 0x0512,},
     557             :         {0x0515, 0x0514,},
     558             :         {0x0517, 0x0516,},
     559             :         {0x0519, 0x0518,},
     560             :         {0x051B, 0x051A,},
     561             :         {0x051D, 0x051C,},
     562             :         {0x051F, 0x051E,},
     563             :         {0x0521, 0x0520,},
     564             :         {0x0523, 0x0522,},
     565             :         {0x0525, 0x0524,},
     566             :         {0x0527, 0x0526,},
     567             :         {0x0529, 0x0528,},
     568             :         {0x052B, 0x052A,},
     569             :         {0x052D, 0x052C,},
     570             :         {0x052F, 0x052E,},
     571             :         {0x0561, 0x0531,},
     572             :         {0x0562, 0x0532,},
     573             :         {0x0563, 0x0533,},
     574             :         {0x0564, 0x0534,},
     575             :         {0x0565, 0x0535,},
     576             :         {0x0566, 0x0536,},
     577             :         {0x0567, 0x0537,},
     578             :         {0x0568, 0x0538,},
     579             :         {0x0569, 0x0539,},
     580             :         {0x056A, 0x053A,},
     581             :         {0x056B, 0x053B,},
     582             :         {0x056C, 0x053C,},
     583             :         {0x056D, 0x053D,},
     584             :         {0x056E, 0x053E,},
     585             :         {0x056F, 0x053F,},
     586             :         {0x0570, 0x0540,},
     587             :         {0x0571, 0x0541,},
     588             :         {0x0572, 0x0542,},
     589             :         {0x0573, 0x0543,},
     590             :         {0x0574, 0x0544,},
     591             :         {0x0575, 0x0545,},
     592             :         {0x0576, 0x0546,},
     593             :         {0x0577, 0x0547,},
     594             :         {0x0578, 0x0548,},
     595             :         {0x0579, 0x0549,},
     596             :         {0x057A, 0x054A,},
     597             :         {0x057B, 0x054B,},
     598             :         {0x057C, 0x054C,},
     599             :         {0x057D, 0x054D,},
     600             :         {0x057E, 0x054E,},
     601             :         {0x057F, 0x054F,},
     602             :         {0x0580, 0x0550,},
     603             :         {0x0581, 0x0551,},
     604             :         {0x0582, 0x0552,},
     605             :         {0x0583, 0x0553,},
     606             :         {0x0584, 0x0554,},
     607             :         {0x0585, 0x0555,},
     608             :         {0x0586, 0x0556,},
     609             :         {0x10D0, 0x1C90,},
     610             :         {0x10D1, 0x1C91,},
     611             :         {0x10D2, 0x1C92,},
     612             :         {0x10D3, 0x1C93,},
     613             :         {0x10D4, 0x1C94,},
     614             :         {0x10D5, 0x1C95,},
     615             :         {0x10D6, 0x1C96,},
     616             :         {0x10D7, 0x1C97,},
     617             :         {0x10D8, 0x1C98,},
     618             :         {0x10D9, 0x1C99,},
     619             :         {0x10DA, 0x1C9A,},
     620             :         {0x10DB, 0x1C9B,},
     621             :         {0x10DC, 0x1C9C,},
     622             :         {0x10DD, 0x1C9D,},
     623             :         {0x10DE, 0x1C9E,},
     624             :         {0x10DF, 0x1C9F,},
     625             :         {0x10E0, 0x1CA0,},
     626             :         {0x10E1, 0x1CA1,},
     627             :         {0x10E2, 0x1CA2,},
     628             :         {0x10E3, 0x1CA3,},
     629             :         {0x10E4, 0x1CA4,},
     630             :         {0x10E5, 0x1CA5,},
     631             :         {0x10E6, 0x1CA6,},
     632             :         {0x10E7, 0x1CA7,},
     633             :         {0x10E8, 0x1CA8,},
     634             :         {0x10E9, 0x1CA9,},
     635             :         {0x10EA, 0x1CAA,},
     636             :         {0x10EB, 0x1CAB,},
     637             :         {0x10EC, 0x1CAC,},
     638             :         {0x10ED, 0x1CAD,},
     639             :         {0x10EE, 0x1CAE,},
     640             :         {0x10EF, 0x1CAF,},
     641             :         {0x10F0, 0x1CB0,},
     642             :         {0x10F1, 0x1CB1,},
     643             :         {0x10F2, 0x1CB2,},
     644             :         {0x10F3, 0x1CB3,},
     645             :         {0x10F4, 0x1CB4,},
     646             :         {0x10F5, 0x1CB5,},
     647             :         {0x10F6, 0x1CB6,},
     648             :         {0x10F7, 0x1CB7,},
     649             :         {0x10F8, 0x1CB8,},
     650             :         {0x10F9, 0x1CB9,},
     651             :         {0x10FA, 0x1CBA,},
     652             :         {0x10FD, 0x1CBD,},
     653             :         {0x10FE, 0x1CBE,},
     654             :         {0x10FF, 0x1CBF,},
     655             :         {0x13F8, 0x13F0,},
     656             :         {0x13F9, 0x13F1,},
     657             :         {0x13FA, 0x13F2,},
     658             :         {0x13FB, 0x13F3,},
     659             :         {0x13FC, 0x13F4,},
     660             :         {0x13FD, 0x13F5,},
     661             :         {0x1C80, 0x0412,},
     662             :         {0x1C81, 0x0414,},
     663             :         {0x1C82, 0x041E,},
     664             :         {0x1C83, 0x0421,},
     665             :         {0x1C84, 0x0422,},
     666             :         {0x1C85, 0x0422,},
     667             :         {0x1C86, 0x042A,},
     668             :         {0x1C87, 0x0462,},
     669             :         {0x1C88, 0xA64A,},
     670             :         {0x1D79, 0xA77D,},
     671             :         {0x1D7D, 0x2C63,},
     672             :         {0x1D8E, 0xA7C6,},
     673             :         {0x1E01, 0x1E00,},
     674             :         {0x1E03, 0x1E02,},
     675             :         {0x1E05, 0x1E04,},
     676             :         {0x1E07, 0x1E06,},
     677             :         {0x1E09, 0x1E08,},
     678             :         {0x1E0B, 0x1E0A,},
     679             :         {0x1E0D, 0x1E0C,},
     680             :         {0x1E0F, 0x1E0E,},
     681             :         {0x1E11, 0x1E10,},
     682             :         {0x1E13, 0x1E12,},
     683             :         {0x1E15, 0x1E14,},
     684             :         {0x1E17, 0x1E16,},
     685             :         {0x1E19, 0x1E18,},
     686             :         {0x1E1B, 0x1E1A,},
     687             :         {0x1E1D, 0x1E1C,},
     688             :         {0x1E1F, 0x1E1E,},
     689             :         {0x1E21, 0x1E20,},
     690             :         {0x1E23, 0x1E22,},
     691             :         {0x1E25, 0x1E24,},
     692             :         {0x1E27, 0x1E26,},
     693             :         {0x1E29, 0x1E28,},
     694             :         {0x1E2B, 0x1E2A,},
     695             :         {0x1E2D, 0x1E2C,},
     696             :         {0x1E2F, 0x1E2E,},
     697             :         {0x1E31, 0x1E30,},
     698             :         {0x1E33, 0x1E32,},
     699             :         {0x1E35, 0x1E34,},
     700             :         {0x1E37, 0x1E36,},
     701             :         {0x1E39, 0x1E38,},
     702             :         {0x1E3B, 0x1E3A,},
     703             :         {0x1E3D, 0x1E3C,},
     704             :         {0x1E3F, 0x1E3E,},
     705             :         {0x1E41, 0x1E40,},
     706             :         {0x1E43, 0x1E42,},
     707             :         {0x1E45, 0x1E44,},
     708             :         {0x1E47, 0x1E46,},
     709             :         {0x1E49, 0x1E48,},
     710             :         {0x1E4B, 0x1E4A,},
     711             :         {0x1E4D, 0x1E4C,},
     712             :         {0x1E4F, 0x1E4E,},
     713             :         {0x1E51, 0x1E50,},
     714             :         {0x1E53, 0x1E52,},
     715             :         {0x1E55, 0x1E54,},
     716             :         {0x1E57, 0x1E56,},
     717             :         {0x1E59, 0x1E58,},
     718             :         {0x1E5B, 0x1E5A,},
     719             :         {0x1E5D, 0x1E5C,},
     720             :         {0x1E5F, 0x1E5E,},
     721             :         {0x1E61, 0x1E60,},
     722             :         {0x1E63, 0x1E62,},
     723             :         {0x1E65, 0x1E64,},
     724             :         {0x1E67, 0x1E66,},
     725             :         {0x1E69, 0x1E68,},
     726             :         {0x1E6B, 0x1E6A,},
     727             :         {0x1E6D, 0x1E6C,},
     728             :         {0x1E6F, 0x1E6E,},
     729             :         {0x1E71, 0x1E70,},
     730             :         {0x1E73, 0x1E72,},
     731             :         {0x1E75, 0x1E74,},
     732             :         {0x1E77, 0x1E76,},
     733             :         {0x1E79, 0x1E78,},
     734             :         {0x1E7B, 0x1E7A,},
     735             :         {0x1E7D, 0x1E7C,},
     736             :         {0x1E7F, 0x1E7E,},
     737             :         {0x1E81, 0x1E80,},
     738             :         {0x1E83, 0x1E82,},
     739             :         {0x1E85, 0x1E84,},
     740             :         {0x1E87, 0x1E86,},
     741             :         {0x1E89, 0x1E88,},
     742             :         {0x1E8B, 0x1E8A,},
     743             :         {0x1E8D, 0x1E8C,},
     744             :         {0x1E8F, 0x1E8E,},
     745             :         {0x1E91, 0x1E90,},
     746             :         {0x1E93, 0x1E92,},
     747             :         {0x1E95, 0x1E94,},
     748             :         {0x1E9B, 0x1E60,},
     749             :         {0x1EA1, 0x1EA0,},
     750             :         {0x1EA3, 0x1EA2,},
     751             :         {0x1EA5, 0x1EA4,},
     752             :         {0x1EA7, 0x1EA6,},
     753             :         {0x1EA9, 0x1EA8,},
     754             :         {0x1EAB, 0x1EAA,},
     755             :         {0x1EAD, 0x1EAC,},
     756             :         {0x1EAF, 0x1EAE,},
     757             :         {0x1EB1, 0x1EB0,},
     758             :         {0x1EB3, 0x1EB2,},
     759             :         {0x1EB5, 0x1EB4,},
     760             :         {0x1EB7, 0x1EB6,},
     761             :         {0x1EB9, 0x1EB8,},
     762             :         {0x1EBB, 0x1EBA,},
     763             :         {0x1EBD, 0x1EBC,},
     764             :         {0x1EBF, 0x1EBE,},
     765             :         {0x1EC1, 0x1EC0,},
     766             :         {0x1EC3, 0x1EC2,},
     767             :         {0x1EC5, 0x1EC4,},
     768             :         {0x1EC7, 0x1EC6,},
     769             :         {0x1EC9, 0x1EC8,},
     770             :         {0x1ECB, 0x1ECA,},
     771             :         {0x1ECD, 0x1ECC,},
     772             :         {0x1ECF, 0x1ECE,},
     773             :         {0x1ED1, 0x1ED0,},
     774             :         {0x1ED3, 0x1ED2,},
     775             :         {0x1ED5, 0x1ED4,},
     776             :         {0x1ED7, 0x1ED6,},
     777             :         {0x1ED9, 0x1ED8,},
     778             :         {0x1EDB, 0x1EDA,},
     779             :         {0x1EDD, 0x1EDC,},
     780             :         {0x1EDF, 0x1EDE,},
     781             :         {0x1EE1, 0x1EE0,},
     782             :         {0x1EE3, 0x1EE2,},
     783             :         {0x1EE5, 0x1EE4,},
     784             :         {0x1EE7, 0x1EE6,},
     785             :         {0x1EE9, 0x1EE8,},
     786             :         {0x1EEB, 0x1EEA,},
     787             :         {0x1EED, 0x1EEC,},
     788             :         {0x1EEF, 0x1EEE,},
     789             :         {0x1EF1, 0x1EF0,},
     790             :         {0x1EF3, 0x1EF2,},
     791             :         {0x1EF5, 0x1EF4,},
     792             :         {0x1EF7, 0x1EF6,},
     793             :         {0x1EF9, 0x1EF8,},
     794             :         {0x1EFB, 0x1EFA,},
     795             :         {0x1EFD, 0x1EFC,},
     796             :         {0x1EFF, 0x1EFE,},
     797             :         {0x1F00, 0x1F08,},
     798             :         {0x1F01, 0x1F09,},
     799             :         {0x1F02, 0x1F0A,},
     800             :         {0x1F03, 0x1F0B,},
     801             :         {0x1F04, 0x1F0C,},
     802             :         {0x1F05, 0x1F0D,},
     803             :         {0x1F06, 0x1F0E,},
     804             :         {0x1F07, 0x1F0F,},
     805             :         {0x1F10, 0x1F18,},
     806             :         {0x1F11, 0x1F19,},
     807             :         {0x1F12, 0x1F1A,},
     808             :         {0x1F13, 0x1F1B,},
     809             :         {0x1F14, 0x1F1C,},
     810             :         {0x1F15, 0x1F1D,},
     811             :         {0x1F20, 0x1F28,},
     812             :         {0x1F21, 0x1F29,},
     813             :         {0x1F22, 0x1F2A,},
     814             :         {0x1F23, 0x1F2B,},
     815             :         {0x1F24, 0x1F2C,},
     816             :         {0x1F25, 0x1F2D,},
     817             :         {0x1F26, 0x1F2E,},
     818             :         {0x1F27, 0x1F2F,},
     819             :         {0x1F30, 0x1F38,},
     820             :         {0x1F31, 0x1F39,},
     821             :         {0x1F32, 0x1F3A,},
     822             :         {0x1F33, 0x1F3B,},
     823             :         {0x1F34, 0x1F3C,},
     824             :         {0x1F35, 0x1F3D,},
     825             :         {0x1F36, 0x1F3E,},
     826             :         {0x1F37, 0x1F3F,},
     827             :         {0x1F40, 0x1F48,},
     828             :         {0x1F41, 0x1F49,},
     829             :         {0x1F42, 0x1F4A,},
     830             :         {0x1F43, 0x1F4B,},
     831             :         {0x1F44, 0x1F4C,},
     832             :         {0x1F45, 0x1F4D,},
     833             :         {0x1F51, 0x1F59,},
     834             :         {0x1F53, 0x1F5B,},
     835             :         {0x1F55, 0x1F5D,},
     836             :         {0x1F57, 0x1F5F,},
     837             :         {0x1F60, 0x1F68,},
     838             :         {0x1F61, 0x1F69,},
     839             :         {0x1F62, 0x1F6A,},
     840             :         {0x1F63, 0x1F6B,},
     841             :         {0x1F64, 0x1F6C,},
     842             :         {0x1F65, 0x1F6D,},
     843             :         {0x1F66, 0x1F6E,},
     844             :         {0x1F67, 0x1F6F,},
     845             :         {0x1F70, 0x1FBA,},
     846             :         {0x1F71, 0x1FBB,},
     847             :         {0x1F72, 0x1FC8,},
     848             :         {0x1F73, 0x1FC9,},
     849             :         {0x1F74, 0x1FCA,},
     850             :         {0x1F75, 0x1FCB,},
     851             :         {0x1F76, 0x1FDA,},
     852             :         {0x1F77, 0x1FDB,},
     853             :         {0x1F78, 0x1FF8,},
     854             :         {0x1F79, 0x1FF9,},
     855             :         {0x1F7A, 0x1FEA,},
     856             :         {0x1F7B, 0x1FEB,},
     857             :         {0x1F7C, 0x1FFA,},
     858             :         {0x1F7D, 0x1FFB,},
     859             :         {0x1F80, 0x1F88,},
     860             :         {0x1F81, 0x1F89,},
     861             :         {0x1F82, 0x1F8A,},
     862             :         {0x1F83, 0x1F8B,},
     863             :         {0x1F84, 0x1F8C,},
     864             :         {0x1F85, 0x1F8D,},
     865             :         {0x1F86, 0x1F8E,},
     866             :         {0x1F87, 0x1F8F,},
     867             :         {0x1F90, 0x1F98,},
     868             :         {0x1F91, 0x1F99,},
     869             :         {0x1F92, 0x1F9A,},
     870             :         {0x1F93, 0x1F9B,},
     871             :         {0x1F94, 0x1F9C,},
     872             :         {0x1F95, 0x1F9D,},
     873             :         {0x1F96, 0x1F9E,},
     874             :         {0x1F97, 0x1F9F,},
     875             :         {0x1FA0, 0x1FA8,},
     876             :         {0x1FA1, 0x1FA9,},
     877             :         {0x1FA2, 0x1FAA,},
     878             :         {0x1FA3, 0x1FAB,},
     879             :         {0x1FA4, 0x1FAC,},
     880             :         {0x1FA5, 0x1FAD,},
     881             :         {0x1FA6, 0x1FAE,},
     882             :         {0x1FA7, 0x1FAF,},
     883             :         {0x1FB0, 0x1FB8,},
     884             :         {0x1FB1, 0x1FB9,},
     885             :         {0x1FB3, 0x1FBC,},
     886             :         {0x1FBE, 0x0399,},
     887             :         {0x1FC3, 0x1FCC,},
     888             :         {0x1FD0, 0x1FD8,},
     889             :         {0x1FD1, 0x1FD9,},
     890             :         {0x1FE0, 0x1FE8,},
     891             :         {0x1FE1, 0x1FE9,},
     892             :         {0x1FE5, 0x1FEC,},
     893             :         {0x1FF3, 0x1FFC,},
     894             :         {0x214E, 0x2132,},
     895             :         {0x2170, 0x2160,},
     896             :         {0x2171, 0x2161,},
     897             :         {0x2172, 0x2162,},
     898             :         {0x2173, 0x2163,},
     899             :         {0x2174, 0x2164,},
     900             :         {0x2175, 0x2165,},
     901             :         {0x2176, 0x2166,},
     902             :         {0x2177, 0x2167,},
     903             :         {0x2178, 0x2168,},
     904             :         {0x2179, 0x2169,},
     905             :         {0x217A, 0x216A,},
     906             :         {0x217B, 0x216B,},
     907             :         {0x217C, 0x216C,},
     908             :         {0x217D, 0x216D,},
     909             :         {0x217E, 0x216E,},
     910             :         {0x217F, 0x216F,},
     911             :         {0x2184, 0x2183,},
     912             :         {0x24D0, 0x24B6,},
     913             :         {0x24D1, 0x24B7,},
     914             :         {0x24D2, 0x24B8,},
     915             :         {0x24D3, 0x24B9,},
     916             :         {0x24D4, 0x24BA,},
     917             :         {0x24D5, 0x24BB,},
     918             :         {0x24D6, 0x24BC,},
     919             :         {0x24D7, 0x24BD,},
     920             :         {0x24D8, 0x24BE,},
     921             :         {0x24D9, 0x24BF,},
     922             :         {0x24DA, 0x24C0,},
     923             :         {0x24DB, 0x24C1,},
     924             :         {0x24DC, 0x24C2,},
     925             :         {0x24DD, 0x24C3,},
     926             :         {0x24DE, 0x24C4,},
     927             :         {0x24DF, 0x24C5,},
     928             :         {0x24E0, 0x24C6,},
     929             :         {0x24E1, 0x24C7,},
     930             :         {0x24E2, 0x24C8,},
     931             :         {0x24E3, 0x24C9,},
     932             :         {0x24E4, 0x24CA,},
     933             :         {0x24E5, 0x24CB,},
     934             :         {0x24E6, 0x24CC,},
     935             :         {0x24E7, 0x24CD,},
     936             :         {0x24E8, 0x24CE,},
     937             :         {0x24E9, 0x24CF,},
     938             :         {0x2C30, 0x2C00,},
     939             :         {0x2C31, 0x2C01,},
     940             :         {0x2C32, 0x2C02,},
     941             :         {0x2C33, 0x2C03,},
     942             :         {0x2C34, 0x2C04,},
     943             :         {0x2C35, 0x2C05,},
     944             :         {0x2C36, 0x2C06,},
     945             :         {0x2C37, 0x2C07,},
     946             :         {0x2C38, 0x2C08,},
     947             :         {0x2C39, 0x2C09,},
     948             :         {0x2C3A, 0x2C0A,},
     949             :         {0x2C3B, 0x2C0B,},
     950             :         {0x2C3C, 0x2C0C,},
     951             :         {0x2C3D, 0x2C0D,},
     952             :         {0x2C3E, 0x2C0E,},
     953             :         {0x2C3F, 0x2C0F,},
     954             :         {0x2C40, 0x2C10,},
     955             :         {0x2C41, 0x2C11,},
     956             :         {0x2C42, 0x2C12,},
     957             :         {0x2C43, 0x2C13,},
     958             :         {0x2C44, 0x2C14,},
     959             :         {0x2C45, 0x2C15,},
     960             :         {0x2C46, 0x2C16,},
     961             :         {0x2C47, 0x2C17,},
     962             :         {0x2C48, 0x2C18,},
     963             :         {0x2C49, 0x2C19,},
     964             :         {0x2C4A, 0x2C1A,},
     965             :         {0x2C4B, 0x2C1B,},
     966             :         {0x2C4C, 0x2C1C,},
     967             :         {0x2C4D, 0x2C1D,},
     968             :         {0x2C4E, 0x2C1E,},
     969             :         {0x2C4F, 0x2C1F,},
     970             :         {0x2C50, 0x2C20,},
     971             :         {0x2C51, 0x2C21,},
     972             :         {0x2C52, 0x2C22,},
     973             :         {0x2C53, 0x2C23,},
     974             :         {0x2C54, 0x2C24,},
     975             :         {0x2C55, 0x2C25,},
     976             :         {0x2C56, 0x2C26,},
     977             :         {0x2C57, 0x2C27,},
     978             :         {0x2C58, 0x2C28,},
     979             :         {0x2C59, 0x2C29,},
     980             :         {0x2C5A, 0x2C2A,},
     981             :         {0x2C5B, 0x2C2B,},
     982             :         {0x2C5C, 0x2C2C,},
     983             :         {0x2C5D, 0x2C2D,},
     984             :         {0x2C5E, 0x2C2E,},
     985             :         {0x2C5F, 0x2C2F,},
     986             :         {0x2C61, 0x2C60,},
     987             :         {0x2C65, 0x023A,},
     988             :         {0x2C66, 0x023E,},
     989             :         {0x2C68, 0x2C67,},
     990             :         {0x2C6A, 0x2C69,},
     991             :         {0x2C6C, 0x2C6B,},
     992             :         {0x2C73, 0x2C72,},
     993             :         {0x2C76, 0x2C75,},
     994             :         {0x2C81, 0x2C80,},
     995             :         {0x2C83, 0x2C82,},
     996             :         {0x2C85, 0x2C84,},
     997             :         {0x2C87, 0x2C86,},
     998             :         {0x2C89, 0x2C88,},
     999             :         {0x2C8B, 0x2C8A,},
    1000             :         {0x2C8D, 0x2C8C,},
    1001             :         {0x2C8F, 0x2C8E,},
    1002             :         {0x2C91, 0x2C90,},
    1003             :         {0x2C93, 0x2C92,},
    1004             :         {0x2C95, 0x2C94,},
    1005             :         {0x2C97, 0x2C96,},
    1006             :         {0x2C99, 0x2C98,},
    1007             :         {0x2C9B, 0x2C9A,},
    1008             :         {0x2C9D, 0x2C9C,},
    1009             :         {0x2C9F, 0x2C9E,},
    1010             :         {0x2CA1, 0x2CA0,},
    1011             :         {0x2CA3, 0x2CA2,},
    1012             :         {0x2CA5, 0x2CA4,},
    1013             :         {0x2CA7, 0x2CA6,},
    1014             :         {0x2CA9, 0x2CA8,},
    1015             :         {0x2CAB, 0x2CAA,},
    1016             :         {0x2CAD, 0x2CAC,},
    1017             :         {0x2CAF, 0x2CAE,},
    1018             :         {0x2CB1, 0x2CB0,},
    1019             :         {0x2CB3, 0x2CB2,},
    1020             :         {0x2CB5, 0x2CB4,},
    1021             :         {0x2CB7, 0x2CB6,},
    1022             :         {0x2CB9, 0x2CB8,},
    1023             :         {0x2CBB, 0x2CBA,},
    1024             :         {0x2CBD, 0x2CBC,},
    1025             :         {0x2CBF, 0x2CBE,},
    1026             :         {0x2CC1, 0x2CC0,},
    1027             :         {0x2CC3, 0x2CC2,},
    1028             :         {0x2CC5, 0x2CC4,},
    1029             :         {0x2CC7, 0x2CC6,},
    1030             :         {0x2CC9, 0x2CC8,},
    1031             :         {0x2CCB, 0x2CCA,},
    1032             :         {0x2CCD, 0x2CCC,},
    1033             :         {0x2CCF, 0x2CCE,},
    1034             :         {0x2CD1, 0x2CD0,},
    1035             :         {0x2CD3, 0x2CD2,},
    1036             :         {0x2CD5, 0x2CD4,},
    1037             :         {0x2CD7, 0x2CD6,},
    1038             :         {0x2CD9, 0x2CD8,},
    1039             :         {0x2CDB, 0x2CDA,},
    1040             :         {0x2CDD, 0x2CDC,},
    1041             :         {0x2CDF, 0x2CDE,},
    1042             :         {0x2CE1, 0x2CE0,},
    1043             :         {0x2CE3, 0x2CE2,},
    1044             :         {0x2CEC, 0x2CEB,},
    1045             :         {0x2CEE, 0x2CED,},
    1046             :         {0x2CF3, 0x2CF2,},
    1047             :         {0x2D00, 0x10A0,},
    1048             :         {0x2D01, 0x10A1,},
    1049             :         {0x2D02, 0x10A2,},
    1050             :         {0x2D03, 0x10A3,},
    1051             :         {0x2D04, 0x10A4,},
    1052             :         {0x2D05, 0x10A5,},
    1053             :         {0x2D06, 0x10A6,},
    1054             :         {0x2D07, 0x10A7,},
    1055             :         {0x2D08, 0x10A8,},
    1056             :         {0x2D09, 0x10A9,},
    1057             :         {0x2D0A, 0x10AA,},
    1058             :         {0x2D0B, 0x10AB,},
    1059             :         {0x2D0C, 0x10AC,},
    1060             :         {0x2D0D, 0x10AD,},
    1061             :         {0x2D0E, 0x10AE,},
    1062             :         {0x2D0F, 0x10AF,},
    1063             :         {0x2D10, 0x10B0,},
    1064             :         {0x2D11, 0x10B1,},
    1065             :         {0x2D12, 0x10B2,},
    1066             :         {0x2D13, 0x10B3,},
    1067             :         {0x2D14, 0x10B4,},
    1068             :         {0x2D15, 0x10B5,},
    1069             :         {0x2D16, 0x10B6,},
    1070             :         {0x2D17, 0x10B7,},
    1071             :         {0x2D18, 0x10B8,},
    1072             :         {0x2D19, 0x10B9,},
    1073             :         {0x2D1A, 0x10BA,},
    1074             :         {0x2D1B, 0x10BB,},
    1075             :         {0x2D1C, 0x10BC,},
    1076             :         {0x2D1D, 0x10BD,},
    1077             :         {0x2D1E, 0x10BE,},
    1078             :         {0x2D1F, 0x10BF,},
    1079             :         {0x2D20, 0x10C0,},
    1080             :         {0x2D21, 0x10C1,},
    1081             :         {0x2D22, 0x10C2,},
    1082             :         {0x2D23, 0x10C3,},
    1083             :         {0x2D24, 0x10C4,},
    1084             :         {0x2D25, 0x10C5,},
    1085             :         {0x2D27, 0x10C7,},
    1086             :         {0x2D2D, 0x10CD,},
    1087             :         {0xA641, 0xA640,},
    1088             :         {0xA643, 0xA642,},
    1089             :         {0xA645, 0xA644,},
    1090             :         {0xA647, 0xA646,},
    1091             :         {0xA649, 0xA648,},
    1092             :         {0xA64B, 0xA64A,},
    1093             :         {0xA64D, 0xA64C,},
    1094             :         {0xA64F, 0xA64E,},
    1095             :         {0xA651, 0xA650,},
    1096             :         {0xA653, 0xA652,},
    1097             :         {0xA655, 0xA654,},
    1098             :         {0xA657, 0xA656,},
    1099             :         {0xA659, 0xA658,},
    1100             :         {0xA65B, 0xA65A,},
    1101             :         {0xA65D, 0xA65C,},
    1102             :         {0xA65F, 0xA65E,},
    1103             :         {0xA661, 0xA660,},
    1104             :         {0xA663, 0xA662,},
    1105             :         {0xA665, 0xA664,},
    1106             :         {0xA667, 0xA666,},
    1107             :         {0xA669, 0xA668,},
    1108             :         {0xA66B, 0xA66A,},
    1109             :         {0xA66D, 0xA66C,},
    1110             :         {0xA681, 0xA680,},
    1111             :         {0xA683, 0xA682,},
    1112             :         {0xA685, 0xA684,},
    1113             :         {0xA687, 0xA686,},
    1114             :         {0xA689, 0xA688,},
    1115             :         {0xA68B, 0xA68A,},
    1116             :         {0xA68D, 0xA68C,},
    1117             :         {0xA68F, 0xA68E,},
    1118             :         {0xA691, 0xA690,},
    1119             :         {0xA693, 0xA692,},
    1120             :         {0xA695, 0xA694,},
    1121             :         {0xA697, 0xA696,},
    1122             :         {0xA699, 0xA698,},
    1123             :         {0xA69B, 0xA69A,},
    1124             :         {0xA723, 0xA722,},
    1125             :         {0xA725, 0xA724,},
    1126             :         {0xA727, 0xA726,},
    1127             :         {0xA729, 0xA728,},
    1128             :         {0xA72B, 0xA72A,},
    1129             :         {0xA72D, 0xA72C,},
    1130             :         {0xA72F, 0xA72E,},
    1131             :         {0xA733, 0xA732,},
    1132             :         {0xA735, 0xA734,},
    1133             :         {0xA737, 0xA736,},
    1134             :         {0xA739, 0xA738,},
    1135             :         {0xA73B, 0xA73A,},
    1136             :         {0xA73D, 0xA73C,},
    1137             :         {0xA73F, 0xA73E,},
    1138             :         {0xA741, 0xA740,},
    1139             :         {0xA743, 0xA742,},
    1140             :         {0xA745, 0xA744,},
    1141             :         {0xA747, 0xA746,},
    1142             :         {0xA749, 0xA748,},
    1143             :         {0xA74B, 0xA74A,},
    1144             :         {0xA74D, 0xA74C,},
    1145             :         {0xA74F, 0xA74E,},
    1146             :         {0xA751, 0xA750,},
    1147             :         {0xA753, 0xA752,},
    1148             :         {0xA755, 0xA754,},
    1149             :         {0xA757, 0xA756,},
    1150             :         {0xA759, 0xA758,},
    1151             :         {0xA75B, 0xA75A,},
    1152             :         {0xA75D, 0xA75C,},
    1153             :         {0xA75F, 0xA75E,},
    1154             :         {0xA761, 0xA760,},
    1155             :         {0xA763, 0xA762,},
    1156             :         {0xA765, 0xA764,},
    1157             :         {0xA767, 0xA766,},
    1158             :         {0xA769, 0xA768,},
    1159             :         {0xA76B, 0xA76A,},
    1160             :         {0xA76D, 0xA76C,},
    1161             :         {0xA76F, 0xA76E,},
    1162             :         {0xA77A, 0xA779,},
    1163             :         {0xA77C, 0xA77B,},
    1164             :         {0xA77F, 0xA77E,},
    1165             :         {0xA781, 0xA780,},
    1166             :         {0xA783, 0xA782,},
    1167             :         {0xA785, 0xA784,},
    1168             :         {0xA787, 0xA786,},
    1169             :         {0xA78C, 0xA78B,},
    1170             :         {0xA791, 0xA790,},
    1171             :         {0xA793, 0xA792,},
    1172             :         {0xA794, 0xA7C4,},
    1173             :         {0xA797, 0xA796,},
    1174             :         {0xA799, 0xA798,},
    1175             :         {0xA79B, 0xA79A,},
    1176             :         {0xA79D, 0xA79C,},
    1177             :         {0xA79F, 0xA79E,},
    1178             :         {0xA7A1, 0xA7A0,},
    1179             :         {0xA7A3, 0xA7A2,},
    1180             :         {0xA7A5, 0xA7A4,},
    1181             :         {0xA7A7, 0xA7A6,},
    1182             :         {0xA7A9, 0xA7A8,},
    1183             :         {0xA7B5, 0xA7B4,},
    1184             :         {0xA7B7, 0xA7B6,},
    1185             :         {0xA7B9, 0xA7B8,},
    1186             :         {0xA7BB, 0xA7BA,},
    1187             :         {0xA7BD, 0xA7BC,},
    1188             :         {0xA7BF, 0xA7BE,},
    1189             :         {0xA7C1, 0xA7C0,},
    1190             :         {0xA7C3, 0xA7C2,},
    1191             :         {0xA7C8, 0xA7C7,},
    1192             :         {0xA7CA, 0xA7C9,},
    1193             :         {0xA7D1, 0xA7D0,},
    1194             :         {0xA7D7, 0xA7D6,},
    1195             :         {0xA7D9, 0xA7D8,},
    1196             :         {0xA7F6, 0xA7F5,},
    1197             :         {0xAB53, 0xA7B3,},
    1198             :         {0xAB70, 0x13A0,},
    1199             :         {0xAB71, 0x13A1,},
    1200             :         {0xAB72, 0x13A2,},
    1201             :         {0xAB73, 0x13A3,},
    1202             :         {0xAB74, 0x13A4,},
    1203             :         {0xAB75, 0x13A5,},
    1204             :         {0xAB76, 0x13A6,},
    1205             :         {0xAB77, 0x13A7,},
    1206             :         {0xAB78, 0x13A8,},
    1207             :         {0xAB79, 0x13A9,},
    1208             :         {0xAB7A, 0x13AA,},
    1209             :         {0xAB7B, 0x13AB,},
    1210             :         {0xAB7C, 0x13AC,},
    1211             :         {0xAB7D, 0x13AD,},
    1212             :         {0xAB7E, 0x13AE,},
    1213             :         {0xAB7F, 0x13AF,},
    1214             :         {0xAB80, 0x13B0,},
    1215             :         {0xAB81, 0x13B1,},
    1216             :         {0xAB82, 0x13B2,},
    1217             :         {0xAB83, 0x13B3,},
    1218             :         {0xAB84, 0x13B4,},
    1219             :         {0xAB85, 0x13B5,},
    1220             :         {0xAB86, 0x13B6,},
    1221             :         {0xAB87, 0x13B7,},
    1222             :         {0xAB88, 0x13B8,},
    1223             :         {0xAB89, 0x13B9,},
    1224             :         {0xAB8A, 0x13BA,},
    1225             :         {0xAB8B, 0x13BB,},
    1226             :         {0xAB8C, 0x13BC,},
    1227             :         {0xAB8D, 0x13BD,},
    1228             :         {0xAB8E, 0x13BE,},
    1229             :         {0xAB8F, 0x13BF,},
    1230             :         {0xAB90, 0x13C0,},
    1231             :         {0xAB91, 0x13C1,},
    1232             :         {0xAB92, 0x13C2,},
    1233             :         {0xAB93, 0x13C3,},
    1234             :         {0xAB94, 0x13C4,},
    1235             :         {0xAB95, 0x13C5,},
    1236             :         {0xAB96, 0x13C6,},
    1237             :         {0xAB97, 0x13C7,},
    1238             :         {0xAB98, 0x13C8,},
    1239             :         {0xAB99, 0x13C9,},
    1240             :         {0xAB9A, 0x13CA,},
    1241             :         {0xAB9B, 0x13CB,},
    1242             :         {0xAB9C, 0x13CC,},
    1243             :         {0xAB9D, 0x13CD,},
    1244             :         {0xAB9E, 0x13CE,},
    1245             :         {0xAB9F, 0x13CF,},
    1246             :         {0xABA0, 0x13D0,},
    1247             :         {0xABA1, 0x13D1,},
    1248             :         {0xABA2, 0x13D2,},
    1249             :         {0xABA3, 0x13D3,},
    1250             :         {0xABA4, 0x13D4,},
    1251             :         {0xABA5, 0x13D5,},
    1252             :         {0xABA6, 0x13D6,},
    1253             :         {0xABA7, 0x13D7,},
    1254             :         {0xABA8, 0x13D8,},
    1255             :         {0xABA9, 0x13D9,},
    1256             :         {0xABAA, 0x13DA,},
    1257             :         {0xABAB, 0x13DB,},
    1258             :         {0xABAC, 0x13DC,},
    1259             :         {0xABAD, 0x13DD,},
    1260             :         {0xABAE, 0x13DE,},
    1261             :         {0xABAF, 0x13DF,},
    1262             :         {0xABB0, 0x13E0,},
    1263             :         {0xABB1, 0x13E1,},
    1264             :         {0xABB2, 0x13E2,},
    1265             :         {0xABB3, 0x13E3,},
    1266             :         {0xABB4, 0x13E4,},
    1267             :         {0xABB5, 0x13E5,},
    1268             :         {0xABB6, 0x13E6,},
    1269             :         {0xABB7, 0x13E7,},
    1270             :         {0xABB8, 0x13E8,},
    1271             :         {0xABB9, 0x13E9,},
    1272             :         {0xABBA, 0x13EA,},
    1273             :         {0xABBB, 0x13EB,},
    1274             :         {0xABBC, 0x13EC,},
    1275             :         {0xABBD, 0x13ED,},
    1276             :         {0xABBE, 0x13EE,},
    1277             :         {0xABBF, 0x13EF,},
    1278             :         {0xFF41, 0xFF21,},
    1279             :         {0xFF42, 0xFF22,},
    1280             :         {0xFF43, 0xFF23,},
    1281             :         {0xFF44, 0xFF24,},
    1282             :         {0xFF45, 0xFF25,},
    1283             :         {0xFF46, 0xFF26,},
    1284             :         {0xFF47, 0xFF27,},
    1285             :         {0xFF48, 0xFF28,},
    1286             :         {0xFF49, 0xFF29,},
    1287             :         {0xFF4A, 0xFF2A,},
    1288             :         {0xFF4B, 0xFF2B,},
    1289             :         {0xFF4C, 0xFF2C,},
    1290             :         {0xFF4D, 0xFF2D,},
    1291             :         {0xFF4E, 0xFF2E,},
    1292             :         {0xFF4F, 0xFF2F,},
    1293             :         {0xFF50, 0xFF30,},
    1294             :         {0xFF51, 0xFF31,},
    1295             :         {0xFF52, 0xFF32,},
    1296             :         {0xFF53, 0xFF33,},
    1297             :         {0xFF54, 0xFF34,},
    1298             :         {0xFF55, 0xFF35,},
    1299             :         {0xFF56, 0xFF36,},
    1300             :         {0xFF57, 0xFF37,},
    1301             :         {0xFF58, 0xFF38,},
    1302             :         {0xFF59, 0xFF39,},
    1303             :         {0xFF5A, 0xFF3A,},
    1304             :         {0x10428, 0x10400,},
    1305             :         {0x10429, 0x10401,},
    1306             :         {0x1042A, 0x10402,},
    1307             :         {0x1042B, 0x10403,},
    1308             :         {0x1042C, 0x10404,},
    1309             :         {0x1042D, 0x10405,},
    1310             :         {0x1042E, 0x10406,},
    1311             :         {0x1042F, 0x10407,},
    1312             :         {0x10430, 0x10408,},
    1313             :         {0x10431, 0x10409,},
    1314             :         {0x10432, 0x1040A,},
    1315             :         {0x10433, 0x1040B,},
    1316             :         {0x10434, 0x1040C,},
    1317             :         {0x10435, 0x1040D,},
    1318             :         {0x10436, 0x1040E,},
    1319             :         {0x10437, 0x1040F,},
    1320             :         {0x10438, 0x10410,},
    1321             :         {0x10439, 0x10411,},
    1322             :         {0x1043A, 0x10412,},
    1323             :         {0x1043B, 0x10413,},
    1324             :         {0x1043C, 0x10414,},
    1325             :         {0x1043D, 0x10415,},
    1326             :         {0x1043E, 0x10416,},
    1327             :         {0x1043F, 0x10417,},
    1328             :         {0x10440, 0x10418,},
    1329             :         {0x10441, 0x10419,},
    1330             :         {0x10442, 0x1041A,},
    1331             :         {0x10443, 0x1041B,},
    1332             :         {0x10444, 0x1041C,},
    1333             :         {0x10445, 0x1041D,},
    1334             :         {0x10446, 0x1041E,},
    1335             :         {0x10447, 0x1041F,},
    1336             :         {0x10448, 0x10420,},
    1337             :         {0x10449, 0x10421,},
    1338             :         {0x1044A, 0x10422,},
    1339             :         {0x1044B, 0x10423,},
    1340             :         {0x1044C, 0x10424,},
    1341             :         {0x1044D, 0x10425,},
    1342             :         {0x1044E, 0x10426,},
    1343             :         {0x1044F, 0x10427,},
    1344             :         {0x104D8, 0x104B0,},
    1345             :         {0x104D9, 0x104B1,},
    1346             :         {0x104DA, 0x104B2,},
    1347             :         {0x104DB, 0x104B3,},
    1348             :         {0x104DC, 0x104B4,},
    1349             :         {0x104DD, 0x104B5,},
    1350             :         {0x104DE, 0x104B6,},
    1351             :         {0x104DF, 0x104B7,},
    1352             :         {0x104E0, 0x104B8,},
    1353             :         {0x104E1, 0x104B9,},
    1354             :         {0x104E2, 0x104BA,},
    1355             :         {0x104E3, 0x104BB,},
    1356             :         {0x104E4, 0x104BC,},
    1357             :         {0x104E5, 0x104BD,},
    1358             :         {0x104E6, 0x104BE,},
    1359             :         {0x104E7, 0x104BF,},
    1360             :         {0x104E8, 0x104C0,},
    1361             :         {0x104E9, 0x104C1,},
    1362             :         {0x104EA, 0x104C2,},
    1363             :         {0x104EB, 0x104C3,},
    1364             :         {0x104EC, 0x104C4,},
    1365             :         {0x104ED, 0x104C5,},
    1366             :         {0x104EE, 0x104C6,},
    1367             :         {0x104EF, 0x104C7,},
    1368             :         {0x104F0, 0x104C8,},
    1369             :         {0x104F1, 0x104C9,},
    1370             :         {0x104F2, 0x104CA,},
    1371             :         {0x104F3, 0x104CB,},
    1372             :         {0x104F4, 0x104CC,},
    1373             :         {0x104F5, 0x104CD,},
    1374             :         {0x104F6, 0x104CE,},
    1375             :         {0x104F7, 0x104CF,},
    1376             :         {0x104F8, 0x104D0,},
    1377             :         {0x104F9, 0x104D1,},
    1378             :         {0x104FA, 0x104D2,},
    1379             :         {0x104FB, 0x104D3,},
    1380             :         {0x10597, 0x10570,},
    1381             :         {0x10598, 0x10571,},
    1382             :         {0x10599, 0x10572,},
    1383             :         {0x1059A, 0x10573,},
    1384             :         {0x1059B, 0x10574,},
    1385             :         {0x1059C, 0x10575,},
    1386             :         {0x1059D, 0x10576,},
    1387             :         {0x1059E, 0x10577,},
    1388             :         {0x1059F, 0x10578,},
    1389             :         {0x105A0, 0x10579,},
    1390             :         {0x105A1, 0x1057A,},
    1391             :         {0x105A3, 0x1057C,},
    1392             :         {0x105A4, 0x1057D,},
    1393             :         {0x105A5, 0x1057E,},
    1394             :         {0x105A6, 0x1057F,},
    1395             :         {0x105A7, 0x10580,},
    1396             :         {0x105A8, 0x10581,},
    1397             :         {0x105A9, 0x10582,},
    1398             :         {0x105AA, 0x10583,},
    1399             :         {0x105AB, 0x10584,},
    1400             :         {0x105AC, 0x10585,},
    1401             :         {0x105AD, 0x10586,},
    1402             :         {0x105AE, 0x10587,},
    1403             :         {0x105AF, 0x10588,},
    1404             :         {0x105B0, 0x10589,},
    1405             :         {0x105B1, 0x1058A,},
    1406             :         {0x105B3, 0x1058C,},
    1407             :         {0x105B4, 0x1058D,},
    1408             :         {0x105B5, 0x1058E,},
    1409             :         {0x105B6, 0x1058F,},
    1410             :         {0x105B7, 0x10590,},
    1411             :         {0x105B8, 0x10591,},
    1412             :         {0x105B9, 0x10592,},
    1413             :         {0x105BB, 0x10594,},
    1414             :         {0x105BC, 0x10595,},
    1415             :         {0x10CC0, 0x10C80,},
    1416             :         {0x10CC1, 0x10C81,},
    1417             :         {0x10CC2, 0x10C82,},
    1418             :         {0x10CC3, 0x10C83,},
    1419             :         {0x10CC4, 0x10C84,},
    1420             :         {0x10CC5, 0x10C85,},
    1421             :         {0x10CC6, 0x10C86,},
    1422             :         {0x10CC7, 0x10C87,},
    1423             :         {0x10CC8, 0x10C88,},
    1424             :         {0x10CC9, 0x10C89,},
    1425             :         {0x10CCA, 0x10C8A,},
    1426             :         {0x10CCB, 0x10C8B,},
    1427             :         {0x10CCC, 0x10C8C,},
    1428             :         {0x10CCD, 0x10C8D,},
    1429             :         {0x10CCE, 0x10C8E,},
    1430             :         {0x10CCF, 0x10C8F,},
    1431             :         {0x10CD0, 0x10C90,},
    1432             :         {0x10CD1, 0x10C91,},
    1433             :         {0x10CD2, 0x10C92,},
    1434             :         {0x10CD3, 0x10C93,},
    1435             :         {0x10CD4, 0x10C94,},
    1436             :         {0x10CD5, 0x10C95,},
    1437             :         {0x10CD6, 0x10C96,},
    1438             :         {0x10CD7, 0x10C97,},
    1439             :         {0x10CD8, 0x10C98,},
    1440             :         {0x10CD9, 0x10C99,},
    1441             :         {0x10CDA, 0x10C9A,},
    1442             :         {0x10CDB, 0x10C9B,},
    1443             :         {0x10CDC, 0x10C9C,},
    1444             :         {0x10CDD, 0x10C9D,},
    1445             :         {0x10CDE, 0x10C9E,},
    1446             :         {0x10CDF, 0x10C9F,},
    1447             :         {0x10CE0, 0x10CA0,},
    1448             :         {0x10CE1, 0x10CA1,},
    1449             :         {0x10CE2, 0x10CA2,},
    1450             :         {0x10CE3, 0x10CA3,},
    1451             :         {0x10CE4, 0x10CA4,},
    1452             :         {0x10CE5, 0x10CA5,},
    1453             :         {0x10CE6, 0x10CA6,},
    1454             :         {0x10CE7, 0x10CA7,},
    1455             :         {0x10CE8, 0x10CA8,},
    1456             :         {0x10CE9, 0x10CA9,},
    1457             :         {0x10CEA, 0x10CAA,},
    1458             :         {0x10CEB, 0x10CAB,},
    1459             :         {0x10CEC, 0x10CAC,},
    1460             :         {0x10CED, 0x10CAD,},
    1461             :         {0x10CEE, 0x10CAE,},
    1462             :         {0x10CEF, 0x10CAF,},
    1463             :         {0x10CF0, 0x10CB0,},
    1464             :         {0x10CF1, 0x10CB1,},
    1465             :         {0x10CF2, 0x10CB2,},
    1466             :         {0x118C0, 0x118A0,},
    1467             :         {0x118C1, 0x118A1,},
    1468             :         {0x118C2, 0x118A2,},
    1469             :         {0x118C3, 0x118A3,},
    1470             :         {0x118C4, 0x118A4,},
    1471             :         {0x118C5, 0x118A5,},
    1472             :         {0x118C6, 0x118A6,},
    1473             :         {0x118C7, 0x118A7,},
    1474             :         {0x118C8, 0x118A8,},
    1475             :         {0x118C9, 0x118A9,},
    1476             :         {0x118CA, 0x118AA,},
    1477             :         {0x118CB, 0x118AB,},
    1478             :         {0x118CC, 0x118AC,},
    1479             :         {0x118CD, 0x118AD,},
    1480             :         {0x118CE, 0x118AE,},
    1481             :         {0x118CF, 0x118AF,},
    1482             :         {0x118D0, 0x118B0,},
    1483             :         {0x118D1, 0x118B1,},
    1484             :         {0x118D2, 0x118B2,},
    1485             :         {0x118D3, 0x118B3,},
    1486             :         {0x118D4, 0x118B4,},
    1487             :         {0x118D5, 0x118B5,},
    1488             :         {0x118D6, 0x118B6,},
    1489             :         {0x118D7, 0x118B7,},
    1490             :         {0x118D8, 0x118B8,},
    1491             :         {0x118D9, 0x118B9,},
    1492             :         {0x118DA, 0x118BA,},
    1493             :         {0x118DB, 0x118BB,},
    1494             :         {0x118DC, 0x118BC,},
    1495             :         {0x118DD, 0x118BD,},
    1496             :         {0x118DE, 0x118BE,},
    1497             :         {0x118DF, 0x118BF,},
    1498             :         {0x16E60, 0x16E40,},
    1499             :         {0x16E61, 0x16E41,},
    1500             :         {0x16E62, 0x16E42,},
    1501             :         {0x16E63, 0x16E43,},
    1502             :         {0x16E64, 0x16E44,},
    1503             :         {0x16E65, 0x16E45,},
    1504             :         {0x16E66, 0x16E46,},
    1505             :         {0x16E67, 0x16E47,},
    1506             :         {0x16E68, 0x16E48,},
    1507             :         {0x16E69, 0x16E49,},
    1508             :         {0x16E6A, 0x16E4A,},
    1509             :         {0x16E6B, 0x16E4B,},
    1510             :         {0x16E6C, 0x16E4C,},
    1511             :         {0x16E6D, 0x16E4D,},
    1512             :         {0x16E6E, 0x16E4E,},
    1513             :         {0x16E6F, 0x16E4F,},
    1514             :         {0x16E70, 0x16E50,},
    1515             :         {0x16E71, 0x16E51,},
    1516             :         {0x16E72, 0x16E52,},
    1517             :         {0x16E73, 0x16E53,},
    1518             :         {0x16E74, 0x16E54,},
    1519             :         {0x16E75, 0x16E55,},
    1520             :         {0x16E76, 0x16E56,},
    1521             :         {0x16E77, 0x16E57,},
    1522             :         {0x16E78, 0x16E58,},
    1523             :         {0x16E79, 0x16E59,},
    1524             :         {0x16E7A, 0x16E5A,},
    1525             :         {0x16E7B, 0x16E5B,},
    1526             :         {0x16E7C, 0x16E5C,},
    1527             :         {0x16E7D, 0x16E5D,},
    1528             :         {0x16E7E, 0x16E5E,},
    1529             :         {0x16E7F, 0x16E5F,},
    1530             :         {0x1E922, 0x1E900,},
    1531             :         {0x1E923, 0x1E901,},
    1532             :         {0x1E924, 0x1E902,},
    1533             :         {0x1E925, 0x1E903,},
    1534             :         {0x1E926, 0x1E904,},
    1535             :         {0x1E927, 0x1E905,},
    1536             :         {0x1E928, 0x1E906,},
    1537             :         {0x1E929, 0x1E907,},
    1538             :         {0x1E92A, 0x1E908,},
    1539             :         {0x1E92B, 0x1E909,},
    1540             :         {0x1E92C, 0x1E90A,},
    1541             :         {0x1E92D, 0x1E90B,},
    1542             :         {0x1E92E, 0x1E90C,},
    1543             :         {0x1E92F, 0x1E90D,},
    1544             :         {0x1E930, 0x1E90E,},
    1545             :         {0x1E931, 0x1E90F,},
    1546             :         {0x1E932, 0x1E910,},
    1547             :         {0x1E933, 0x1E911,},
    1548             :         {0x1E934, 0x1E912,},
    1549             :         {0x1E935, 0x1E913,},
    1550             :         {0x1E936, 0x1E914,},
    1551             :         {0x1E937, 0x1E915,},
    1552             :         {0x1E938, 0x1E916,},
    1553             :         {0x1E939, 0x1E917,},
    1554             :         {0x1E93A, 0x1E918,},
    1555             :         {0x1E93B, 0x1E919,},
    1556             :         {0x1E93C, 0x1E91A,},
    1557             :         {0x1E93D, 0x1E91B,},
    1558             :         {0x1E93E, 0x1E91C,},
    1559             :         {0x1E93F, 0x1E91D,},
    1560             :         {0x1E940, 0x1E91E,},
    1561             :         {0x1E941, 0x1E91F,},
    1562             :         {0x1E942, 0x1E920,},
    1563             :         {0x1E943, 0x1E921,},
    1564             : }, UTF8_toLower[] = {                   /* code points with non-null lowercase conversion */
    1565             :         {0x0041, 0x0061,},
    1566             :         {0x0042, 0x0062,},
    1567             :         {0x0043, 0x0063,},
    1568             :         {0x0044, 0x0064,},
    1569             :         {0x0045, 0x0065,},
    1570             :         {0x0046, 0x0066,},
    1571             :         {0x0047, 0x0067,},
    1572             :         {0x0048, 0x0068,},
    1573             :         {0x0049, 0x0069,},
    1574             :         {0x004A, 0x006A,},
    1575             :         {0x004B, 0x006B,},
    1576             :         {0x004C, 0x006C,},
    1577             :         {0x004D, 0x006D,},
    1578             :         {0x004E, 0x006E,},
    1579             :         {0x004F, 0x006F,},
    1580             :         {0x0050, 0x0070,},
    1581             :         {0x0051, 0x0071,},
    1582             :         {0x0052, 0x0072,},
    1583             :         {0x0053, 0x0073,},
    1584             :         {0x0054, 0x0074,},
    1585             :         {0x0055, 0x0075,},
    1586             :         {0x0056, 0x0076,},
    1587             :         {0x0057, 0x0077,},
    1588             :         {0x0058, 0x0078,},
    1589             :         {0x0059, 0x0079,},
    1590             :         {0x005A, 0x007A,},
    1591             :         {0x00C0, 0x00E0,},
    1592             :         {0x00C1, 0x00E1,},
    1593             :         {0x00C2, 0x00E2,},
    1594             :         {0x00C3, 0x00E3,},
    1595             :         {0x00C4, 0x00E4,},
    1596             :         {0x00C5, 0x00E5,},
    1597             :         {0x00C6, 0x00E6,},
    1598             :         {0x00C7, 0x00E7,},
    1599             :         {0x00C8, 0x00E8,},
    1600             :         {0x00C9, 0x00E9,},
    1601             :         {0x00CA, 0x00EA,},
    1602             :         {0x00CB, 0x00EB,},
    1603             :         {0x00CC, 0x00EC,},
    1604             :         {0x00CD, 0x00ED,},
    1605             :         {0x00CE, 0x00EE,},
    1606             :         {0x00CF, 0x00EF,},
    1607             :         {0x00D0, 0x00F0,},
    1608             :         {0x00D1, 0x00F1,},
    1609             :         {0x00D2, 0x00F2,},
    1610             :         {0x00D3, 0x00F3,},
    1611             :         {0x00D4, 0x00F4,},
    1612             :         {0x00D5, 0x00F5,},
    1613             :         {0x00D6, 0x00F6,},
    1614             :         {0x00D8, 0x00F8,},
    1615             :         {0x00D9, 0x00F9,},
    1616             :         {0x00DA, 0x00FA,},
    1617             :         {0x00DB, 0x00FB,},
    1618             :         {0x00DC, 0x00FC,},
    1619             :         {0x00DD, 0x00FD,},
    1620             :         {0x00DE, 0x00FE,},
    1621             :         {0x0100, 0x0101,},
    1622             :         {0x0102, 0x0103,},
    1623             :         {0x0104, 0x0105,},
    1624             :         {0x0106, 0x0107,},
    1625             :         {0x0108, 0x0109,},
    1626             :         {0x010A, 0x010B,},
    1627             :         {0x010C, 0x010D,},
    1628             :         {0x010E, 0x010F,},
    1629             :         {0x0110, 0x0111,},
    1630             :         {0x0112, 0x0113,},
    1631             :         {0x0114, 0x0115,},
    1632             :         {0x0116, 0x0117,},
    1633             :         {0x0118, 0x0119,},
    1634             :         {0x011A, 0x011B,},
    1635             :         {0x011C, 0x011D,},
    1636             :         {0x011E, 0x011F,},
    1637             :         {0x0120, 0x0121,},
    1638             :         {0x0122, 0x0123,},
    1639             :         {0x0124, 0x0125,},
    1640             :         {0x0126, 0x0127,},
    1641             :         {0x0128, 0x0129,},
    1642             :         {0x012A, 0x012B,},
    1643             :         {0x012C, 0x012D,},
    1644             :         {0x012E, 0x012F,},
    1645             :         {0x0130, 0x0069,},
    1646             :         {0x0132, 0x0133,},
    1647             :         {0x0134, 0x0135,},
    1648             :         {0x0136, 0x0137,},
    1649             :         {0x0139, 0x013A,},
    1650             :         {0x013B, 0x013C,},
    1651             :         {0x013D, 0x013E,},
    1652             :         {0x013F, 0x0140,},
    1653             :         {0x0141, 0x0142,},
    1654             :         {0x0143, 0x0144,},
    1655             :         {0x0145, 0x0146,},
    1656             :         {0x0147, 0x0148,},
    1657             :         {0x014A, 0x014B,},
    1658             :         {0x014C, 0x014D,},
    1659             :         {0x014E, 0x014F,},
    1660             :         {0x0150, 0x0151,},
    1661             :         {0x0152, 0x0153,},
    1662             :         {0x0154, 0x0155,},
    1663             :         {0x0156, 0x0157,},
    1664             :         {0x0158, 0x0159,},
    1665             :         {0x015A, 0x015B,},
    1666             :         {0x015C, 0x015D,},
    1667             :         {0x015E, 0x015F,},
    1668             :         {0x0160, 0x0161,},
    1669             :         {0x0162, 0x0163,},
    1670             :         {0x0164, 0x0165,},
    1671             :         {0x0166, 0x0167,},
    1672             :         {0x0168, 0x0169,},
    1673             :         {0x016A, 0x016B,},
    1674             :         {0x016C, 0x016D,},
    1675             :         {0x016E, 0x016F,},
    1676             :         {0x0170, 0x0171,},
    1677             :         {0x0172, 0x0173,},
    1678             :         {0x0174, 0x0175,},
    1679             :         {0x0176, 0x0177,},
    1680             :         {0x0178, 0x00FF,},
    1681             :         {0x0179, 0x017A,},
    1682             :         {0x017B, 0x017C,},
    1683             :         {0x017D, 0x017E,},
    1684             :         {0x0181, 0x0253,},
    1685             :         {0x0182, 0x0183,},
    1686             :         {0x0184, 0x0185,},
    1687             :         {0x0186, 0x0254,},
    1688             :         {0x0187, 0x0188,},
    1689             :         {0x0189, 0x0256,},
    1690             :         {0x018A, 0x0257,},
    1691             :         {0x018B, 0x018C,},
    1692             :         {0x018E, 0x01DD,},
    1693             :         {0x018F, 0x0259,},
    1694             :         {0x0190, 0x025B,},
    1695             :         {0x0191, 0x0192,},
    1696             :         {0x0193, 0x0260,},
    1697             :         {0x0194, 0x0263,},
    1698             :         {0x0196, 0x0269,},
    1699             :         {0x0197, 0x0268,},
    1700             :         {0x0198, 0x0199,},
    1701             :         {0x019C, 0x026F,},
    1702             :         {0x019D, 0x0272,},
    1703             :         {0x019F, 0x0275,},
    1704             :         {0x01A0, 0x01A1,},
    1705             :         {0x01A2, 0x01A3,},
    1706             :         {0x01A4, 0x01A5,},
    1707             :         {0x01A6, 0x0280,},
    1708             :         {0x01A7, 0x01A8,},
    1709             :         {0x01A9, 0x0283,},
    1710             :         {0x01AC, 0x01AD,},
    1711             :         {0x01AE, 0x0288,},
    1712             :         {0x01AF, 0x01B0,},
    1713             :         {0x01B1, 0x028A,},
    1714             :         {0x01B2, 0x028B,},
    1715             :         {0x01B3, 0x01B4,},
    1716             :         {0x01B5, 0x01B6,},
    1717             :         {0x01B7, 0x0292,},
    1718             :         {0x01B8, 0x01B9,},
    1719             :         {0x01BC, 0x01BD,},
    1720             :         {0x01C4, 0x01C6,},
    1721             :         {0x01C5, 0x01C6,},
    1722             :         {0x01C7, 0x01C9,},
    1723             :         {0x01C8, 0x01C9,},
    1724             :         {0x01CA, 0x01CC,},
    1725             :         {0x01CB, 0x01CC,},
    1726             :         {0x01CD, 0x01CE,},
    1727             :         {0x01CF, 0x01D0,},
    1728             :         {0x01D1, 0x01D2,},
    1729             :         {0x01D3, 0x01D4,},
    1730             :         {0x01D5, 0x01D6,},
    1731             :         {0x01D7, 0x01D8,},
    1732             :         {0x01D9, 0x01DA,},
    1733             :         {0x01DB, 0x01DC,},
    1734             :         {0x01DE, 0x01DF,},
    1735             :         {0x01E0, 0x01E1,},
    1736             :         {0x01E2, 0x01E3,},
    1737             :         {0x01E4, 0x01E5,},
    1738             :         {0x01E6, 0x01E7,},
    1739             :         {0x01E8, 0x01E9,},
    1740             :         {0x01EA, 0x01EB,},
    1741             :         {0x01EC, 0x01ED,},
    1742             :         {0x01EE, 0x01EF,},
    1743             :         {0x01F1, 0x01F3,},
    1744             :         {0x01F2, 0x01F3,},
    1745             :         {0x01F4, 0x01F5,},
    1746             :         {0x01F6, 0x0195,},
    1747             :         {0x01F7, 0x01BF,},
    1748             :         {0x01F8, 0x01F9,},
    1749             :         {0x01FA, 0x01FB,},
    1750             :         {0x01FC, 0x01FD,},
    1751             :         {0x01FE, 0x01FF,},
    1752             :         {0x0200, 0x0201,},
    1753             :         {0x0202, 0x0203,},
    1754             :         {0x0204, 0x0205,},
    1755             :         {0x0206, 0x0207,},
    1756             :         {0x0208, 0x0209,},
    1757             :         {0x020A, 0x020B,},
    1758             :         {0x020C, 0x020D,},
    1759             :         {0x020E, 0x020F,},
    1760             :         {0x0210, 0x0211,},
    1761             :         {0x0212, 0x0213,},
    1762             :         {0x0214, 0x0215,},
    1763             :         {0x0216, 0x0217,},
    1764             :         {0x0218, 0x0219,},
    1765             :         {0x021A, 0x021B,},
    1766             :         {0x021C, 0x021D,},
    1767             :         {0x021E, 0x021F,},
    1768             :         {0x0220, 0x019E,},
    1769             :         {0x0222, 0x0223,},
    1770             :         {0x0224, 0x0225,},
    1771             :         {0x0226, 0x0227,},
    1772             :         {0x0228, 0x0229,},
    1773             :         {0x022A, 0x022B,},
    1774             :         {0x022C, 0x022D,},
    1775             :         {0x022E, 0x022F,},
    1776             :         {0x0230, 0x0231,},
    1777             :         {0x0232, 0x0233,},
    1778             :         {0x023A, 0x2C65,},
    1779             :         {0x023B, 0x023C,},
    1780             :         {0x023D, 0x019A,},
    1781             :         {0x023E, 0x2C66,},
    1782             :         {0x0241, 0x0242,},
    1783             :         {0x0243, 0x0180,},
    1784             :         {0x0244, 0x0289,},
    1785             :         {0x0245, 0x028C,},
    1786             :         {0x0246, 0x0247,},
    1787             :         {0x0248, 0x0249,},
    1788             :         {0x024A, 0x024B,},
    1789             :         {0x024C, 0x024D,},
    1790             :         {0x024E, 0x024F,},
    1791             :         {0x0370, 0x0371,},
    1792             :         {0x0372, 0x0373,},
    1793             :         {0x0376, 0x0377,},
    1794             :         {0x037F, 0x03F3,},
    1795             :         {0x0386, 0x03AC,},
    1796             :         {0x0388, 0x03AD,},
    1797             :         {0x0389, 0x03AE,},
    1798             :         {0x038A, 0x03AF,},
    1799             :         {0x038C, 0x03CC,},
    1800             :         {0x038E, 0x03CD,},
    1801             :         {0x038F, 0x03CE,},
    1802             :         {0x0391, 0x03B1,},
    1803             :         {0x0392, 0x03B2,},
    1804             :         {0x0393, 0x03B3,},
    1805             :         {0x0394, 0x03B4,},
    1806             :         {0x0395, 0x03B5,},
    1807             :         {0x0396, 0x03B6,},
    1808             :         {0x0397, 0x03B7,},
    1809             :         {0x0398, 0x03B8,},
    1810             :         {0x0399, 0x03B9,},
    1811             :         {0x039A, 0x03BA,},
    1812             :         {0x039B, 0x03BB,},
    1813             :         {0x039C, 0x03BC,},
    1814             :         {0x039D, 0x03BD,},
    1815             :         {0x039E, 0x03BE,},
    1816             :         {0x039F, 0x03BF,},
    1817             :         {0x03A0, 0x03C0,},
    1818             :         {0x03A1, 0x03C1,},
    1819             :         {0x03A3, 0x03C3,},
    1820             :         {0x03A4, 0x03C4,},
    1821             :         {0x03A5, 0x03C5,},
    1822             :         {0x03A6, 0x03C6,},
    1823             :         {0x03A7, 0x03C7,},
    1824             :         {0x03A8, 0x03C8,},
    1825             :         {0x03A9, 0x03C9,},
    1826             :         {0x03AA, 0x03CA,},
    1827             :         {0x03AB, 0x03CB,},
    1828             :         {0x03CF, 0x03D7,},
    1829             :         {0x03D8, 0x03D9,},
    1830             :         {0x03DA, 0x03DB,},
    1831             :         {0x03DC, 0x03DD,},
    1832             :         {0x03DE, 0x03DF,},
    1833             :         {0x03E0, 0x03E1,},
    1834             :         {0x03E2, 0x03E3,},
    1835             :         {0x03E4, 0x03E5,},
    1836             :         {0x03E6, 0x03E7,},
    1837             :         {0x03E8, 0x03E9,},
    1838             :         {0x03EA, 0x03EB,},
    1839             :         {0x03EC, 0x03ED,},
    1840             :         {0x03EE, 0x03EF,},
    1841             :         {0x03F4, 0x03B8,},
    1842             :         {0x03F7, 0x03F8,},
    1843             :         {0x03F9, 0x03F2,},
    1844             :         {0x03FA, 0x03FB,},
    1845             :         {0x03FD, 0x037B,},
    1846             :         {0x03FE, 0x037C,},
    1847             :         {0x03FF, 0x037D,},
    1848             :         {0x0400, 0x0450,},
    1849             :         {0x0401, 0x0451,},
    1850             :         {0x0402, 0x0452,},
    1851             :         {0x0403, 0x0453,},
    1852             :         {0x0404, 0x0454,},
    1853             :         {0x0405, 0x0455,},
    1854             :         {0x0406, 0x0456,},
    1855             :         {0x0407, 0x0457,},
    1856             :         {0x0408, 0x0458,},
    1857             :         {0x0409, 0x0459,},
    1858             :         {0x040A, 0x045A,},
    1859             :         {0x040B, 0x045B,},
    1860             :         {0x040C, 0x045C,},
    1861             :         {0x040D, 0x045D,},
    1862             :         {0x040E, 0x045E,},
    1863             :         {0x040F, 0x045F,},
    1864             :         {0x0410, 0x0430,},
    1865             :         {0x0411, 0x0431,},
    1866             :         {0x0412, 0x0432,},
    1867             :         {0x0413, 0x0433,},
    1868             :         {0x0414, 0x0434,},
    1869             :         {0x0415, 0x0435,},
    1870             :         {0x0416, 0x0436,},
    1871             :         {0x0417, 0x0437,},
    1872             :         {0x0418, 0x0438,},
    1873             :         {0x0419, 0x0439,},
    1874             :         {0x041A, 0x043A,},
    1875             :         {0x041B, 0x043B,},
    1876             :         {0x041C, 0x043C,},
    1877             :         {0x041D, 0x043D,},
    1878             :         {0x041E, 0x043E,},
    1879             :         {0x041F, 0x043F,},
    1880             :         {0x0420, 0x0440,},
    1881             :         {0x0421, 0x0441,},
    1882             :         {0x0422, 0x0442,},
    1883             :         {0x0423, 0x0443,},
    1884             :         {0x0424, 0x0444,},
    1885             :         {0x0425, 0x0445,},
    1886             :         {0x0426, 0x0446,},
    1887             :         {0x0427, 0x0447,},
    1888             :         {0x0428, 0x0448,},
    1889             :         {0x0429, 0x0449,},
    1890             :         {0x042A, 0x044A,},
    1891             :         {0x042B, 0x044B,},
    1892             :         {0x042C, 0x044C,},
    1893             :         {0x042D, 0x044D,},
    1894             :         {0x042E, 0x044E,},
    1895             :         {0x042F, 0x044F,},
    1896             :         {0x0460, 0x0461,},
    1897             :         {0x0462, 0x0463,},
    1898             :         {0x0464, 0x0465,},
    1899             :         {0x0466, 0x0467,},
    1900             :         {0x0468, 0x0469,},
    1901             :         {0x046A, 0x046B,},
    1902             :         {0x046C, 0x046D,},
    1903             :         {0x046E, 0x046F,},
    1904             :         {0x0470, 0x0471,},
    1905             :         {0x0472, 0x0473,},
    1906             :         {0x0474, 0x0475,},
    1907             :         {0x0476, 0x0477,},
    1908             :         {0x0478, 0x0479,},
    1909             :         {0x047A, 0x047B,},
    1910             :         {0x047C, 0x047D,},
    1911             :         {0x047E, 0x047F,},
    1912             :         {0x0480, 0x0481,},
    1913             :         {0x048A, 0x048B,},
    1914             :         {0x048C, 0x048D,},
    1915             :         {0x048E, 0x048F,},
    1916             :         {0x0490, 0x0491,},
    1917             :         {0x0492, 0x0493,},
    1918             :         {0x0494, 0x0495,},
    1919             :         {0x0496, 0x0497,},
    1920             :         {0x0498, 0x0499,},
    1921             :         {0x049A, 0x049B,},
    1922             :         {0x049C, 0x049D,},
    1923             :         {0x049E, 0x049F,},
    1924             :         {0x04A0, 0x04A1,},
    1925             :         {0x04A2, 0x04A3,},
    1926             :         {0x04A4, 0x04A5,},
    1927             :         {0x04A6, 0x04A7,},
    1928             :         {0x04A8, 0x04A9,},
    1929             :         {0x04AA, 0x04AB,},
    1930             :         {0x04AC, 0x04AD,},
    1931             :         {0x04AE, 0x04AF,},
    1932             :         {0x04B0, 0x04B1,},
    1933             :         {0x04B2, 0x04B3,},
    1934             :         {0x04B4, 0x04B5,},
    1935             :         {0x04B6, 0x04B7,},
    1936             :         {0x04B8, 0x04B9,},
    1937             :         {0x04BA, 0x04BB,},
    1938             :         {0x04BC, 0x04BD,},
    1939             :         {0x04BE, 0x04BF,},
    1940             :         {0x04C0, 0x04CF,},
    1941             :         {0x04C1, 0x04C2,},
    1942             :         {0x04C3, 0x04C4,},
    1943             :         {0x04C5, 0x04C6,},
    1944             :         {0x04C7, 0x04C8,},
    1945             :         {0x04C9, 0x04CA,},
    1946             :         {0x04CB, 0x04CC,},
    1947             :         {0x04CD, 0x04CE,},
    1948             :         {0x04D0, 0x04D1,},
    1949             :         {0x04D2, 0x04D3,},
    1950             :         {0x04D4, 0x04D5,},
    1951             :         {0x04D6, 0x04D7,},
    1952             :         {0x04D8, 0x04D9,},
    1953             :         {0x04DA, 0x04DB,},
    1954             :         {0x04DC, 0x04DD,},
    1955             :         {0x04DE, 0x04DF,},
    1956             :         {0x04E0, 0x04E1,},
    1957             :         {0x04E2, 0x04E3,},
    1958             :         {0x04E4, 0x04E5,},
    1959             :         {0x04E6, 0x04E7,},
    1960             :         {0x04E8, 0x04E9,},
    1961             :         {0x04EA, 0x04EB,},
    1962             :         {0x04EC, 0x04ED,},
    1963             :         {0x04EE, 0x04EF,},
    1964             :         {0x04F0, 0x04F1,},
    1965             :         {0x04F2, 0x04F3,},
    1966             :         {0x04F4, 0x04F5,},
    1967             :         {0x04F6, 0x04F7,},
    1968             :         {0x04F8, 0x04F9,},
    1969             :         {0x04FA, 0x04FB,},
    1970             :         {0x04FC, 0x04FD,},
    1971             :         {0x04FE, 0x04FF,},
    1972             :         {0x0500, 0x0501,},
    1973             :         {0x0502, 0x0503,},
    1974             :         {0x0504, 0x0505,},
    1975             :         {0x0506, 0x0507,},
    1976             :         {0x0508, 0x0509,},
    1977             :         {0x050A, 0x050B,},
    1978             :         {0x050C, 0x050D,},
    1979             :         {0x050E, 0x050F,},
    1980             :         {0x0510, 0x0511,},
    1981             :         {0x0512, 0x0513,},
    1982             :         {0x0514, 0x0515,},
    1983             :         {0x0516, 0x0517,},
    1984             :         {0x0518, 0x0519,},
    1985             :         {0x051A, 0x051B,},
    1986             :         {0x051C, 0x051D,},
    1987             :         {0x051E, 0x051F,},
    1988             :         {0x0520, 0x0521,},
    1989             :         {0x0522, 0x0523,},
    1990             :         {0x0524, 0x0525,},
    1991             :         {0x0526, 0x0527,},
    1992             :         {0x0528, 0x0529,},
    1993             :         {0x052A, 0x052B,},
    1994             :         {0x052C, 0x052D,},
    1995             :         {0x052E, 0x052F,},
    1996             :         {0x0531, 0x0561,},
    1997             :         {0x0532, 0x0562,},
    1998             :         {0x0533, 0x0563,},
    1999             :         {0x0534, 0x0564,},
    2000             :         {0x0535, 0x0565,},
    2001             :         {0x0536, 0x0566,},
    2002             :         {0x0537, 0x0567,},
    2003             :         {0x0538, 0x0568,},
    2004             :         {0x0539, 0x0569,},
    2005             :         {0x053A, 0x056A,},
    2006             :         {0x053B, 0x056B,},
    2007             :         {0x053C, 0x056C,},
    2008             :         {0x053D, 0x056D,},
    2009             :         {0x053E, 0x056E,},
    2010             :         {0x053F, 0x056F,},
    2011             :         {0x0540, 0x0570,},
    2012             :         {0x0541, 0x0571,},
    2013             :         {0x0542, 0x0572,},
    2014             :         {0x0543, 0x0573,},
    2015             :         {0x0544, 0x0574,},
    2016             :         {0x0545, 0x0575,},
    2017             :         {0x0546, 0x0576,},
    2018             :         {0x0547, 0x0577,},
    2019             :         {0x0548, 0x0578,},
    2020             :         {0x0549, 0x0579,},
    2021             :         {0x054A, 0x057A,},
    2022             :         {0x054B, 0x057B,},
    2023             :         {0x054C, 0x057C,},
    2024             :         {0x054D, 0x057D,},
    2025             :         {0x054E, 0x057E,},
    2026             :         {0x054F, 0x057F,},
    2027             :         {0x0550, 0x0580,},
    2028             :         {0x0551, 0x0581,},
    2029             :         {0x0552, 0x0582,},
    2030             :         {0x0553, 0x0583,},
    2031             :         {0x0554, 0x0584,},
    2032             :         {0x0555, 0x0585,},
    2033             :         {0x0556, 0x0586,},
    2034             :         {0x10A0, 0x2D00,},
    2035             :         {0x10A1, 0x2D01,},
    2036             :         {0x10A2, 0x2D02,},
    2037             :         {0x10A3, 0x2D03,},
    2038             :         {0x10A4, 0x2D04,},
    2039             :         {0x10A5, 0x2D05,},
    2040             :         {0x10A6, 0x2D06,},
    2041             :         {0x10A7, 0x2D07,},
    2042             :         {0x10A8, 0x2D08,},
    2043             :         {0x10A9, 0x2D09,},
    2044             :         {0x10AA, 0x2D0A,},
    2045             :         {0x10AB, 0x2D0B,},
    2046             :         {0x10AC, 0x2D0C,},
    2047             :         {0x10AD, 0x2D0D,},
    2048             :         {0x10AE, 0x2D0E,},
    2049             :         {0x10AF, 0x2D0F,},
    2050             :         {0x10B0, 0x2D10,},
    2051             :         {0x10B1, 0x2D11,},
    2052             :         {0x10B2, 0x2D12,},
    2053             :         {0x10B3, 0x2D13,},
    2054             :         {0x10B4, 0x2D14,},
    2055             :         {0x10B5, 0x2D15,},
    2056             :         {0x10B6, 0x2D16,},
    2057             :         {0x10B7, 0x2D17,},
    2058             :         {0x10B8, 0x2D18,},
    2059             :         {0x10B9, 0x2D19,},
    2060             :         {0x10BA, 0x2D1A,},
    2061             :         {0x10BB, 0x2D1B,},
    2062             :         {0x10BC, 0x2D1C,},
    2063             :         {0x10BD, 0x2D1D,},
    2064             :         {0x10BE, 0x2D1E,},
    2065             :         {0x10BF, 0x2D1F,},
    2066             :         {0x10C0, 0x2D20,},
    2067             :         {0x10C1, 0x2D21,},
    2068             :         {0x10C2, 0x2D22,},
    2069             :         {0x10C3, 0x2D23,},
    2070             :         {0x10C4, 0x2D24,},
    2071             :         {0x10C5, 0x2D25,},
    2072             :         {0x10C7, 0x2D27,},
    2073             :         {0x10CD, 0x2D2D,},
    2074             :         {0x13A0, 0xAB70,},
    2075             :         {0x13A1, 0xAB71,},
    2076             :         {0x13A2, 0xAB72,},
    2077             :         {0x13A3, 0xAB73,},
    2078             :         {0x13A4, 0xAB74,},
    2079             :         {0x13A5, 0xAB75,},
    2080             :         {0x13A6, 0xAB76,},
    2081             :         {0x13A7, 0xAB77,},
    2082             :         {0x13A8, 0xAB78,},
    2083             :         {0x13A9, 0xAB79,},
    2084             :         {0x13AA, 0xAB7A,},
    2085             :         {0x13AB, 0xAB7B,},
    2086             :         {0x13AC, 0xAB7C,},
    2087             :         {0x13AD, 0xAB7D,},
    2088             :         {0x13AE, 0xAB7E,},
    2089             :         {0x13AF, 0xAB7F,},
    2090             :         {0x13B0, 0xAB80,},
    2091             :         {0x13B1, 0xAB81,},
    2092             :         {0x13B2, 0xAB82,},
    2093             :         {0x13B3, 0xAB83,},
    2094             :         {0x13B4, 0xAB84,},
    2095             :         {0x13B5, 0xAB85,},
    2096             :         {0x13B6, 0xAB86,},
    2097             :         {0x13B7, 0xAB87,},
    2098             :         {0x13B8, 0xAB88,},
    2099             :         {0x13B9, 0xAB89,},
    2100             :         {0x13BA, 0xAB8A,},
    2101             :         {0x13BB, 0xAB8B,},
    2102             :         {0x13BC, 0xAB8C,},
    2103             :         {0x13BD, 0xAB8D,},
    2104             :         {0x13BE, 0xAB8E,},
    2105             :         {0x13BF, 0xAB8F,},
    2106             :         {0x13C0, 0xAB90,},
    2107             :         {0x13C1, 0xAB91,},
    2108             :         {0x13C2, 0xAB92,},
    2109             :         {0x13C3, 0xAB93,},
    2110             :         {0x13C4, 0xAB94,},
    2111             :         {0x13C5, 0xAB95,},
    2112             :         {0x13C6, 0xAB96,},
    2113             :         {0x13C7, 0xAB97,},
    2114             :         {0x13C8, 0xAB98,},
    2115             :         {0x13C9, 0xAB99,},
    2116             :         {0x13CA, 0xAB9A,},
    2117             :         {0x13CB, 0xAB9B,},
    2118             :         {0x13CC, 0xAB9C,},
    2119             :         {0x13CD, 0xAB9D,},
    2120             :         {0x13CE, 0xAB9E,},
    2121             :         {0x13CF, 0xAB9F,},
    2122             :         {0x13D0, 0xABA0,},
    2123             :         {0x13D1, 0xABA1,},
    2124             :         {0x13D2, 0xABA2,},
    2125             :         {0x13D3, 0xABA3,},
    2126             :         {0x13D4, 0xABA4,},
    2127             :         {0x13D5, 0xABA5,},
    2128             :         {0x13D6, 0xABA6,},
    2129             :         {0x13D7, 0xABA7,},
    2130             :         {0x13D8, 0xABA8,},
    2131             :         {0x13D9, 0xABA9,},
    2132             :         {0x13DA, 0xABAA,},
    2133             :         {0x13DB, 0xABAB,},
    2134             :         {0x13DC, 0xABAC,},
    2135             :         {0x13DD, 0xABAD,},
    2136             :         {0x13DE, 0xABAE,},
    2137             :         {0x13DF, 0xABAF,},
    2138             :         {0x13E0, 0xABB0,},
    2139             :         {0x13E1, 0xABB1,},
    2140             :         {0x13E2, 0xABB2,},
    2141             :         {0x13E3, 0xABB3,},
    2142             :         {0x13E4, 0xABB4,},
    2143             :         {0x13E5, 0xABB5,},
    2144             :         {0x13E6, 0xABB6,},
    2145             :         {0x13E7, 0xABB7,},
    2146             :         {0x13E8, 0xABB8,},
    2147             :         {0x13E9, 0xABB9,},
    2148             :         {0x13EA, 0xABBA,},
    2149             :         {0x13EB, 0xABBB,},
    2150             :         {0x13EC, 0xABBC,},
    2151             :         {0x13ED, 0xABBD,},
    2152             :         {0x13EE, 0xABBE,},
    2153             :         {0x13EF, 0xABBF,},
    2154             :         {0x13F0, 0x13F8,},
    2155             :         {0x13F1, 0x13F9,},
    2156             :         {0x13F2, 0x13FA,},
    2157             :         {0x13F3, 0x13FB,},
    2158             :         {0x13F4, 0x13FC,},
    2159             :         {0x13F5, 0x13FD,},
    2160             :         {0x1C90, 0x10D0,},
    2161             :         {0x1C91, 0x10D1,},
    2162             :         {0x1C92, 0x10D2,},
    2163             :         {0x1C93, 0x10D3,},
    2164             :         {0x1C94, 0x10D4,},
    2165             :         {0x1C95, 0x10D5,},
    2166             :         {0x1C96, 0x10D6,},
    2167             :         {0x1C97, 0x10D7,},
    2168             :         {0x1C98, 0x10D8,},
    2169             :         {0x1C99, 0x10D9,},
    2170             :         {0x1C9A, 0x10DA,},
    2171             :         {0x1C9B, 0x10DB,},
    2172             :         {0x1C9C, 0x10DC,},
    2173             :         {0x1C9D, 0x10DD,},
    2174             :         {0x1C9E, 0x10DE,},
    2175             :         {0x1C9F, 0x10DF,},
    2176             :         {0x1CA0, 0x10E0,},
    2177             :         {0x1CA1, 0x10E1,},
    2178             :         {0x1CA2, 0x10E2,},
    2179             :         {0x1CA3, 0x10E3,},
    2180             :         {0x1CA4, 0x10E4,},
    2181             :         {0x1CA5, 0x10E5,},
    2182             :         {0x1CA6, 0x10E6,},
    2183             :         {0x1CA7, 0x10E7,},
    2184             :         {0x1CA8, 0x10E8,},
    2185             :         {0x1CA9, 0x10E9,},
    2186             :         {0x1CAA, 0x10EA,},
    2187             :         {0x1CAB, 0x10EB,},
    2188             :         {0x1CAC, 0x10EC,},
    2189             :         {0x1CAD, 0x10ED,},
    2190             :         {0x1CAE, 0x10EE,},
    2191             :         {0x1CAF, 0x10EF,},
    2192             :         {0x1CB0, 0x10F0,},
    2193             :         {0x1CB1, 0x10F1,},
    2194             :         {0x1CB2, 0x10F2,},
    2195             :         {0x1CB3, 0x10F3,},
    2196             :         {0x1CB4, 0x10F4,},
    2197             :         {0x1CB5, 0x10F5,},
    2198             :         {0x1CB6, 0x10F6,},
    2199             :         {0x1CB7, 0x10F7,},
    2200             :         {0x1CB8, 0x10F8,},
    2201             :         {0x1CB9, 0x10F9,},
    2202             :         {0x1CBA, 0x10FA,},
    2203             :         {0x1CBD, 0x10FD,},
    2204             :         {0x1CBE, 0x10FE,},
    2205             :         {0x1CBF, 0x10FF,},
    2206             :         {0x1E00, 0x1E01,},
    2207             :         {0x1E02, 0x1E03,},
    2208             :         {0x1E04, 0x1E05,},
    2209             :         {0x1E06, 0x1E07,},
    2210             :         {0x1E08, 0x1E09,},
    2211             :         {0x1E0A, 0x1E0B,},
    2212             :         {0x1E0C, 0x1E0D,},
    2213             :         {0x1E0E, 0x1E0F,},
    2214             :         {0x1E10, 0x1E11,},
    2215             :         {0x1E12, 0x1E13,},
    2216             :         {0x1E14, 0x1E15,},
    2217             :         {0x1E16, 0x1E17,},
    2218             :         {0x1E18, 0x1E19,},
    2219             :         {0x1E1A, 0x1E1B,},
    2220             :         {0x1E1C, 0x1E1D,},
    2221             :         {0x1E1E, 0x1E1F,},
    2222             :         {0x1E20, 0x1E21,},
    2223             :         {0x1E22, 0x1E23,},
    2224             :         {0x1E24, 0x1E25,},
    2225             :         {0x1E26, 0x1E27,},
    2226             :         {0x1E28, 0x1E29,},
    2227             :         {0x1E2A, 0x1E2B,},
    2228             :         {0x1E2C, 0x1E2D,},
    2229             :         {0x1E2E, 0x1E2F,},
    2230             :         {0x1E30, 0x1E31,},
    2231             :         {0x1E32, 0x1E33,},
    2232             :         {0x1E34, 0x1E35,},
    2233             :         {0x1E36, 0x1E37,},
    2234             :         {0x1E38, 0x1E39,},
    2235             :         {0x1E3A, 0x1E3B,},
    2236             :         {0x1E3C, 0x1E3D,},
    2237             :         {0x1E3E, 0x1E3F,},
    2238             :         {0x1E40, 0x1E41,},
    2239             :         {0x1E42, 0x1E43,},
    2240             :         {0x1E44, 0x1E45,},
    2241             :         {0x1E46, 0x1E47,},
    2242             :         {0x1E48, 0x1E49,},
    2243             :         {0x1E4A, 0x1E4B,},
    2244             :         {0x1E4C, 0x1E4D,},
    2245             :         {0x1E4E, 0x1E4F,},
    2246             :         {0x1E50, 0x1E51,},
    2247             :         {0x1E52, 0x1E53,},
    2248             :         {0x1E54, 0x1E55,},
    2249             :         {0x1E56, 0x1E57,},
    2250             :         {0x1E58, 0x1E59,},
    2251             :         {0x1E5A, 0x1E5B,},
    2252             :         {0x1E5C, 0x1E5D,},
    2253             :         {0x1E5E, 0x1E5F,},
    2254             :         {0x1E60, 0x1E61,},
    2255             :         {0x1E62, 0x1E63,},
    2256             :         {0x1E64, 0x1E65,},
    2257             :         {0x1E66, 0x1E67,},
    2258             :         {0x1E68, 0x1E69,},
    2259             :         {0x1E6A, 0x1E6B,},
    2260             :         {0x1E6C, 0x1E6D,},
    2261             :         {0x1E6E, 0x1E6F,},
    2262             :         {0x1E70, 0x1E71,},
    2263             :         {0x1E72, 0x1E73,},
    2264             :         {0x1E74, 0x1E75,},
    2265             :         {0x1E76, 0x1E77,},
    2266             :         {0x1E78, 0x1E79,},
    2267             :         {0x1E7A, 0x1E7B,},
    2268             :         {0x1E7C, 0x1E7D,},
    2269             :         {0x1E7E, 0x1E7F,},
    2270             :         {0x1E80, 0x1E81,},
    2271             :         {0x1E82, 0x1E83,},
    2272             :         {0x1E84, 0x1E85,},
    2273             :         {0x1E86, 0x1E87,},
    2274             :         {0x1E88, 0x1E89,},
    2275             :         {0x1E8A, 0x1E8B,},
    2276             :         {0x1E8C, 0x1E8D,},
    2277             :         {0x1E8E, 0x1E8F,},
    2278             :         {0x1E90, 0x1E91,},
    2279             :         {0x1E92, 0x1E93,},
    2280             :         {0x1E94, 0x1E95,},
    2281             :         {0x1E9E, 0x00DF,},
    2282             :         {0x1EA0, 0x1EA1,},
    2283             :         {0x1EA2, 0x1EA3,},
    2284             :         {0x1EA4, 0x1EA5,},
    2285             :         {0x1EA6, 0x1EA7,},
    2286             :         {0x1EA8, 0x1EA9,},
    2287             :         {0x1EAA, 0x1EAB,},
    2288             :         {0x1EAC, 0x1EAD,},
    2289             :         {0x1EAE, 0x1EAF,},
    2290             :         {0x1EB0, 0x1EB1,},
    2291             :         {0x1EB2, 0x1EB3,},
    2292             :         {0x1EB4, 0x1EB5,},
    2293             :         {0x1EB6, 0x1EB7,},
    2294             :         {0x1EB8, 0x1EB9,},
    2295             :         {0x1EBA, 0x1EBB,},
    2296             :         {0x1EBC, 0x1EBD,},
    2297             :         {0x1EBE, 0x1EBF,},
    2298             :         {0x1EC0, 0x1EC1,},
    2299             :         {0x1EC2, 0x1EC3,},
    2300             :         {0x1EC4, 0x1EC5,},
    2301             :         {0x1EC6, 0x1EC7,},
    2302             :         {0x1EC8, 0x1EC9,},
    2303             :         {0x1ECA, 0x1ECB,},
    2304             :         {0x1ECC, 0x1ECD,},
    2305             :         {0x1ECE, 0x1ECF,},
    2306             :         {0x1ED0, 0x1ED1,},
    2307             :         {0x1ED2, 0x1ED3,},
    2308             :         {0x1ED4, 0x1ED5,},
    2309             :         {0x1ED6, 0x1ED7,},
    2310             :         {0x1ED8, 0x1ED9,},
    2311             :         {0x1EDA, 0x1EDB,},
    2312             :         {0x1EDC, 0x1EDD,},
    2313             :         {0x1EDE, 0x1EDF,},
    2314             :         {0x1EE0, 0x1EE1,},
    2315             :         {0x1EE2, 0x1EE3,},
    2316             :         {0x1EE4, 0x1EE5,},
    2317             :         {0x1EE6, 0x1EE7,},
    2318             :         {0x1EE8, 0x1EE9,},
    2319             :         {0x1EEA, 0x1EEB,},
    2320             :         {0x1EEC, 0x1EED,},
    2321             :         {0x1EEE, 0x1EEF,},
    2322             :         {0x1EF0, 0x1EF1,},
    2323             :         {0x1EF2, 0x1EF3,},
    2324             :         {0x1EF4, 0x1EF5,},
    2325             :         {0x1EF6, 0x1EF7,},
    2326             :         {0x1EF8, 0x1EF9,},
    2327             :         {0x1EFA, 0x1EFB,},
    2328             :         {0x1EFC, 0x1EFD,},
    2329             :         {0x1EFE, 0x1EFF,},
    2330             :         {0x1F08, 0x1F00,},
    2331             :         {0x1F09, 0x1F01,},
    2332             :         {0x1F0A, 0x1F02,},
    2333             :         {0x1F0B, 0x1F03,},
    2334             :         {0x1F0C, 0x1F04,},
    2335             :         {0x1F0D, 0x1F05,},
    2336             :         {0x1F0E, 0x1F06,},
    2337             :         {0x1F0F, 0x1F07,},
    2338             :         {0x1F18, 0x1F10,},
    2339             :         {0x1F19, 0x1F11,},
    2340             :         {0x1F1A, 0x1F12,},
    2341             :         {0x1F1B, 0x1F13,},
    2342             :         {0x1F1C, 0x1F14,},
    2343             :         {0x1F1D, 0x1F15,},
    2344             :         {0x1F28, 0x1F20,},
    2345             :         {0x1F29, 0x1F21,},
    2346             :         {0x1F2A, 0x1F22,},
    2347             :         {0x1F2B, 0x1F23,},
    2348             :         {0x1F2C, 0x1F24,},
    2349             :         {0x1F2D, 0x1F25,},
    2350             :         {0x1F2E, 0x1F26,},
    2351             :         {0x1F2F, 0x1F27,},
    2352             :         {0x1F38, 0x1F30,},
    2353             :         {0x1F39, 0x1F31,},
    2354             :         {0x1F3A, 0x1F32,},
    2355             :         {0x1F3B, 0x1F33,},
    2356             :         {0x1F3C, 0x1F34,},
    2357             :         {0x1F3D, 0x1F35,},
    2358             :         {0x1F3E, 0x1F36,},
    2359             :         {0x1F3F, 0x1F37,},
    2360             :         {0x1F48, 0x1F40,},
    2361             :         {0x1F49, 0x1F41,},
    2362             :         {0x1F4A, 0x1F42,},
    2363             :         {0x1F4B, 0x1F43,},
    2364             :         {0x1F4C, 0x1F44,},
    2365             :         {0x1F4D, 0x1F45,},
    2366             :         {0x1F59, 0x1F51,},
    2367             :         {0x1F5B, 0x1F53,},
    2368             :         {0x1F5D, 0x1F55,},
    2369             :         {0x1F5F, 0x1F57,},
    2370             :         {0x1F68, 0x1F60,},
    2371             :         {0x1F69, 0x1F61,},
    2372             :         {0x1F6A, 0x1F62,},
    2373             :         {0x1F6B, 0x1F63,},
    2374             :         {0x1F6C, 0x1F64,},
    2375             :         {0x1F6D, 0x1F65,},
    2376             :         {0x1F6E, 0x1F66,},
    2377             :         {0x1F6F, 0x1F67,},
    2378             :         {0x1F88, 0x1F80,},
    2379             :         {0x1F89, 0x1F81,},
    2380             :         {0x1F8A, 0x1F82,},
    2381             :         {0x1F8B, 0x1F83,},
    2382             :         {0x1F8C, 0x1F84,},
    2383             :         {0x1F8D, 0x1F85,},
    2384             :         {0x1F8E, 0x1F86,},
    2385             :         {0x1F8F, 0x1F87,},
    2386             :         {0x1F98, 0x1F90,},
    2387             :         {0x1F99, 0x1F91,},
    2388             :         {0x1F9A, 0x1F92,},
    2389             :         {0x1F9B, 0x1F93,},
    2390             :         {0x1F9C, 0x1F94,},
    2391             :         {0x1F9D, 0x1F95,},
    2392             :         {0x1F9E, 0x1F96,},
    2393             :         {0x1F9F, 0x1F97,},
    2394             :         {0x1FA8, 0x1FA0,},
    2395             :         {0x1FA9, 0x1FA1,},
    2396             :         {0x1FAA, 0x1FA2,},
    2397             :         {0x1FAB, 0x1FA3,},
    2398             :         {0x1FAC, 0x1FA4,},
    2399             :         {0x1FAD, 0x1FA5,},
    2400             :         {0x1FAE, 0x1FA6,},
    2401             :         {0x1FAF, 0x1FA7,},
    2402             :         {0x1FB8, 0x1FB0,},
    2403             :         {0x1FB9, 0x1FB1,},
    2404             :         {0x1FBA, 0x1F70,},
    2405             :         {0x1FBB, 0x1F71,},
    2406             :         {0x1FBC, 0x1FB3,},
    2407             :         {0x1FC8, 0x1F72,},
    2408             :         {0x1FC9, 0x1F73,},
    2409             :         {0x1FCA, 0x1F74,},
    2410             :         {0x1FCB, 0x1F75,},
    2411             :         {0x1FCC, 0x1FC3,},
    2412             :         {0x1FD8, 0x1FD0,},
    2413             :         {0x1FD9, 0x1FD1,},
    2414             :         {0x1FDA, 0x1F76,},
    2415             :         {0x1FDB, 0x1F77,},
    2416             :         {0x1FE8, 0x1FE0,},
    2417             :         {0x1FE9, 0x1FE1,},
    2418             :         {0x1FEA, 0x1F7A,},
    2419             :         {0x1FEB, 0x1F7B,},
    2420             :         {0x1FEC, 0x1FE5,},
    2421             :         {0x1FF8, 0x1F78,},
    2422             :         {0x1FF9, 0x1F79,},
    2423             :         {0x1FFA, 0x1F7C,},
    2424             :         {0x1FFB, 0x1F7D,},
    2425             :         {0x1FFC, 0x1FF3,},
    2426             :         {0x2126, 0x03C9,},
    2427             :         {0x212A, 0x006B,},
    2428             :         {0x212B, 0x00E5,},
    2429             :         {0x2132, 0x214E,},
    2430             :         {0x2160, 0x2170,},
    2431             :         {0x2161, 0x2171,},
    2432             :         {0x2162, 0x2172,},
    2433             :         {0x2163, 0x2173,},
    2434             :         {0x2164, 0x2174,},
    2435             :         {0x2165, 0x2175,},
    2436             :         {0x2166, 0x2176,},
    2437             :         {0x2167, 0x2177,},
    2438             :         {0x2168, 0x2178,},
    2439             :         {0x2169, 0x2179,},
    2440             :         {0x216A, 0x217A,},
    2441             :         {0x216B, 0x217B,},
    2442             :         {0x216C, 0x217C,},
    2443             :         {0x216D, 0x217D,},
    2444             :         {0x216E, 0x217E,},
    2445             :         {0x216F, 0x217F,},
    2446             :         {0x2183, 0x2184,},
    2447             :         {0x24B6, 0x24D0,},
    2448             :         {0x24B7, 0x24D1,},
    2449             :         {0x24B8, 0x24D2,},
    2450             :         {0x24B9, 0x24D3,},
    2451             :         {0x24BA, 0x24D4,},
    2452             :         {0x24BB, 0x24D5,},
    2453             :         {0x24BC, 0x24D6,},
    2454             :         {0x24BD, 0x24D7,},
    2455             :         {0x24BE, 0x24D8,},
    2456             :         {0x24BF, 0x24D9,},
    2457             :         {0x24C0, 0x24DA,},
    2458             :         {0x24C1, 0x24DB,},
    2459             :         {0x24C2, 0x24DC,},
    2460             :         {0x24C3, 0x24DD,},
    2461             :         {0x24C4, 0x24DE,},
    2462             :         {0x24C5, 0x24DF,},
    2463             :         {0x24C6, 0x24E0,},
    2464             :         {0x24C7, 0x24E1,},
    2465             :         {0x24C8, 0x24E2,},
    2466             :         {0x24C9, 0x24E3,},
    2467             :         {0x24CA, 0x24E4,},
    2468             :         {0x24CB, 0x24E5,},
    2469             :         {0x24CC, 0x24E6,},
    2470             :         {0x24CD, 0x24E7,},
    2471             :         {0x24CE, 0x24E8,},
    2472             :         {0x24CF, 0x24E9,},
    2473             :         {0x2C00, 0x2C30,},
    2474             :         {0x2C01, 0x2C31,},
    2475             :         {0x2C02, 0x2C32,},
    2476             :         {0x2C03, 0x2C33,},
    2477             :         {0x2C04, 0x2C34,},
    2478             :         {0x2C05, 0x2C35,},
    2479             :         {0x2C06, 0x2C36,},
    2480             :         {0x2C07, 0x2C37,},
    2481             :         {0x2C08, 0x2C38,},
    2482             :         {0x2C09, 0x2C39,},
    2483             :         {0x2C0A, 0x2C3A,},
    2484             :         {0x2C0B, 0x2C3B,},
    2485             :         {0x2C0C, 0x2C3C,},
    2486             :         {0x2C0D, 0x2C3D,},
    2487             :         {0x2C0E, 0x2C3E,},
    2488             :         {0x2C0F, 0x2C3F,},
    2489             :         {0x2C10, 0x2C40,},
    2490             :         {0x2C11, 0x2C41,},
    2491             :         {0x2C12, 0x2C42,},
    2492             :         {0x2C13, 0x2C43,},
    2493             :         {0x2C14, 0x2C44,},
    2494             :         {0x2C15, 0x2C45,},
    2495             :         {0x2C16, 0x2C46,},
    2496             :         {0x2C17, 0x2C47,},
    2497             :         {0x2C18, 0x2C48,},
    2498             :         {0x2C19, 0x2C49,},
    2499             :         {0x2C1A, 0x2C4A,},
    2500             :         {0x2C1B, 0x2C4B,},
    2501             :         {0x2C1C, 0x2C4C,},
    2502             :         {0x2C1D, 0x2C4D,},
    2503             :         {0x2C1E, 0x2C4E,},
    2504             :         {0x2C1F, 0x2C4F,},
    2505             :         {0x2C20, 0x2C50,},
    2506             :         {0x2C21, 0x2C51,},
    2507             :         {0x2C22, 0x2C52,},
    2508             :         {0x2C23, 0x2C53,},
    2509             :         {0x2C24, 0x2C54,},
    2510             :         {0x2C25, 0x2C55,},
    2511             :         {0x2C26, 0x2C56,},
    2512             :         {0x2C27, 0x2C57,},
    2513             :         {0x2C28, 0x2C58,},
    2514             :         {0x2C29, 0x2C59,},
    2515             :         {0x2C2A, 0x2C5A,},
    2516             :         {0x2C2B, 0x2C5B,},
    2517             :         {0x2C2C, 0x2C5C,},
    2518             :         {0x2C2D, 0x2C5D,},
    2519             :         {0x2C2E, 0x2C5E,},
    2520             :         {0x2C2F, 0x2C5F,},
    2521             :         {0x2C60, 0x2C61,},
    2522             :         {0x2C62, 0x026B,},
    2523             :         {0x2C63, 0x1D7D,},
    2524             :         {0x2C64, 0x027D,},
    2525             :         {0x2C67, 0x2C68,},
    2526             :         {0x2C69, 0x2C6A,},
    2527             :         {0x2C6B, 0x2C6C,},
    2528             :         {0x2C6D, 0x0251,},
    2529             :         {0x2C6E, 0x0271,},
    2530             :         {0x2C6F, 0x0250,},
    2531             :         {0x2C70, 0x0252,},
    2532             :         {0x2C72, 0x2C73,},
    2533             :         {0x2C75, 0x2C76,},
    2534             :         {0x2C7E, 0x023F,},
    2535             :         {0x2C7F, 0x0240,},
    2536             :         {0x2C80, 0x2C81,},
    2537             :         {0x2C82, 0x2C83,},
    2538             :         {0x2C84, 0x2C85,},
    2539             :         {0x2C86, 0x2C87,},
    2540             :         {0x2C88, 0x2C89,},
    2541             :         {0x2C8A, 0x2C8B,},
    2542             :         {0x2C8C, 0x2C8D,},
    2543             :         {0x2C8E, 0x2C8F,},
    2544             :         {0x2C90, 0x2C91,},
    2545             :         {0x2C92, 0x2C93,},
    2546             :         {0x2C94, 0x2C95,},
    2547             :         {0x2C96, 0x2C97,},
    2548             :         {0x2C98, 0x2C99,},
    2549             :         {0x2C9A, 0x2C9B,},
    2550             :         {0x2C9C, 0x2C9D,},
    2551             :         {0x2C9E, 0x2C9F,},
    2552             :         {0x2CA0, 0x2CA1,},
    2553             :         {0x2CA2, 0x2CA3,},
    2554             :         {0x2CA4, 0x2CA5,},
    2555             :         {0x2CA6, 0x2CA7,},
    2556             :         {0x2CA8, 0x2CA9,},
    2557             :         {0x2CAA, 0x2CAB,},
    2558             :         {0x2CAC, 0x2CAD,},
    2559             :         {0x2CAE, 0x2CAF,},
    2560             :         {0x2CB0, 0x2CB1,},
    2561             :         {0x2CB2, 0x2CB3,},
    2562             :         {0x2CB4, 0x2CB5,},
    2563             :         {0x2CB6, 0x2CB7,},
    2564             :         {0x2CB8, 0x2CB9,},
    2565             :         {0x2CBA, 0x2CBB,},
    2566             :         {0x2CBC, 0x2CBD,},
    2567             :         {0x2CBE, 0x2CBF,},
    2568             :         {0x2CC0, 0x2CC1,},
    2569             :         {0x2CC2, 0x2CC3,},
    2570             :         {0x2CC4, 0x2CC5,},
    2571             :         {0x2CC6, 0x2CC7,},
    2572             :         {0x2CC8, 0x2CC9,},
    2573             :         {0x2CCA, 0x2CCB,},
    2574             :         {0x2CCC, 0x2CCD,},
    2575             :         {0x2CCE, 0x2CCF,},
    2576             :         {0x2CD0, 0x2CD1,},
    2577             :         {0x2CD2, 0x2CD3,},
    2578             :         {0x2CD4, 0x2CD5,},
    2579             :         {0x2CD6, 0x2CD7,},
    2580             :         {0x2CD8, 0x2CD9,},
    2581             :         {0x2CDA, 0x2CDB,},
    2582             :         {0x2CDC, 0x2CDD,},
    2583             :         {0x2CDE, 0x2CDF,},
    2584             :         {0x2CE0, 0x2CE1,},
    2585             :         {0x2CE2, 0x2CE3,},
    2586             :         {0x2CEB, 0x2CEC,},
    2587             :         {0x2CED, 0x2CEE,},
    2588             :         {0x2CF2, 0x2CF3,},
    2589             :         {0xA640, 0xA641,},
    2590             :         {0xA642, 0xA643,},
    2591             :         {0xA644, 0xA645,},
    2592             :         {0xA646, 0xA647,},
    2593             :         {0xA648, 0xA649,},
    2594             :         {0xA64A, 0xA64B,},
    2595             :         {0xA64C, 0xA64D,},
    2596             :         {0xA64E, 0xA64F,},
    2597             :         {0xA650, 0xA651,},
    2598             :         {0xA652, 0xA653,},
    2599             :         {0xA654, 0xA655,},
    2600             :         {0xA656, 0xA657,},
    2601             :         {0xA658, 0xA659,},
    2602             :         {0xA65A, 0xA65B,},
    2603             :         {0xA65C, 0xA65D,},
    2604             :         {0xA65E, 0xA65F,},
    2605             :         {0xA660, 0xA661,},
    2606             :         {0xA662, 0xA663,},
    2607             :         {0xA664, 0xA665,},
    2608             :         {0xA666, 0xA667,},
    2609             :         {0xA668, 0xA669,},
    2610             :         {0xA66A, 0xA66B,},
    2611             :         {0xA66C, 0xA66D,},
    2612             :         {0xA680, 0xA681,},
    2613             :         {0xA682, 0xA683,},
    2614             :         {0xA684, 0xA685,},
    2615             :         {0xA686, 0xA687,},
    2616             :         {0xA688, 0xA689,},
    2617             :         {0xA68A, 0xA68B,},
    2618             :         {0xA68C, 0xA68D,},
    2619             :         {0xA68E, 0xA68F,},
    2620             :         {0xA690, 0xA691,},
    2621             :         {0xA692, 0xA693,},
    2622             :         {0xA694, 0xA695,},
    2623             :         {0xA696, 0xA697,},
    2624             :         {0xA698, 0xA699,},
    2625             :         {0xA69A, 0xA69B,},
    2626             :         {0xA722, 0xA723,},
    2627             :         {0xA724, 0xA725,},
    2628             :         {0xA726, 0xA727,},
    2629             :         {0xA728, 0xA729,},
    2630             :         {0xA72A, 0xA72B,},
    2631             :         {0xA72C, 0xA72D,},
    2632             :         {0xA72E, 0xA72F,},
    2633             :         {0xA732, 0xA733,},
    2634             :         {0xA734, 0xA735,},
    2635             :         {0xA736, 0xA737,},
    2636             :         {0xA738, 0xA739,},
    2637             :         {0xA73A, 0xA73B,},
    2638             :         {0xA73C, 0xA73D,},
    2639             :         {0xA73E, 0xA73F,},
    2640             :         {0xA740, 0xA741,},
    2641             :         {0xA742, 0xA743,},
    2642             :         {0xA744, 0xA745,},
    2643             :         {0xA746, 0xA747,},
    2644             :         {0xA748, 0xA749,},
    2645             :         {0xA74A, 0xA74B,},
    2646             :         {0xA74C, 0xA74D,},
    2647             :         {0xA74E, 0xA74F,},
    2648             :         {0xA750, 0xA751,},
    2649             :         {0xA752, 0xA753,},
    2650             :         {0xA754, 0xA755,},
    2651             :         {0xA756, 0xA757,},
    2652             :         {0xA758, 0xA759,},
    2653             :         {0xA75A, 0xA75B,},
    2654             :         {0xA75C, 0xA75D,},
    2655             :         {0xA75E, 0xA75F,},
    2656             :         {0xA760, 0xA761,},
    2657             :         {0xA762, 0xA763,},
    2658             :         {0xA764, 0xA765,},
    2659             :         {0xA766, 0xA767,},
    2660             :         {0xA768, 0xA769,},
    2661             :         {0xA76A, 0xA76B,},
    2662             :         {0xA76C, 0xA76D,},
    2663             :         {0xA76E, 0xA76F,},
    2664             :         {0xA779, 0xA77A,},
    2665             :         {0xA77B, 0xA77C,},
    2666             :         {0xA77D, 0x1D79,},
    2667             :         {0xA77E, 0xA77F,},
    2668             :         {0xA780, 0xA781,},
    2669             :         {0xA782, 0xA783,},
    2670             :         {0xA784, 0xA785,},
    2671             :         {0xA786, 0xA787,},
    2672             :         {0xA78B, 0xA78C,},
    2673             :         {0xA78D, 0x0265,},
    2674             :         {0xA790, 0xA791,},
    2675             :         {0xA792, 0xA793,},
    2676             :         {0xA796, 0xA797,},
    2677             :         {0xA798, 0xA799,},
    2678             :         {0xA79A, 0xA79B,},
    2679             :         {0xA79C, 0xA79D,},
    2680             :         {0xA79E, 0xA79F,},
    2681             :         {0xA7A0, 0xA7A1,},
    2682             :         {0xA7A2, 0xA7A3,},
    2683             :         {0xA7A4, 0xA7A5,},
    2684             :         {0xA7A6, 0xA7A7,},
    2685             :         {0xA7A8, 0xA7A9,},
    2686             :         {0xA7AA, 0x0266,},
    2687             :         {0xA7AB, 0x025C,},
    2688             :         {0xA7AC, 0x0261,},
    2689             :         {0xA7AD, 0x026C,},
    2690             :         {0xA7AE, 0x026A,},
    2691             :         {0xA7B0, 0x029E,},
    2692             :         {0xA7B1, 0x0287,},
    2693             :         {0xA7B2, 0x029D,},
    2694             :         {0xA7B3, 0xAB53,},
    2695             :         {0xA7B4, 0xA7B5,},
    2696             :         {0xA7B6, 0xA7B7,},
    2697             :         {0xA7B8, 0xA7B9,},
    2698             :         {0xA7BA, 0xA7BB,},
    2699             :         {0xA7BC, 0xA7BD,},
    2700             :         {0xA7BE, 0xA7BF,},
    2701             :         {0xA7C0, 0xA7C1,},
    2702             :         {0xA7C2, 0xA7C3,},
    2703             :         {0xA7C4, 0xA794,},
    2704             :         {0xA7C5, 0x0282,},
    2705             :         {0xA7C6, 0x1D8E,},
    2706             :         {0xA7C7, 0xA7C8,},
    2707             :         {0xA7C9, 0xA7CA,},
    2708             :         {0xA7D0, 0xA7D1,},
    2709             :         {0xA7D6, 0xA7D7,},
    2710             :         {0xA7D8, 0xA7D9,},
    2711             :         {0xA7F5, 0xA7F6,},
    2712             :         {0xFF21, 0xFF41,},
    2713             :         {0xFF22, 0xFF42,},
    2714             :         {0xFF23, 0xFF43,},
    2715             :         {0xFF24, 0xFF44,},
    2716             :         {0xFF25, 0xFF45,},
    2717             :         {0xFF26, 0xFF46,},
    2718             :         {0xFF27, 0xFF47,},
    2719             :         {0xFF28, 0xFF48,},
    2720             :         {0xFF29, 0xFF49,},
    2721             :         {0xFF2A, 0xFF4A,},
    2722             :         {0xFF2B, 0xFF4B,},
    2723             :         {0xFF2C, 0xFF4C,},
    2724             :         {0xFF2D, 0xFF4D,},
    2725             :         {0xFF2E, 0xFF4E,},
    2726             :         {0xFF2F, 0xFF4F,},
    2727             :         {0xFF30, 0xFF50,},
    2728             :         {0xFF31, 0xFF51,},
    2729             :         {0xFF32, 0xFF52,},
    2730             :         {0xFF33, 0xFF53,},
    2731             :         {0xFF34, 0xFF54,},
    2732             :         {0xFF35, 0xFF55,},
    2733             :         {0xFF36, 0xFF56,},
    2734             :         {0xFF37, 0xFF57,},
    2735             :         {0xFF38, 0xFF58,},
    2736             :         {0xFF39, 0xFF59,},
    2737             :         {0xFF3A, 0xFF5A,},
    2738             :         {0x10400, 0x10428,},
    2739             :         {0x10401, 0x10429,},
    2740             :         {0x10402, 0x1042A,},
    2741             :         {0x10403, 0x1042B,},
    2742             :         {0x10404, 0x1042C,},
    2743             :         {0x10405, 0x1042D,},
    2744             :         {0x10406, 0x1042E,},
    2745             :         {0x10407, 0x1042F,},
    2746             :         {0x10408, 0x10430,},
    2747             :         {0x10409, 0x10431,},
    2748             :         {0x1040A, 0x10432,},
    2749             :         {0x1040B, 0x10433,},
    2750             :         {0x1040C, 0x10434,},
    2751             :         {0x1040D, 0x10435,},
    2752             :         {0x1040E, 0x10436,},
    2753             :         {0x1040F, 0x10437,},
    2754             :         {0x10410, 0x10438,},
    2755             :         {0x10411, 0x10439,},
    2756             :         {0x10412, 0x1043A,},
    2757             :         {0x10413, 0x1043B,},
    2758             :         {0x10414, 0x1043C,},
    2759             :         {0x10415, 0x1043D,},
    2760             :         {0x10416, 0x1043E,},
    2761             :         {0x10417, 0x1043F,},
    2762             :         {0x10418, 0x10440,},
    2763             :         {0x10419, 0x10441,},
    2764             :         {0x1041A, 0x10442,},
    2765             :         {0x1041B, 0x10443,},
    2766             :         {0x1041C, 0x10444,},
    2767             :         {0x1041D, 0x10445,},
    2768             :         {0x1041E, 0x10446,},
    2769             :         {0x1041F, 0x10447,},
    2770             :         {0x10420, 0x10448,},
    2771             :         {0x10421, 0x10449,},
    2772             :         {0x10422, 0x1044A,},
    2773             :         {0x10423, 0x1044B,},
    2774             :         {0x10424, 0x1044C,},
    2775             :         {0x10425, 0x1044D,},
    2776             :         {0x10426, 0x1044E,},
    2777             :         {0x10427, 0x1044F,},
    2778             :         {0x104B0, 0x104D8,},
    2779             :         {0x104B1, 0x104D9,},
    2780             :         {0x104B2, 0x104DA,},
    2781             :         {0x104B3, 0x104DB,},
    2782             :         {0x104B4, 0x104DC,},
    2783             :         {0x104B5, 0x104DD,},
    2784             :         {0x104B6, 0x104DE,},
    2785             :         {0x104B7, 0x104DF,},
    2786             :         {0x104B8, 0x104E0,},
    2787             :         {0x104B9, 0x104E1,},
    2788             :         {0x104BA, 0x104E2,},
    2789             :         {0x104BB, 0x104E3,},
    2790             :         {0x104BC, 0x104E4,},
    2791             :         {0x104BD, 0x104E5,},
    2792             :         {0x104BE, 0x104E6,},
    2793             :         {0x104BF, 0x104E7,},
    2794             :         {0x104C0, 0x104E8,},
    2795             :         {0x104C1, 0x104E9,},
    2796             :         {0x104C2, 0x104EA,},
    2797             :         {0x104C3, 0x104EB,},
    2798             :         {0x104C4, 0x104EC,},
    2799             :         {0x104C5, 0x104ED,},
    2800             :         {0x104C6, 0x104EE,},
    2801             :         {0x104C7, 0x104EF,},
    2802             :         {0x104C8, 0x104F0,},
    2803             :         {0x104C9, 0x104F1,},
    2804             :         {0x104CA, 0x104F2,},
    2805             :         {0x104CB, 0x104F3,},
    2806             :         {0x104CC, 0x104F4,},
    2807             :         {0x104CD, 0x104F5,},
    2808             :         {0x104CE, 0x104F6,},
    2809             :         {0x104CF, 0x104F7,},
    2810             :         {0x104D0, 0x104F8,},
    2811             :         {0x104D1, 0x104F9,},
    2812             :         {0x104D2, 0x104FA,},
    2813             :         {0x104D3, 0x104FB,},
    2814             :         {0x10570, 0x10597,},
    2815             :         {0x10571, 0x10598,},
    2816             :         {0x10572, 0x10599,},
    2817             :         {0x10573, 0x1059A,},
    2818             :         {0x10574, 0x1059B,},
    2819             :         {0x10575, 0x1059C,},
    2820             :         {0x10576, 0x1059D,},
    2821             :         {0x10577, 0x1059E,},
    2822             :         {0x10578, 0x1059F,},
    2823             :         {0x10579, 0x105A0,},
    2824             :         {0x1057A, 0x105A1,},
    2825             :         {0x1057C, 0x105A3,},
    2826             :         {0x1057D, 0x105A4,},
    2827             :         {0x1057E, 0x105A5,},
    2828             :         {0x1057F, 0x105A6,},
    2829             :         {0x10580, 0x105A7,},
    2830             :         {0x10581, 0x105A8,},
    2831             :         {0x10582, 0x105A9,},
    2832             :         {0x10583, 0x105AA,},
    2833             :         {0x10584, 0x105AB,},
    2834             :         {0x10585, 0x105AC,},
    2835             :         {0x10586, 0x105AD,},
    2836             :         {0x10587, 0x105AE,},
    2837             :         {0x10588, 0x105AF,},
    2838             :         {0x10589, 0x105B0,},
    2839             :         {0x1058A, 0x105B1,},
    2840             :         {0x1058C, 0x105B3,},
    2841             :         {0x1058D, 0x105B4,},
    2842             :         {0x1058E, 0x105B5,},
    2843             :         {0x1058F, 0x105B6,},
    2844             :         {0x10590, 0x105B7,},
    2845             :         {0x10591, 0x105B8,},
    2846             :         {0x10592, 0x105B9,},
    2847             :         {0x10594, 0x105BB,},
    2848             :         {0x10595, 0x105BC,},
    2849             :         {0x10C80, 0x10CC0,},
    2850             :         {0x10C81, 0x10CC1,},
    2851             :         {0x10C82, 0x10CC2,},
    2852             :         {0x10C83, 0x10CC3,},
    2853             :         {0x10C84, 0x10CC4,},
    2854             :         {0x10C85, 0x10CC5,},
    2855             :         {0x10C86, 0x10CC6,},
    2856             :         {0x10C87, 0x10CC7,},
    2857             :         {0x10C88, 0x10CC8,},
    2858             :         {0x10C89, 0x10CC9,},
    2859             :         {0x10C8A, 0x10CCA,},
    2860             :         {0x10C8B, 0x10CCB,},
    2861             :         {0x10C8C, 0x10CCC,},
    2862             :         {0x10C8D, 0x10CCD,},
    2863             :         {0x10C8E, 0x10CCE,},
    2864             :         {0x10C8F, 0x10CCF,},
    2865             :         {0x10C90, 0x10CD0,},
    2866             :         {0x10C91, 0x10CD1,},
    2867             :         {0x10C92, 0x10CD2,},
    2868             :         {0x10C93, 0x10CD3,},
    2869             :         {0x10C94, 0x10CD4,},
    2870             :         {0x10C95, 0x10CD5,},
    2871             :         {0x10C96, 0x10CD6,},
    2872             :         {0x10C97, 0x10CD7,},
    2873             :         {0x10C98, 0x10CD8,},
    2874             :         {0x10C99, 0x10CD9,},
    2875             :         {0x10C9A, 0x10CDA,},
    2876             :         {0x10C9B, 0x10CDB,},
    2877             :         {0x10C9C, 0x10CDC,},
    2878             :         {0x10C9D, 0x10CDD,},
    2879             :         {0x10C9E, 0x10CDE,},
    2880             :         {0x10C9F, 0x10CDF,},
    2881             :         {0x10CA0, 0x10CE0,},
    2882             :         {0x10CA1, 0x10CE1,},
    2883             :         {0x10CA2, 0x10CE2,},
    2884             :         {0x10CA3, 0x10CE3,},
    2885             :         {0x10CA4, 0x10CE4,},
    2886             :         {0x10CA5, 0x10CE5,},
    2887             :         {0x10CA6, 0x10CE6,},
    2888             :         {0x10CA7, 0x10CE7,},
    2889             :         {0x10CA8, 0x10CE8,},
    2890             :         {0x10CA9, 0x10CE9,},
    2891             :         {0x10CAA, 0x10CEA,},
    2892             :         {0x10CAB, 0x10CEB,},
    2893             :         {0x10CAC, 0x10CEC,},
    2894             :         {0x10CAD, 0x10CED,},
    2895             :         {0x10CAE, 0x10CEE,},
    2896             :         {0x10CAF, 0x10CEF,},
    2897             :         {0x10CB0, 0x10CF0,},
    2898             :         {0x10CB1, 0x10CF1,},
    2899             :         {0x10CB2, 0x10CF2,},
    2900             :         {0x118A0, 0x118C0,},
    2901             :         {0x118A1, 0x118C1,},
    2902             :         {0x118A2, 0x118C2,},
    2903             :         {0x118A3, 0x118C3,},
    2904             :         {0x118A4, 0x118C4,},
    2905             :         {0x118A5, 0x118C5,},
    2906             :         {0x118A6, 0x118C6,},
    2907             :         {0x118A7, 0x118C7,},
    2908             :         {0x118A8, 0x118C8,},
    2909             :         {0x118A9, 0x118C9,},
    2910             :         {0x118AA, 0x118CA,},
    2911             :         {0x118AB, 0x118CB,},
    2912             :         {0x118AC, 0x118CC,},
    2913             :         {0x118AD, 0x118CD,},
    2914             :         {0x118AE, 0x118CE,},
    2915             :         {0x118AF, 0x118CF,},
    2916             :         {0x118B0, 0x118D0,},
    2917             :         {0x118B1, 0x118D1,},
    2918             :         {0x118B2, 0x118D2,},
    2919             :         {0x118B3, 0x118D3,},
    2920             :         {0x118B4, 0x118D4,},
    2921             :         {0x118B5, 0x118D5,},
    2922             :         {0x118B6, 0x118D6,},
    2923             :         {0x118B7, 0x118D7,},
    2924             :         {0x118B8, 0x118D8,},
    2925             :         {0x118B9, 0x118D9,},
    2926             :         {0x118BA, 0x118DA,},
    2927             :         {0x118BB, 0x118DB,},
    2928             :         {0x118BC, 0x118DC,},
    2929             :         {0x118BD, 0x118DD,},
    2930             :         {0x118BE, 0x118DE,},
    2931             :         {0x118BF, 0x118DF,},
    2932             :         {0x16E40, 0x16E60,},
    2933             :         {0x16E41, 0x16E61,},
    2934             :         {0x16E42, 0x16E62,},
    2935             :         {0x16E43, 0x16E63,},
    2936             :         {0x16E44, 0x16E64,},
    2937             :         {0x16E45, 0x16E65,},
    2938             :         {0x16E46, 0x16E66,},
    2939             :         {0x16E47, 0x16E67,},
    2940             :         {0x16E48, 0x16E68,},
    2941             :         {0x16E49, 0x16E69,},
    2942             :         {0x16E4A, 0x16E6A,},
    2943             :         {0x16E4B, 0x16E6B,},
    2944             :         {0x16E4C, 0x16E6C,},
    2945             :         {0x16E4D, 0x16E6D,},
    2946             :         {0x16E4E, 0x16E6E,},
    2947             :         {0x16E4F, 0x16E6F,},
    2948             :         {0x16E50, 0x16E70,},
    2949             :         {0x16E51, 0x16E71,},
    2950             :         {0x16E52, 0x16E72,},
    2951             :         {0x16E53, 0x16E73,},
    2952             :         {0x16E54, 0x16E74,},
    2953             :         {0x16E55, 0x16E75,},
    2954             :         {0x16E56, 0x16E76,},
    2955             :         {0x16E57, 0x16E77,},
    2956             :         {0x16E58, 0x16E78,},
    2957             :         {0x16E59, 0x16E79,},
    2958             :         {0x16E5A, 0x16E7A,},
    2959             :         {0x16E5B, 0x16E7B,},
    2960             :         {0x16E5C, 0x16E7C,},
    2961             :         {0x16E5D, 0x16E7D,},
    2962             :         {0x16E5E, 0x16E7E,},
    2963             :         {0x16E5F, 0x16E7F,},
    2964             :         {0x1E900, 0x1E922,},
    2965             :         {0x1E901, 0x1E923,},
    2966             :         {0x1E902, 0x1E924,},
    2967             :         {0x1E903, 0x1E925,},
    2968             :         {0x1E904, 0x1E926,},
    2969             :         {0x1E905, 0x1E927,},
    2970             :         {0x1E906, 0x1E928,},
    2971             :         {0x1E907, 0x1E929,},
    2972             :         {0x1E908, 0x1E92A,},
    2973             :         {0x1E909, 0x1E92B,},
    2974             :         {0x1E90A, 0x1E92C,},
    2975             :         {0x1E90B, 0x1E92D,},
    2976             :         {0x1E90C, 0x1E92E,},
    2977             :         {0x1E90D, 0x1E92F,},
    2978             :         {0x1E90E, 0x1E930,},
    2979             :         {0x1E90F, 0x1E931,},
    2980             :         {0x1E910, 0x1E932,},
    2981             :         {0x1E911, 0x1E933,},
    2982             :         {0x1E912, 0x1E934,},
    2983             :         {0x1E913, 0x1E935,},
    2984             :         {0x1E914, 0x1E936,},
    2985             :         {0x1E915, 0x1E937,},
    2986             :         {0x1E916, 0x1E938,},
    2987             :         {0x1E917, 0x1E939,},
    2988             :         {0x1E918, 0x1E93A,},
    2989             :         {0x1E919, 0x1E93B,},
    2990             :         {0x1E91A, 0x1E93C,},
    2991             :         {0x1E91B, 0x1E93D,},
    2992             :         {0x1E91C, 0x1E93E,},
    2993             :         {0x1E91D, 0x1E93F,},
    2994             :         {0x1E91E, 0x1E940,},
    2995             :         {0x1E91F, 0x1E941,},
    2996             :         {0x1E920, 0x1E942,},
    2997             :         {0x1E921, 0x1E943,},
    2998             : };
    2999             : /* BATs holding the case-conversion tables as parallel int columns:
                        * entry i of a "From" BAT is a codepoint and entry i of the matching
                        * "To" BAT is its converted form.  All four pointers start out NULL;
                        * STRprelude() creates and fills the BATs from the static UTF8_toUpper
                        * and UTF8_toLower arrays, and STRepilogue() reclaims them and resets
                        * the pointers to NULL. */
    3000             : static BAT *UTF8_toUpperFrom = NULL, *UTF8_toUpperTo = NULL,
    3001             :                 *UTF8_toLowerFrom = NULL, *UTF8_toLowerTo = NULL;
    3002             : /* Build the four case-conversion BATs from the static UTF8_toUpper and
                        * UTF8_toLower arrays.
                        *
                        * Nothing is done when UTF8_toUpperFrom is already non-NULL.  Otherwise
                        * four TYPE_int BATs are allocated with COLnew(), sized to the number of
                        * entries in the corresponding array; the .from and .to fields of every
                        * entry are copied into the "From" and "To" BAT at the same index; the
                        * BAT counts and properties are set; and the BATs are renamed to
                        * "monet_unicode_{upper,lower}_{from,to}".
                        *
                        * Returns MAL_SUCCEED on success (including the nothing-to-do case).
                        * If an allocation or a rename fails, all four BATs are handed to
                        * BBPreclaim(), the pointers are reset to NULL and a MAL exception
                        * "str.prelude" with GDK_EXCEPTION is thrown.
                        *
                        * NOTE(review): presumably run once at module start-up; nothing in this
                        * function guards against concurrent callers -- confirm with the caller. */
    3003             : static str
    3004         336 : STRprelude(void)
    3005             : {
    3006         336 :         if (UTF8_toUpperFrom == NULL) { /* tables have not been built yet */
    3007         336 :                 size_t i;
    3008             : 
    3009         336 :                 UTF8_toUpperFrom = COLnew(0, TYPE_int,
    3010             :                                                                   sizeof(UTF8_toUpper) / sizeof(UTF8_toUpper[0]),
    3011             :                                                                   SYSTRANS);
    3012         336 :                 UTF8_toUpperTo = COLnew(0, TYPE_int,
    3013             :                                                                 sizeof(UTF8_toUpper) / sizeof(UTF8_toUpper[0]),
    3014             :                                                                 SYSTRANS);
    3015         336 :                 UTF8_toLowerFrom = COLnew(0, TYPE_int,
    3016             :                                                                   sizeof(UTF8_toLower) / sizeof(UTF8_toLower[0]),
    3017             :                                                                   SYSTRANS);
    3018         336 :                 UTF8_toLowerTo = COLnew(0, TYPE_int,
    3019             :                                                                 sizeof(UTF8_toLower) / sizeof(UTF8_toLower[0]),
    3020             :                                                                 SYSTRANS);
    3021         336 :                 if (UTF8_toUpperFrom == NULL || UTF8_toUpperTo == NULL
    3022         336 :                         || UTF8_toLowerFrom == NULL || UTF8_toLowerTo == NULL) {
    3023           0 :                         goto bailout; /* at least one allocation failed */
    3024             :                 }
    3025             : 
    3026         336 :                 int *fp = (int *) Tloc(UTF8_toUpperFrom, 0);
    3027         336 :                 int *tp = (int *) Tloc(UTF8_toUpperTo, 0);
    3028      487536 :                 for (i = 0; i < sizeof(UTF8_toUpper) / sizeof(UTF8_toUpper[0]); i++) {
    3029      487200 :                         fp[i] = UTF8_toUpper[i].from;
    3030      487200 :                         tp[i] = UTF8_toUpper[i].to;
    3031             :                 }
    3032         336 :                 BATsetcount(UTF8_toUpperFrom, i); /* i == number of entries copied */
    3033         336 :                 UTF8_toUpperFrom->tkey = true;
    3034         336 :                 UTF8_toUpperFrom->tsorted = true; /* NOTE(review): relies on UTF8_toUpper being ordered by .from with no duplicates -- confirm */
    3035         336 :                 UTF8_toUpperFrom->trevsorted = false;
    3036         336 :                 UTF8_toUpperFrom->tnil = false;
    3037         336 :                 UTF8_toUpperFrom->tnonil = true;
    3038         336 :                 BATsetcount(UTF8_toUpperTo, i);
    3039         336 :                 UTF8_toUpperTo->tkey = false; /* no key/order properties are claimed for the "To" column */
    3040         336 :                 UTF8_toUpperTo->tsorted = false;
    3041         336 :                 UTF8_toUpperTo->trevsorted = false;
    3042         336 :                 UTF8_toUpperTo->tnil = false;
    3043         336 :                 UTF8_toUpperTo->tnonil = true;
    3044             : 
    3045         336 :                 fp = (int *) Tloc(UTF8_toLowerFrom, 0); /* same procedure for the lower-case table */
    3046         336 :                 tp = (int *) Tloc(UTF8_toLowerTo, 0);
    3047      481824 :                 for (i = 0; i < sizeof(UTF8_toLower) / sizeof(UTF8_toLower[0]); i++) {
    3048      481488 :                         fp[i] = UTF8_toLower[i].from;
    3049      481488 :                         tp[i] = UTF8_toLower[i].to;
    3050             :                 }
    3051         336 :                 BATsetcount(UTF8_toLowerFrom, i);
    3052         336 :                 UTF8_toLowerFrom->tkey = true;
    3053         336 :                 UTF8_toLowerFrom->tsorted = true;
    3054         336 :                 UTF8_toLowerFrom->trevsorted = false;
    3055         336 :                 UTF8_toLowerFrom->tnil = false;
    3056         336 :                 UTF8_toLowerFrom->tnonil = true;
    3057         336 :                 BATsetcount(UTF8_toLowerTo, i);
    3058         336 :                 UTF8_toLowerTo->tkey = false;
    3059         336 :                 UTF8_toLowerTo->tsorted = false;
    3060         336 :                 UTF8_toLowerTo->trevsorted = false;
    3061         336 :                 UTF8_toLowerTo->tnil = false;
    3062         336 :                 UTF8_toLowerTo->tnonil = true;
    3063             : 
    3064         672 :                 if (BBPrename(UTF8_toUpperFrom, "monet_unicode_upper_from") != 0 ||
    3065         672 :                         BBPrename(UTF8_toUpperTo, "monet_unicode_upper_to") != 0 ||
    3066         672 :                         BBPrename(UTF8_toLowerFrom, "monet_unicode_lower_from") != 0 ||
    3067         336 :                         BBPrename(UTF8_toLowerTo, "monet_unicode_lower_to") != 0) {
    3068           0 :                         goto bailout; /* a non-zero BBPrename result is treated as failure */
    3069             :                 }
    3070         336 :                 BBP_pid(UTF8_toUpperFrom->batCacheid) = 0; /* NOTE(review): presumably clears the owning-thread id of each BAT -- confirm */
    3071         336 :                 BBP_pid(UTF8_toUpperTo->batCacheid) = 0;
    3072         336 :                 BBP_pid(UTF8_toLowerFrom->batCacheid) = 0;
    3073         336 :                 BBP_pid(UTF8_toLowerTo->batCacheid) = 0;
    3074             :         }
    3075             :         return MAL_SUCCEED;
    3076             : 
    3077           0 :   bailout: /* failure path: release whatever was created and reset the pointers */
    3078           0 :         BBPreclaim(UTF8_toUpperFrom); /* NOTE(review): may be NULL here; assumes BBPreclaim accepts NULL -- confirm */
    3079           0 :         BBPreclaim(UTF8_toUpperTo);
    3080           0 :         BBPreclaim(UTF8_toLowerFrom);
    3081           0 :         BBPreclaim(UTF8_toLowerTo);
    3082           0 :         UTF8_toUpperFrom = NULL;
    3083           0 :         UTF8_toUpperTo = NULL;
    3084           0 :         UTF8_toLowerFrom = NULL;
    3085           0 :         UTF8_toLowerTo = NULL;
    3086           0 :         throw(MAL, "str.prelude", GDK_EXCEPTION);
    3087             : }
    3088             : /* Release the four case-conversion BATs with BBPreclaim() and reset the
                        * pointers to NULL, so that a later STRprelude() call rebuilds them.
                        * The ret argument is ignored.  Always returns MAL_SUCCEED. */
    3089             : static str
    3090         334 : STRepilogue(void *ret)
    3091             : {
    3092         334 :         (void) ret; /* unused */
    3093         334 :         BBPreclaim(UTF8_toUpperFrom);
    3094         334 :         BBPreclaim(UTF8_toUpperTo);
    3095         334 :         BBPreclaim(UTF8_toLowerFrom);
    3096         334 :         BBPreclaim(UTF8_toLowerTo);
    3097         334 :         UTF8_toUpperFrom = NULL;
    3098         334 :         UTF8_toUpperTo = NULL;
    3099         334 :         UTF8_toLowerFrom = NULL;
    3100         334 :         UTF8_toLowerTo = NULL;
    3101         334 :         return MAL_SUCCEED;
    3102             : }
    3103             : 
    #ifndef NDEBUG
    /* Debug-build sanity check: any string that is not nil must pass
     * utf8valid().  In NDEBUG builds this is a no-op macro. */
    static inline void
    UTF8_assert(const char *s)
    {
        if (!strNil(s))
            assert(utf8valid(s) == 0);
    }
    #else
    #define UTF8_assert(s)          ((void) 0)
    #endif
    3113             : 
    /* return the codepoint position of end within s, i.e. the result of
     * utf8nlen() over the end - s bytes that precede end (presumably the
     * number of codepoints in that prefix); returns -1 if end lies
     * before s */
    static inline int
    UTF8_strpos(const char *s, const char *end)
    {
        UTF8_assert(s);

        return s > end ? -1 : (int) utf8nlen(s, (size_t) (end - s));
    }
    3125             : 
    3126             : /* return a pointer to the byte that starts the pos'th (0-based)
    3127             :  * codepoint in s */
    3128             : static inline str
    3129     7159739 : UTF8_strtail(const char *s, int pos)
    3130             : {
    3131     7159739 :         UTF8_assert(s);
    3132   102702937 :         while (*s) {
    3133   102250957 :                 if ((*s & 0xC0) != 0x80) {
    3134   102250887 :                         if (pos <= 0)
    3135             :                                 break;
    3136    95543128 :                         pos--;
    3137             :                 }
    3138    95543198 :                 s++;
    3139             :         }
    3140     7653398 :         return (str) s;
    3141             : }
    3142             : 
    3143             : /* copy n Unicode codepoints from s to dst, return pointer to new end */
    3144             : static inline str
    3145         216 : UTF8_strncpy(char *restrict dst, const char *restrict s, int n)
    3146             : {
    3147         216 :         UTF8_assert(s);
    3148        1451 :         while (*s && n) {
    3149        1235 :                 if ((*s & 0xF8) == 0xF0) {
    3150             :                         /* 4 byte UTF-8 sequence */
    3151           0 :                         *dst++ = *s++;
    3152           0 :                         *dst++ = *s++;
    3153           0 :                         *dst++ = *s++;
    3154           0 :                         *dst++ = *s++;
    3155        1235 :                 } else if ((*s & 0xF0) == 0xE0) {
    3156             :                         /* 3 byte UTF-8 sequence */
    3157           6 :                         *dst++ = *s++;
    3158           6 :                         *dst++ = *s++;
    3159           6 :                         *dst++ = *s++;
    3160        1229 :                 } else if ((*s & 0xE0) == 0xC0) {
    3161             :                         /* 2 byte UTF-8 sequence */
    3162           0 :                         *dst++ = *s++;
    3163           0 :                         *dst++ = *s++;
    3164             :                 } else {
    3165             :                         /* 1 byte UTF-8 "sequence" */
    3166        1229 :                         *dst++ = *s++;
    3167             :                 }
    3168        1235 :                 n--;
    3169             :         }
    3170         216 :         *dst = '\0';
    3171         216 :         return dst;
    3172             : }
    3173             : 
    /* return the value of utf8len(s) as an int (the number of Unicode
     * codepoints in s); s must not be nil, which is only checked by an
     * assertion */
    int
    UTF8_strlen(const char *s)
    {
        int ncodepoints;

        UTF8_assert(s);
        assert(!strNil(s));     /* callers deal with nil themselves */

        ncodepoints = (int) utf8len(s);
        return ncodepoints;
    }
    3183             : 
    /* return the length of s in bytes, i.e. (int) strlen(s); s must not
     * be nil, which is only checked by an assertion */
    int
    str_strlen(const char *s)
    {
        int nbytes;

        UTF8_assert(s);
        assert(!strNil(s));     /* callers deal with nil themselves */

        nbytes = (int) strlen(s);
        return nbytes;
    }
    3193             : 
    3194             : /* return the display width of s */
    3195             : int
    3196     5723019 : UTF8_strwidth(const char *s)
    3197             : {
    3198     5723019 :         int len = 0;
    3199     5723019 :         int c;
    3200     5723019 :         int n;
    3201             : 
    3202     5723019 :         if (strNil(s))
    3203      214261 :                 return int_nil;
    3204             :         c = 0;
    3205             :         n = 0;
    3206   141751468 :         while (*s != 0) {
    3207   136242710 :                 if ((*s & 0x80) == 0) {
    3208   136229514 :                         assert(n == 0);
    3209   136229514 :                         len++;
    3210   136229514 :                         n = 0;
    3211       13196 :                 } else if ((*s & 0xC0) == 0x80) {
    3212        8679 :                         c = (c << 6) | (*s & 0x3F);
    3213        8679 :                         if (--n == 0) {
    3214             :                                 /* last byte of a multi-byte character */
    3215        4517 :                                 len++;
    3216             :                                 /* this list was created by combining
    3217             :                                  * the code points marked as
    3218             :                                  * Emoji_Presentation in
    3219             :                                  * /usr/share/unicode/emoji/emoji-data.txt
    3220             :                                  * and code points marked either F or
    3221             :                                  * W in EastAsianWidth.txt; this list
    3222             :                                  * is up-to-date with Unicode 9.0 */
    3223        4517 :                                 if ((0x1100 <= c && c <= 0x115F) ||
    3224        4517 :                                         (0x231A <= c && c <= 0x231B) ||
    3225             :                                         (0x2329 <= c && c <= 0x232A) ||
    3226             :                                         (0x23E9 <= c && c <= 0x23EC) ||
    3227             :                                         c == 0x23F0 ||
    3228             :                                         c == 0x23F3 ||
    3229             :                                         (0x25FD <= c && c <= 0x25FE) ||
    3230             :                                         (0x2614 <= c && c <= 0x2615) ||
    3231             :                                         (0x2648 <= c && c <= 0x2653) ||
    3232             :                                         c == 0x267F ||
    3233             :                                         c == 0x2693 ||
    3234             :                                         c == 0x26A1 ||
    3235             :                                         (0x26AA <= c && c <= 0x26AB) ||
    3236             :                                         (0x26BD <= c && c <= 0x26BE) ||
    3237             :                                         (0x26C4 <= c && c <= 0x26C5) ||
    3238             :                                         c == 0x26CE ||
    3239             :                                         c == 0x26D4 ||
    3240             :                                         c == 0x26EA ||
    3241             :                                         (0x26F2 <= c && c <= 0x26F3) ||
    3242             :                                         c == 0x26F5 ||
    3243             :                                         c == 0x26FA ||
    3244             :                                         c == 0x26FD ||
    3245             :                                         c == 0x2705 ||
    3246             :                                         (0x270A <= c && c <= 0x270B) ||
    3247             :                                         c == 0x2728 ||
    3248             :                                         c == 0x274C ||
    3249             :                                         c == 0x274E ||
    3250             :                                         (0x2753 <= c && c <= 0x2755) ||
    3251             :                                         c == 0x2757 ||
    3252             :                                         (0x2795 <= c && c <= 0x2797) ||
    3253             :                                         c == 0x27B0 ||
    3254             :                                         c == 0x27BF ||
    3255             :                                         (0x2B1B <= c && c <= 0x2B1C) ||
    3256             :                                         c == 0x2B50 ||
    3257             :                                         c == 0x2B55 ||
    3258             :                                         (0x2E80 <= c && c <= 0x2E99) ||
    3259             :                                         (0x2E9B <= c && c <= 0x2EF3) ||
    3260             :                                         (0x2F00 <= c && c <= 0x2FD5) ||
    3261             :                                         (0x2FF0 <= c && c <= 0x2FFB) ||
    3262             :                                         (0x3000 <= c && c <= 0x303E) ||
    3263             :                                         (0x3041 <= c && c <= 0x3096) ||
    3264             :                                         (0x3099 <= c && c <= 0x30FF) ||
    3265             :                                         (0x3105 <= c && c <= 0x312D) ||
    3266             :                                         (0x3131 <= c && c <= 0x318E) ||
    3267             :                                         (0x3190 <= c && c <= 0x31BA) ||
    3268             :                                         (0x31C0 <= c && c <= 0x31E3) ||
    3269             :                                         (0x31F0 <= c && c <= 0x321E) ||
    3270             :                                         (0x3220 <= c && c <= 0x3247) ||
    3271             :                                         (0x3250 <= c && c <= 0x32FE) ||
    3272             :                                         (0x3300 <= c && c <= 0x4DBF) ||
    3273             :                                         (0x4E00 <= c && c <= 0xA48C) ||
    3274             :                                         (0xA490 <= c && c <= 0xA4C6) ||
    3275             :                                         (0xA960 <= c && c <= 0xA97C) ||
    3276             :                                         (0xAC00 <= c && c <= 0xD7A3) ||
    3277             :                                         (0xF900 <= c && c <= 0xFAFF) ||
    3278             :                                         (0xFE10 <= c && c <= 0xFE19) ||
    3279             :                                         (0xFE30 <= c && c <= 0xFE52) ||
    3280             :                                         (0xFE54 <= c && c <= 0xFE66) ||
    3281             :                                         (0xFE68 <= c && c <= 0xFE6B) ||
    3282             :                                         (0xFF01 <= c && c <= 0xFF60) ||
    3283             :                                         (0xFFE0 <= c && c <= 0xFFE6) ||
    3284             :                                         c == 0x16FE0 ||
    3285             :                                         (0x17000 <= c && c <= 0x187EC) ||
    3286             :                                         (0x18800 <= c && c <= 0x18AF2) ||
    3287             :                                         (0x1B000 <= c && c <= 0x1B001) ||
    3288             :                                         c == 0x1F004 ||
    3289             :                                         c == 0x1F0CF ||
    3290             :                                         c == 0x1F18E || (0x1F191 <= c && c <= 0x1F19A) ||
    3291             :                                         /* removed 0x1F1E6..0x1F1FF */
    3292             :                                         (0x1F200 <= c && c <= 0x1F202) ||
    3293             :                                         (0x1F210 <= c && c <= 0x1F23B) ||
    3294             :                                         (0x1F240 <= c && c <= 0x1F248) ||
    3295             :                                         (0x1F250 <= c && c <= 0x1F251) ||
    3296             :                                         (0x1F300 <= c && c <= 0x1F320) ||
    3297             :                                         (0x1F32D <= c && c <= 0x1F335) ||
    3298             :                                         (0x1F337 <= c && c <= 0x1F37C) ||
    3299             :                                         (0x1F37E <= c && c <= 0x1F393) ||
    3300             :                                         (0x1F3A0 <= c && c <= 0x1F3CA) ||
    3301             :                                         (0x1F3CF <= c && c <= 0x1F3D3) ||
    3302             :                                         (0x1F3E0 <= c && c <= 0x1F3F0) ||
    3303             :                                         c == 0x1F3F4 ||
    3304             :                                         (0x1F3F8 <= c && c <= 0x1F43E) ||
    3305             :                                         c == 0x1F440 ||
    3306             :                                         (0x1F442 <= c && c <= 0x1F4FC) ||
    3307             :                                         (0x1F4FF <= c && c <= 0x1F53D) ||
    3308             :                                         (0x1F54B <= c && c <= 0x1F54E) ||
    3309             :                                         (0x1F550 <= c && c <= 0x1F567) ||
    3310             :                                         c == 0x1F57A ||
    3311             :                                         (0x1F595 <= c && c <= 0x1F596) ||
    3312             :                                         c == 0x1F5A4 ||
    3313             :                                         (0x1F5FB <= c && c <= 0x1F64F) ||
    3314             :                                         (0x1F680 <= c && c <= 0x1F6C5) ||
    3315             :                                         c == 0x1F6CC ||
    3316             :                                         (0x1F6D0 <= c && c <= 0x1F6D2) ||
    3317             :                                         (0x1F6EB <= c && c <= 0x1F6EC) ||
    3318             :                                         (0x1F6F4 <= c && c <= 0x1F6F6) ||
    3319             :                                         (0x1F910 <= c && c <= 0x1F91E) ||
    3320             :                                         (0x1F920 <= c && c <= 0x1F927) ||
    3321             :                                         c == 0x1F930 ||
    3322             :                                         (0x1F933 <= c && c <= 0x1F93E) ||
    3323             :                                         (0x1F940 <= c && c <= 0x1F94B) ||
    3324             :                                         (0x1F950 <= c && c <= 0x1F95E) ||
    3325             :                                         (0x1F980 <= c && c <= 0x1F991) ||
    3326             :                                         c == 0x1F9C0 ||
    3327             :                                         (0x20000 <= c && c <= 0x2FFFD) ||
    3328             :                                         (0x30000 <= c && c <= 0x3FFFD))
    3329          84 :                                         len++;
    3330             :                         }
    3331        4517 :                 } else if ((*s & 0xE0) == 0xC0) {
    3332         402 :                         assert(n == 0);
    3333         402 :                         n = 1;
    3334         402 :                         c = *s & 0x1F;
    3335        4115 :                 } else if ((*s & 0xF0) == 0xE0) {
    3336        4068 :                         assert(n == 0);
    3337        4068 :                         n = 2;
    3338        4068 :                         c = *s & 0x0F;
    3339          47 :                 } else if ((*s & 0xF8) == 0xF0) {
    3340          47 :                         assert(n == 0);
    3341          47 :                         n = 3;
    3342          47 :                         c = *s & 0x07;
    3343           0 :                 } else if ((*s & 0xFC) == 0xF8) {
    3344           0 :                         assert(n == 0);
    3345           0 :                         n = 4;
    3346           0 :                         c = *s & 0x03;
    3347             :                 } else {
    3348           0 :                         assert(0);
    3349             :                         n = 0;
    3350             :                 }
    3351   136242710 :                 s++;
    3352             :         }
    3353             :         return len;
    3354             : }
    3355             : 
    3356             : str
    3357      108615 : str_case_hash_lock(bool upper)
    3358             : {
    3359      108615 :         BAT *b = upper ? UTF8_toUpperFrom : UTF8_toLowerFrom;
    3360             : 
    3361      108615 :         if (BAThash(b) != GDK_SUCCEED)
    3362           0 :                 throw(MAL, "str.str_case_hash_lock", GDK_EXCEPTION);
    3363      108615 :         MT_rwlock_rdlock(&b->thashlock);
    3364      108615 :         if (b->thash)
    3365             :                 return MAL_SUCCEED;
    3366           0 :         MT_rwlock_rdunlock(&b->thashlock);
    3367           0 :         throw(MAL, "str.str_case_hash_lock", "Lost hash");
    3368             : }
    3369             : 
    3370             : void
    3371      108615 : str_case_hash_unlock(bool upper)
    3372             : {
    3373          90 :         BAT *b = upper ? UTF8_toUpperFrom : UTF8_toLowerFrom;
    3374          90 :         MT_rwlock_rdunlock(&b->thashlock);
    3375          90 : }
    3376             : 
    3377             : static inline str
    3378      289684 : convertCase(BAT *from, BAT *to, str *buf, size_t *buflen, const char *src,
    3379             :                         const char *malfunc)
    3380             : {
    3381      289684 :         size_t len = strlen(src);
    3382      289684 :         char *dst;
    3383      289684 :         const char *end = src + len;
    3384      289684 :         bool lower_to_upper = from == UTF8_toUpperFrom;
    3385      289684 :         const Hash *h = from->thash;
    3386      289684 :         const int *restrict fromb = (const int *restrict) from->theap->base;
    3387      289684 :         const int *restrict tob = (const int *restrict) to->theap->base;
    3388             : 
    3389             :         /* the from and to bats are not views */
    3390      289684 :         assert(from->tbaseoff == 0);
    3391      289684 :         assert(to->tbaseoff == 0);
    3392      289684 :         CHECK_STR_BUFFER_LENGTH(buf, buflen, len + 1, malfunc);
    3393      289684 :         dst = *buf;
    3394     4183832 :         while (src < end) {
    3395     3894245 :                 int c;
    3396             : 
    3397     3894245 :                 UTF8_GETCHAR(c, src);
    3398     3894245 :                 if (c < 192) {                       /* the first 191 characters in unicode are trivial to convert */
    3399             :                         /* for ASCII characters we don't need to do a hash lookup */
    3400     3894176 :                         if (lower_to_upper) {
    3401     3183181 :                                 if ('a' <= c && c <= 'z')
    3402     2492153 :                                         c += 'A' - 'a';
    3403             :                         } else {
    3404      710995 :                                 if ('A' <= c && c <= 'Z')
    3405      670620 :                                         c += 'a' - 'A';
    3406             :                         }
    3407             :                 } else {
    3408             :                         /* use hash, even though BAT is sorted */
    3409          72 :                         for (BUN hb = HASHget(h, hash_int(h, &c));
    3410         114 :                                  hb != BUN_NONE; hb = HASHgetlink(h, hb)) {
    3411          62 :                                 if (c == fromb[hb]) {
    3412          17 :                                         c = tob[hb];
    3413          17 :                                         break;
    3414             :                                 }
    3415             :                         }
    3416             :                 }
    3417     3894245 :                 if (dst + UTF8_CHARLEN(c) > *buf + len) {
    3418             :                         /* doesn't fit, so allocate more space;
    3419             :                          * also allocate enough for the rest of the
    3420             :                          * source */
    3421          97 :                         size_t off = dst - *buf;
    3422          97 :                         size_t nextlen = (len += 4 + (end - src)) + 1;
    3423             : 
    3424             :                         /* Don't use CHECK_STR_BUFFER_LENGTH here, because it
    3425             :                          * does GDKmalloc instead of GDKrealloc and data could be lost */
    3426          97 :                         if (nextlen > *buflen) {
    3427          97 :                                 size_t newlen = ((nextlen + 1023) & ~1023); /* align to a multiple of 1024 bytes */
    3428          97 :                                 str newbuf = GDKrealloc(*buf, newlen);
    3429           0 :                                 if (!newbuf)
    3430           0 :                                         throw(MAL, malfunc, SQLSTATE(HY013) MAL_MALLOC_FAIL);
    3431           0 :                                 *buf = newbuf;
    3432           0 :                                 *buflen = newlen;
    3433             :                         }
    3434           0 :                         dst = *buf + off;
    3435             :                 }
    3436     3894148 :                 UTF8_PUTCHAR(c, dst);
    3437             :         }
    3438      289587 :         *dst = 0;
    3439      289587 :         return MAL_SUCCEED;
    3440           0 :   illegal:
    3441           0 :         throw(MAL, malfunc, SQLSTATE(42000) "Illegal Unicode code point");
    3442             : }
    3443             : 
    3444             : /*
    3445             :  * Here you find the wrappers around the version 4 library code
    3446             :  * It also contains the direct implementation of the string
    3447             :  * matching support routines.
    3448             :  */
    3449             : #include "mal_exception.h"
    3450             : 
    /*
     * The SQL like function returns a boolean
     *
     * s:   the NUL-terminated (UTF-8) string to test
     * pat: the pattern: '_' matches exactly one character, '%' matches
     *      any (possibly empty) sequence of characters, everything else
     *      matches itself
     * esc: NULL, or a string whose first byte is the escape character;
     *      the pattern byte following an escape character is matched
     *      literally
     *
     * Returns true if the whole of s matches the whole of pat.
     */
    static bool
    STRlike(const char *s, const char *pat, const char *esc)
    {
        const char *t, *p;

        t = s;
        for (p = pat; *p && *t; p++) {
            if (esc && *p == *esc) {
                /* escaped pattern byte must match literally */
                p++;
                if (*p != *t)
                    return false;
                t++;
            } else if (*p == '_') {
                /* consume one complete character, i.e. the lead byte
                 * plus any UTF-8 continuation bytes (10xxxxxx) */
                t++;
                while ((*t & 0xC0) == 0x80)
                    t++;
            } else if (*p == '%') {
                /* a run of '%' is equivalent to a single '%' */
                do {
                    p++;
                } while (*p == '%');
                if (*p == 0)
                    return true;    /* tail is acceptable */
                /* try to match the rest of the pattern at every
                 * character boundary in the rest of the string */
                for (; *t; t++) {
                    if ((*t & 0xC0) == 0x80)
                        continue;   /* not the start of a character */
                    if (STRlike(t, p, esc))
                        return true;
                }
                /* the rest of the pattern needs at least one more
                 * character, but the string is exhausted */
                return false;
            } else if (*p == *t) {
                t++;
            } else {
                return false;
            }
        }
        /* any number of trailing '%' matches the empty remainder */
        while (*p == '%')
            p++;
        return *t == 0 && *p == 0;
    }
    3489             : 
    3490             : static str
    3491           0 : STRlikewrap3(bit *ret, const str *s, const str *pat, const str *esc)
    3492             : {
    3493           0 :         if (strNil(*s) || strNil(*pat) || strNil(*esc))
    3494           0 :                 *ret = bit_nil;
    3495             :         else
    3496           0 :                 *ret = (bit) STRlike(*s, *pat, *esc);
    3497           0 :         return MAL_SUCCEED;
    3498             : }
    3499             : 
    3500             : static str
    3501           0 : STRlikewrap(bit *ret, const str *s, const str *pat)
    3502             : {
    3503           0 :         if (strNil(*s) || strNil(*pat))
    3504           0 :                 *ret = bit_nil;
    3505             :         else
    3506           0 :                 *ret = (bit) STRlike(*s, *pat, NULL);
    3507           0 :         return MAL_SUCCEED;
    3508             : }
    3509             : 
    3510             : static str
    3511           0 : STRtostr(str *res, const str *src)
    3512             : {
    3513           0 :         if (*src == 0)
    3514           0 :                 *res = GDKstrdup(str_nil);
    3515             :         else
    3516           0 :                 *res = GDKstrdup(*src);
    3517           0 :         if (*res == NULL)
    3518           0 :                 throw(MAL, "str.str", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    3519             :         return MAL_SUCCEED;
    3520             : }
    3521             : 
    3522             : static str
    3523          91 : STRLength(int *res, const str *arg1)
    3524             : {
    3525          91 :         const char *s = *arg1;
    3526             : 
    3527         182 :         *res = strNil(s) ? int_nil : UTF8_strlen(s);
    3528          91 :         return MAL_SUCCEED;
    3529             : }
    3530             : 
    3531             : static str
    3532           3 : STRBytes(int *res, const str *arg1)
    3533             : {
    3534           3 :         const char *s = *arg1;
    3535             : 
    3536           6 :         *res = strNil(s) ? int_nil : str_strlen(s);
    3537           3 :         return MAL_SUCCEED;
    3538             : }
    3539             : 
    3540             : str
    3541        4262 : str_tail(str *buf, size_t *buflen, const char *s, int off)
    3542             : {
    3543        4262 :         if (off < 0) {
    3544           1 :                 off += UTF8_strlen(s);
    3545           1 :                 if (off < 0)
    3546             :                         off = 0;
    3547             :         }
    3548        4262 :         char *tail = UTF8_strtail(s, off);
    3549        4262 :         size_t nextlen = strlen(tail) + 1;
    3550        4262 :         CHECK_STR_BUFFER_LENGTH(buf, buflen, nextlen, "str.tail");
    3551        4262 :         strcpy(*buf, tail);
    3552        4262 :         return MAL_SUCCEED;
    3553             : }
    3554             : 
    3555             : static str
    3556           1 : STRTail(str *res, const str *arg1, const int *offset)
    3557             : {
    3558           1 :         str buf = NULL, msg = MAL_SUCCEED;
    3559           1 :         const char *s = *arg1;
    3560           1 :         int off = *offset;
    3561             : 
    3562           2 :         if (strNil(s) || is_int_nil(off)) {
    3563           0 :                 *res = GDKstrdup(str_nil);
    3564             :         } else {
    3565           1 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    3566             : 
    3567           1 :                 *res = NULL;
    3568           1 :                 if (!(buf = GDKmalloc(buflen)))
    3569           0 :                         throw(MAL, "str.tail", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    3570           1 :                 if ((msg = str_tail(&buf, &buflen, s, off)) != MAL_SUCCEED) {
    3571           0 :                         GDKfree(buf);
    3572           0 :                         return msg;
    3573             :                 }
    3574           1 :                 *res = GDKstrdup(buf);
    3575             :         }
    3576             : 
    3577           1 :         GDKfree(buf);
    3578           1 :         if (!*res)
    3579           0 :                 msg = createException(MAL, "str.tail", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    3580             :         return msg;
    3581             : }
    3582             : 
    3583             : /* copy the substring s[off:off+l] into *buf, replacing *buf with a
    3584             :  * freshly allocated buffer if the substring doesn't fit; off is 0
    3585             :  * based, and both off and l count in Unicode codepoints (i.e. not
    3586             :  * bytes); if off < 0, off counts from the end of the string */
    3587             : str
    3588     3901532 : str_Sub_String(str *buf, size_t *buflen, const char *s, int off, int l)
    3589             : {
    3590     3901532 :         size_t len;
    3591             : 
    3592     3901532 :         if (off < 0) {
    3593           4 :                 off += UTF8_strlen(s);
    3594           4 :                 if (off < 0) {
    3595           3 :                         l += off;
    3596           3 :                         off = 0;
    3597             :                 }
    3598             :         }
    3599             :         /* here, off >= 0 */
    3600     3901532 :         if (l < 0) {
    3601        1228 :                 strcpy(*buf, "");
    3602        1228 :                 return MAL_SUCCEED;
    3603             :         }
    3604     3900304 :         s = UTF8_strtail(s, off);
    3605     3918215 :         len = (size_t) (UTF8_strtail(s, l) - s + 1);
    3606     3916163 :         CHECK_STR_BUFFER_LENGTH(buf, buflen, len, "str.substring");
    3607     3916163 :         strcpy_len(*buf, s, len);
    3608     3916163 :         return MAL_SUCCEED;
    3609             : }
    3610             : 
    3611             : static str
    3612           4 : STRSubString(str *res, const str *arg1, const int *offset, const int *length)
    3613             : {
    3614           4 :         str buf = NULL, msg = MAL_SUCCEED;
    3615           4 :         const char *s = *arg1;
    3616           4 :         int off = *offset, len = *length;
    3617             : 
    3618           7 :         if (strNil(s) || is_int_nil(off) || is_int_nil(len)) {
    3619           1 :                 *res = GDKstrdup(str_nil);
    3620             :         } else {
    3621           3 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    3622             : 
    3623           3 :                 *res = NULL;
    3624           3 :                 if (!(buf = GDKmalloc(buflen)))
    3625           0 :                         throw(MAL, "str.substring", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    3626           3 :                 if ((msg = str_Sub_String(&buf, &buflen, s, off, len)) != MAL_SUCCEED) {
    3627           0 :                         GDKfree(buf);
    3628           0 :                         return msg;
    3629             :                 }
    3630           3 :                 *res = GDKstrdup(buf);
    3631             :         }
    3632             : 
    3633           4 :         GDKfree(buf);
    3634           4 :         if (!*res)
    3635           0 :                 msg = createException(MAL, "str.substring",
    3636             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    3637             :         return msg;
    3638             : }
    3639             : 
    3640             : str
    3641           4 : str_from_wchr(str *buf, size_t *buflen, int c)
    3642             : {
    3643           4 :         CHECK_STR_BUFFER_LENGTH(buf, buflen, 5, "str.unicode");
    3644           4 :         str s = *buf;
    3645           4 :         UTF8_PUTCHAR(c, s);
    3646           4 :         *s = 0;
    3647           4 :         return MAL_SUCCEED;
    3648           0 :   illegal:
    3649           0 :         throw(MAL, "str.unicode", SQLSTATE(42000) "Illegal Unicode code point");
    3650             : }
    3651             : 
    3652             : static str
    3653           2 : STRFromWChr(str *res, const int *c)
    3654             : {
    3655           2 :         str buf = NULL, msg = MAL_SUCCEED;
    3656           2 :         int cc = *c;
    3657             : 
    3658           2 :         if (is_int_nil(cc)) {
    3659           0 :                 *res = GDKstrdup(str_nil);
    3660             :         } else {
    3661           2 :                 size_t buflen = MAX(strlen(str_nil) + 1, 8);
    3662             : 
    3663           2 :                 *res = NULL;
    3664           2 :                 if (!(buf = GDKmalloc(buflen)))
    3665           0 :                         throw(MAL, "str.unicode", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    3666           2 :                 if ((msg = str_from_wchr(&buf, &buflen, cc)) != MAL_SUCCEED) {
    3667           0 :                         GDKfree(buf);
    3668           0 :                         return msg;
    3669             :                 }
    3670           2 :                 *res = GDKstrdup(buf);
    3671             :         }
    3672             : 
    3673           2 :         GDKfree(buf);
    3674           2 :         if (!*res)
    3675           0 :                 msg = createException(MAL, "str.unicode",
    3676             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    3677             :         return msg;
    3678             : }
    3679             : 
    3680             : /* return the Unicode code point of arg1 at position at */
    3681             : str
    3682          31 : str_wchr_at(int *res, const char *s, int at)
    3683             : {
    3684             :         /* 64bit: should have lng arg */
    3685          60 :         if (strNil(s) || is_int_nil(at) || at < 0) {
    3686           2 :                 *res = int_nil;
    3687           2 :                 return MAL_SUCCEED;
    3688             :         }
    3689          29 :         s = UTF8_strtail(s, at);
    3690          29 :         if (s == NULL || *s == 0) {
    3691           6 :                 *res = int_nil;
    3692           6 :                 return MAL_SUCCEED;
    3693             :         }
    3694          23 :         UTF8_GETCHAR(*res, s);
    3695             :         return MAL_SUCCEED;
    3696           0 :   illegal:
    3697           0 :         throw(MAL, "str.unicodeAt", SQLSTATE(42000) "Illegal Unicode code point");
    3698             : }
    3699             : 
    3700             : static str
    3701           0 : STRWChrAt(int *res, const str *arg1, const int *at)
    3702             : {
    3703           0 :         return str_wchr_at(res, *arg1, *at);
    3704             : }
    3705             : 
    3706             : str
    3707       88451 : str_lower(str *buf, size_t *buflen, const char *s)
    3708             : {
    3709       88451 :         return convertCase(UTF8_toLowerFrom, UTF8_toLowerTo, buf, buflen, s,
    3710             :                                            "str.lower");
    3711             : }
    3712             : 
    3713             : static inline str
    3714        4612 : STRlower(str *res, const str *arg1)
    3715             : {
    3716        4612 :         str buf = NULL, msg = MAL_SUCCEED;
    3717        4612 :         const char *s = *arg1;
    3718             : 
    3719        4612 :         if (strNil(s)) {
    3720         312 :                 *res = GDKstrdup(str_nil);
    3721             :         } else {
    3722        4300 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    3723             : 
    3724        4300 :                 *res = NULL;
    3725        4300 :                 if (!(buf = GDKmalloc(buflen)))
    3726           0 :                         throw(MAL, "str.lower", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    3727        4300 :                 if ((msg = str_case_hash_lock(false))) {
    3728           0 :                         GDKfree(buf);
    3729           0 :                         return msg;
    3730             :                 }
    3731        4300 :                 msg = str_lower(&buf, &buflen, s);
    3732        4300 :                 str_case_hash_unlock(false);
    3733        4300 :                 if (msg != MAL_SUCCEED) {
    3734           0 :                         GDKfree(buf);
    3735           0 :                         return msg;
    3736             :                 }
    3737        4300 :                 *res = GDKstrdup(buf);
    3738             :         }
    3739             : 
    3740        4612 :         GDKfree(buf);
    3741        4612 :         if (!*res)
    3742           0 :                 msg = createException(MAL, "str.lower",
    3743             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    3744             :         return msg;
    3745             : }
    3746             : 
    3747             : str
    3748      201232 : str_upper(str *buf, size_t *buflen, const char *s)
    3749             : {
    3750      201232 :         return convertCase(UTF8_toUpperFrom, UTF8_toUpperTo, buf, buflen, s,
    3751             :                                            "str.upper");
    3752             : }
    3753             : 
    3754             : static str
    3755      104493 : STRupper(str *res, const str *arg1)
    3756             : {
    3757      104493 :         str buf = NULL, msg = MAL_SUCCEED;
    3758      104493 :         const char *s = *arg1;
    3759             : 
    3760      104493 :         if (strNil(s)) {
    3761         268 :                 *res = GDKstrdup(str_nil);
    3762             :         } else {
    3763      104225 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    3764             : 
    3765      104225 :                 *res = NULL;
    3766      104225 :                 if (!(buf = GDKmalloc(buflen)))
    3767           0 :                         throw(MAL, "str.upper", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    3768      104225 :                 if ((msg = str_case_hash_lock(true))) {
    3769           0 :                         GDKfree(buf);
    3770           0 :                         return msg;
    3771             :                 }
    3772      104225 :                 msg = str_upper(&buf, &buflen, s);
    3773      104225 :                 str_case_hash_unlock(true);
    3774      104225 :                 if (msg != MAL_SUCCEED) {
    3775           0 :                         GDKfree(buf);
    3776           0 :                         return msg;
    3777             :                 }
    3778      104225 :                 *res = GDKstrdup(buf);
    3779             :         }
    3780             : 
    3781      104493 :         GDKfree(buf);
    3782      104493 :         if (!*res)
    3783           0 :                 msg = createException(MAL, "str.upper",
    3784             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    3785             :         return msg;
    3786             : }
    3787             : 
    3788             : /* returns whether arg1 starts with arg2 */
    3789             : int
    3790        1368 : str_is_prefix(const char *s, const char *prefix, int plen)
    3791             : {
    3792        1368 :         return strncmp(s, prefix, plen);
    3793             : }
    3794             : 
    3795             : int
    3796          65 : str_is_iprefix(const char *s, const char *prefix, int plen)
    3797             : {
    3798          65 :         return utf8ncasecmp(s, prefix, plen);
    3799             : }
    3800             : 
    3801             : int
    3802        2360 : str_is_suffix(const char *s, const char *suffix, int sul)
    3803             : {
    3804        2360 :         int sl = str_strlen(s);
    3805             : 
    3806        2360 :         if (sl < sul)
    3807             :                 return -1;
    3808             :         else
    3809        2347 :                 return strcmp(s + sl - sul, suffix);
    3810             : }
    3811             : 
    3812             : /* case insensitive endswith check */
    3813             : int
    3814         120 : str_is_isuffix(const char *s, const char *suffix, int sul)
    3815             : {
    3816         120 :         const char *e = s + strlen(s);
    3817         120 :         const char *sf;
    3818             : 
    3819         120 :         (void) sul;
    3820             :         /* note that the uppercase and lowercase forms of a character aren't
    3821             :          * necessarily the same length in their UTF-8 encodings */
    3822         878 :         for (sf = suffix; *sf && e > s; sf++) {
    3823         758 :                 if ((*sf & 0xC0) != 0x80) {
    3824         764 :                         while ((*--e & 0xC0) == 0x80)
    3825             :                                 ;
    3826             :                 }
    3827             :         }
    3828         122 :         while ((*sf & 0xC0) == 0x80)
    3829           2 :                 sf++;
    3830         120 :         return *sf != 0 || utf8casecmp(e, suffix) != 0;
    3831             : }
    3832             : 
    3833             : int
    3834       14650 : str_contains(const char *h, const char *n, int nlen)
    3835             : {
    3836       14650 :         (void) nlen;
    3837       14650 :         return strstr(h, n) == NULL;
    3838             : }
    3839             : 
    3840             : int
    3841         164 : str_icontains(const char *h, const char *n, int nlen)
    3842             : {
    3843         164 :         (void) nlen;
    3844         164 :         return utf8casestr(h, n) == NULL;
    3845             : }
    3846             : 
    3847             : #define STR_MAPARGS(STK, PCI, R, S1, S2, ICASE)                                                 \
    3848             :         do{                                                                                                                                     \
    3849             :                 R = getArgReference(STK, PCI, 0);                                                               \
    3850             :                 S1 = *getArgReference_str(STK, PCI, 1);                                                 \
    3851             :                 S2 = *getArgReference_str(STK, PCI, 2);                                                 \
    3852             :                 icase = PCI->argc == 4 && *getArgReference_bit(STK, PCI, 3); \
    3853             :         } while(0)
    3854             : 
    3855             : static str
    3856          16 : STRstartswith(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    3857             : {
    3858          16 :         (void) cntxt;
    3859          16 :         (void) mb;
    3860             : 
    3861          16 :         str s1, s2;
    3862          16 :         bit *r, icase;
    3863             : 
    3864          16 :         STR_MAPARGS(stk, pci, r, s1, s2, icase);
    3865             : 
    3866          31 :         if (strNil(s1) || strNil(s2)) {
    3867           2 :                 *r = bit_nil;
    3868             :         } else {
    3869          14 :                 int s2_len = str_strlen(s2);
    3870          28 :                 *r = icase ?
    3871           5 :                         str_is_iprefix(s1, s2, s2_len) == 0 :
    3872           9 :                         str_is_prefix(s1, s2, s2_len) == 0;
    3873             :         }
    3874          16 :         return MAL_SUCCEED;
    3875             : }
    3876             : 
    3877             : static str
    3878          13 : STRendswith(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    3879             : {
    3880          13 :         (void) cntxt;
    3881          13 :         (void) mb;
    3882             : 
    3883          13 :         str s1, s2;
    3884          13 :         bit *r, icase;
    3885             : 
    3886          13 :         STR_MAPARGS(stk, pci, r, s1, s2, icase);
    3887             : 
    3888          25 :         if (strNil(s1) || strNil(s2)) {
    3889           2 :                 *r = bit_nil;
    3890             :         } else {
    3891          11 :                 int s2_len = str_strlen(s2);
    3892          22 :                 *r = icase ?
    3893           4 :                         str_is_isuffix(s1, s2, s2_len) == 0 :
    3894           7 :                         str_is_suffix(s1, s2, s2_len) == 0;
    3895             :         }
    3896          13 :         return MAL_SUCCEED;
    3897             : }
    3898             : 
    3899             : /* returns whether haystack contains needle */
    3900             : static str
    3901          15 : STRcontains(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    3902             : {
    3903          15 :         (void) cntxt;
    3904          15 :         (void) mb;
    3905             : 
    3906          15 :         str s1, s2;
    3907          15 :         bit *r, icase;
    3908             : 
    3909          15 :         STR_MAPARGS(stk, pci, r, s1, s2, icase);
    3910             : 
    3911          29 :         if (strNil(s1) || strNil(s2)) {
    3912           2 :                 *r = bit_nil;
    3913             :         } else {
    3914          13 :                 int s2_len = str_strlen(s2);
    3915          26 :                 *r = icase ?
    3916           5 :                         str_icontains(s1, s2, s2_len) == 0 :
    3917           8 :                         str_contains(s1, s2, s2_len) == 0;
    3918             :         }
    3919          15 :         return MAL_SUCCEED;
    3920             : }
    3921             : 
    3922             : int
    3923        4261 : str_search(const char *s, const char *s2, int slen)
    3924             : {
    3925        4261 :         (void) slen;
    3926             :         /* 64bit: should return lng */
    3927        4261 :         if ((s2 = strstr(s, s2)) != NULL)
    3928         723 :                 return UTF8_strpos(s, s2);
    3929             :         else
    3930             :                 return -1;
    3931             : }
    3932             : 
    3933             : int
    3934           0 : str_isearch(const char *s, const char *s2, int slen)
    3935             : {
    3936           0 :         (void) slen;
    3937             :         /* 64bit: should return lng */
    3938           0 :         if ((s2 = utf8casestr(s, s2)) != NULL)
    3939           0 :                 return UTF8_strpos(s, s2);
    3940             :         else
    3941             :                 return -1;
    3942             : }
    3943             : 
    3944             : /* find first occurrence of needle in haystack */
    3945             : static str
    3946           0 : STRstr_search(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    3947             : {
    3948           0 :         (void) cntxt;
    3949           0 :         (void) mb;
    3950           0 :         bit *res = getArgReference(stk, pci, 0);
    3951           0 :         const str *haystack = getArgReference(stk, pci, 1),
    3952           0 :                 *needle = getArgReference(stk, pci, 2);
    3953           0 :         bit icase = pci->argc == 4 && *getArgReference_bit(stk, pci, 3);
    3954           0 :         str s = *haystack, h = *needle, msg = MAL_SUCCEED;
    3955           0 :         if (strNil(s) || strNil(h)) {
    3956           0 :                 *res = bit_nil;
    3957             :         } else {
    3958           0 :                 int needle_len = str_strlen(h);
    3959             : 
    3960           0 :                 *res = icase ?
    3961           0 :                         str_isearch(s, h, needle_len) :
    3962           0 :                         str_search(s, h, needle_len);
    3963             :         }
    3964           0 :         return msg;
    3965             : }
    3966             : 
    3967             : int
    3968           0 : str_reverse_str_search(const char *s, const char *s2, int slen)
    3969             : {
    3970             :         /* 64bit: should return lng */
    3971           0 :         int len = str_strlen(s);
    3972           0 :         int res = -1;                           /* changed if found */
    3973             : 
    3974           0 :         if (len >= slen) {
    3975           0 :                 const char *p = s + len - slen;
    3976           0 :                 do {
    3977           0 :                         if (strncmp(p, s2, slen) == 0) {
    3978           0 :                                 res = UTF8_strpos(s, p);
    3979           0 :                                 break;
    3980             :                         }
    3981           0 :                 } while (p-- > s);
    3982             :         }
    3983           0 :         return res;
    3984             : }
    3985             : 
    3986             : int
    3987           0 : str_reverse_str_isearch(const char *s, const char *s2, int slen)
    3988             : {
    3989             :         /* 64bit: should return lng */
    3990           0 :         int len = str_strlen(s);
    3991           0 :         int res = -1;                           /* changed if found */
    3992             : 
    3993           0 :         if (len >= slen) {
    3994           0 :                 const char *p = s + len - slen;
    3995           0 :                 do {
    3996           0 :                         if (utf8ncasecmp(p, s2, slen) == 0) {
    3997           0 :                                 res = UTF8_strpos(s, p);
    3998           0 :                                 break;
    3999             :                         }
    4000           0 :                 } while (p-- > s);
    4001             :         }
    4002           0 :         return res;
    4003             : }
    4004             : 
    4005             : /* find last occurrence of arg2 in arg1 */
    4006             : static str
    4007           0 : STRrevstr_search(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    4008             : {
    4009           0 :         (void) cntxt;
    4010           0 :         (void) mb;
    4011           0 :         bit *res = getArgReference(stk, pci, 0);
    4012           0 :         const str *haystack = getArgReference(stk, pci, 1);
    4013           0 :         const str *needle = getArgReference(stk, pci, 2);
    4014           0 :         bit icase = pci->argc == 4 && *getArgReference_bit(stk, pci, 3);
    4015           0 :         str s = *haystack, h = *needle, msg = MAL_SUCCEED;
    4016           0 :         if (strNil(s) || strNil(h)) {
    4017           0 :                 *res = bit_nil;
    4018             :         } else {
    4019           0 :                 int needle_len = str_strlen(h);
    4020             : 
    4021           0 :                 *res = icase ?
    4022           0 :                         str_reverse_str_isearch(s, h, needle_len) :
    4023           0 :                         str_reverse_str_search(s, h, needle_len);
    4024             :         }
    4025           0 :         return msg;
    4026             : }
    4027             : 
    4028             : str
    4029          37 : str_splitpart(str *buf, size_t *buflen, const char *s, const char *s2, int f)
    4030             : {
    4031          37 :         size_t len;
    4032          37 :         char *p = NULL;
    4033             : 
    4034          37 :         if (f <= 0)
    4035           4 :                 throw(MAL, "str.splitpart",
    4036             :                           SQLSTATE(42000) "field position must be greater than zero");
    4037             : 
    4038          33 :         len = strlen(s2);
    4039          33 :         if (len) {
    4040          42 :                 while ((p = strstr(s, s2)) != NULL && f > 1) {
    4041          13 :                         s = p + len;
    4042          13 :                         f--;
    4043             :                 }
    4044             :         }
    4045             : 
    4046          33 :         if (f != 1) {
    4047          12 :                 strcpy(*buf, "");
    4048          12 :                 return MAL_SUCCEED;
    4049             :         }
    4050             : 
    4051          21 :         if (p == NULL) {
    4052          10 :                 len = strlen(s);
    4053             :         } else {
    4054          11 :                 len = (size_t) (p - s);
    4055             :         }
    4056             : 
    4057          21 :         len++;
    4058          21 :         CHECK_STR_BUFFER_LENGTH(buf, buflen, len, "str.splitpart");
    4059          21 :         strcpy_len(*buf, s, len);
    4060          21 :         return MAL_SUCCEED;
    4061             : }
    4062             : 
    4063             : static str
    4064          23 : STRsplitpart(str *res, str *haystack, str *needle, int *field)
    4065             : {
    4066          23 :         str buf = NULL, msg = MAL_SUCCEED;
    4067          23 :         const char *s = *haystack, *s2 = *needle;
    4068          23 :         int f = *field;
    4069             : 
    4070          69 :         if (strNil(s) || strNil(s2) || is_int_nil(f)) {
    4071           0 :                 *res = GDKstrdup(str_nil);
    4072             :         } else {
    4073          23 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    4074             : 
    4075          23 :                 *res = NULL;
    4076          23 :                 if (!(buf = GDKmalloc(buflen)))
    4077           4 :                         throw(MAL, "str.splitpart", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4078          23 :                 if ((msg = str_splitpart(&buf, &buflen, s, s2, f)) != MAL_SUCCEED) {
    4079           4 :                         GDKfree(buf);
    4080           4 :                         return msg;
    4081             :                 }
    4082          19 :                 *res = GDKstrdup(buf);
    4083             :         }
    4084             : 
    4085          19 :         GDKfree(buf);
    4086          19 :         if (!*res)
    4087           0 :                 msg = createException(MAL, "str.splitpart",
    4088             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4089             :         return msg;
    4090             : }
    4091             : 
    4092             : /* returns number of bytes to remove from left to strip the codepoints in rm */
    4093             : static size_t
    4094         331 : lstrip(const char *s, size_t len, const int *rm, size_t nrm)
    4095             : {
    4096         331 :         int c;
    4097         331 :         size_t i, n, skip = 0;
    4098             : 
    4099         473 :         while (len > 0) {
    4100         458 :                 UTF8_NEXTCHAR(c, n, s);
    4101         458 :                 assert(n > 0 && n <= len);
    4102        6938 :                 for (i = 0; i < nrm; i++) {
    4103        6622 :                         if (rm[i] == c) {
    4104         142 :                                 s += n;
    4105         142 :                                 skip += n;
    4106         142 :                                 len -= n;
    4107         142 :                                 break;
    4108             :                         }
    4109             :                 }
    4110         458 :                 if (i == nrm)
    4111             :                         break;
    4112             :         }
    4113         331 :         return skip;
    4114             : }
    4115             : 
    4116             : /* returns the resulting length of s after stripping codepoints in rm
    4117             :  * from the right */
    4118             : static size_t
    4119         407 : rstrip(const char *s, size_t len, const int *rm, size_t nrm)
    4120             : {
    4121         407 :         int c;
    4122         407 :         size_t i, n;
    4123             : 
    4124         576 :         while (len > 0) {
    4125         565 :                 UTF8_LASTCHAR(c, n, s, len);
    4126         565 :                 assert(n > 0 && n <= len);
    4127        8999 :                 for (i = 0; i < nrm; i++) {
    4128        8603 :                         if (rm[i] == c) {
    4129         169 :                                 len -= n;
    4130         169 :                                 break;
    4131             :                         }
    4132             :                 }
    4133         565 :                 if (i == nrm)
    4134             :                         break;
    4135             :         }
    4136         407 :         return len;
    4137             : }
    4138             : 
    4139             : const int whitespace[] = {
    4140             :         ' ',                                            /* space */
    4141             :         '\t',                                           /* tab (character tabulation) */
    4142             :         '\n',                                           /* line feed */
    4143             :         '\r',                                           /* carriage return */
    4144             :         '\f',                                           /* form feed */
    4145             :         '\v',                                           /* vertical tab (line tabulation) */
    4146             : /* below the code points that have the Unicode Zs (space separator) property */
    4147             :         0x00A0,                                         /* no-break space */
    4148             :         0x1680,                                         /* ogham space mark */
    4149             :         0x2000,                                         /* en quad */
    4150             :         0x2001,                                         /* em quad */
    4151             :         0x2002,                                         /* en space */
    4152             :         0x2003,                                         /* em space */
    4153             :         0x2004,                                         /* three-per-em space */
    4154             :         0x2005,                                         /* four-per-em space */
    4155             :         0x2006,                                         /* six-per-em space */
    4156             :         0x2007,                                         /* figure space */
    4157             :         0x2008,                                         /* punctuation space */
    4158             :         0x2009,                                         /* thin space */
    4159             :         0x200A,                                         /* hair space */
    4160             :         0x202F,                                         /* narrow no-break space */
    4161             :         0x205F,                                         /* medium mathematical space */
    4162             :         0x3000,                                         /* ideographic space */
    4163             : };
    4164             : 
    4165             : #define NSPACES         (sizeof(whitespace) / sizeof(whitespace[0]))
    4166             : 
    4167             : str
    4168         279 : str_strip(str *buf, size_t *buflen, const char *s)
    4169             : {
    4170         279 :         size_t len = strlen(s);
    4171         279 :         size_t n = lstrip(s, len, whitespace, NSPACES);
    4172         279 :         s += n;
    4173         279 :         len -= n;
    4174         279 :         n = rstrip(s, len, whitespace, NSPACES);
    4175             : 
    4176         279 :         n++;
    4177         279 :         CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.strip");
    4178         279 :         strcpy_len(*buf, s, n);
    4179         279 :         return MAL_SUCCEED;
    4180             : }
    4181             : 
    4182             : /* remove all whitespace from either side of arg1 */
    4183             : static str
    4184           8 : STRStrip(str *res, const str *arg1)
    4185             : {
    4186           8 :         str buf = NULL, msg = MAL_SUCCEED;
    4187           8 :         const char *s = *arg1;
    4188             : 
    4189           8 :         if (strNil(s)) {
    4190           0 :                 *res = GDKstrdup(str_nil);
    4191             :         } else {
    4192           8 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    4193             : 
    4194           8 :                 *res = NULL;
    4195           8 :                 if (!(buf = GDKmalloc(buflen)))
    4196           0 :                         throw(MAL, "str.strip", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4197           8 :                 if ((msg = str_strip(&buf, &buflen, s)) != MAL_SUCCEED) {
    4198           0 :                         GDKfree(buf);
    4199           0 :                         return msg;
    4200             :                 }
    4201           8 :                 *res = GDKstrdup(buf);
    4202             :         }
    4203             : 
    4204           8 :         GDKfree(buf);
    4205           8 :         if (!*res)
    4206           0 :                 msg = createException(MAL, "str.strip",
    4207             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4208             :         return msg;
    4209             : }
    4210             : 
    4211             : str
    4212          18 : str_ltrim(str *buf, size_t *buflen, const char *s)
    4213             : {
    4214          18 :         size_t len = strlen(s);
    4215          18 :         size_t n = lstrip(s, len, whitespace, NSPACES);
    4216          18 :         size_t nallocate = len - n + 1;
    4217             : 
    4218          18 :         CHECK_STR_BUFFER_LENGTH(buf, buflen, nallocate, "str.ltrim");
    4219          18 :         strcpy_len(*buf, s + n, nallocate);
    4220          18 :         return MAL_SUCCEED;
    4221             : }
    4222             : 
    4223             : /* remove all whitespace from the start (left) of arg1 */
    4224             : static str
    4225          10 : STRLtrim(str *res, const str *arg1)
    4226             : {
    4227          10 :         str buf = NULL, msg = MAL_SUCCEED;
    4228          10 :         const char *s = *arg1;
    4229             : 
    4230          10 :         if (strNil(s)) {
    4231           0 :                 *res = GDKstrdup(str_nil);
    4232             :         } else {
    4233          10 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    4234             : 
    4235          10 :                 *res = NULL;
    4236          10 :                 if (!(buf = GDKmalloc(buflen)))
    4237           0 :                         throw(MAL, "str.ltrim", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4238          10 :                 if ((msg = str_ltrim(&buf, &buflen, s)) != MAL_SUCCEED) {
    4239           0 :                         GDKfree(buf);
    4240           0 :                         return msg;
    4241             :                 }
    4242          10 :                 *res = GDKstrdup(buf);
    4243             :         }
    4244             : 
    4245          10 :         GDKfree(buf);
    4246          10 :         if (!*res)
    4247           0 :                 msg = createException(MAL, "str.ltrim",
    4248             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4249             :         return msg;
    4250             : }
    4251             : 
    4252             : str
    4253          96 : str_rtrim(str *buf, size_t *buflen, const char *s)
    4254             : {
    4255          96 :         size_t len = strlen(s);
    4256          96 :         size_t n = rstrip(s, len, whitespace, NSPACES);
    4257             : 
    4258          96 :         n++;
    4259          96 :         CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.rtrim");
    4260          96 :         strcpy_len(*buf, s, n);
    4261          96 :         return MAL_SUCCEED;
    4262             : }
    4263             : 
    4264             : /* remove all whitespace from the end (right) of arg1 */
    4265             : static str
    4266           6 : STRRtrim(str *res, const str *arg1)
    4267             : {
    4268           6 :         str buf = NULL, msg = MAL_SUCCEED;
    4269           6 :         const char *s = *arg1;
    4270             : 
    4271           6 :         if (strNil(s)) {
    4272           0 :                 *res = GDKstrdup(str_nil);
    4273             :         } else {
    4274           6 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    4275             : 
    4276           6 :                 *res = NULL;
    4277           6 :                 if (!(buf = GDKmalloc(buflen)))
    4278           0 :                         throw(MAL, "str.rtrim", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4279           6 :                 if ((msg = str_rtrim(&buf, &buflen, s)) != MAL_SUCCEED) {
    4280           0 :                         GDKfree(buf);
    4281           0 :                         return msg;
    4282             :                 }
    4283           6 :                 *res = GDKstrdup(buf);
    4284             :         }
    4285             : 
    4286           6 :         GDKfree(buf);
    4287           6 :         if (!*res)
    4288           0 :                 msg = createException(MAL, "str.rtrim",
    4289             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4290             :         return msg;
    4291             : }
    4292             : 
    4293             : /* return a list of codepoints in s */
    4294             : static str
    4295          45 : trimchars(str *buf, size_t *buflen, size_t *n, const char *s, size_t len_s,
    4296             :                   const char *malfunc)
    4297             : {
    4298          45 :         size_t len = 0, nlen = len_s * sizeof(int);
    4299          45 :         int c, *cbuf;
    4300             : 
    4301          45 :         assert(s);
    4302          45 :         CHECK_STR_BUFFER_LENGTH(buf, buflen, nlen, malfunc);
    4303          45 :         cbuf = *(int **) buf;
    4304             : 
    4305         221 :         while (*s) {
    4306         176 :                 UTF8_GETCHAR(c, s);
    4307         176 :                 assert(!is_int_nil(c));
    4308         176 :                 cbuf[len++] = c;
    4309             :         }
    4310          45 :         *n = len;
    4311          45 :         return MAL_SUCCEED;
    4312           0 :   illegal:
    4313           0 :         throw(MAL, malfunc, SQLSTATE(42000) "Illegal Unicode code point");
    4314             : }
    4315             : 
    4316             : str
    4317          22 : str_strip2(str *buf, size_t *buflen, const char *s, const char *s2)
    4318             : {
    4319          22 :         str msg = MAL_SUCCEED;
    4320          22 :         size_t len, n, n2, n3;
    4321             : 
    4322          22 :         if ((n2 = strlen(s2)) == 0) {
    4323           1 :                 len = strlen(s) + 1;
    4324           1 :                 CHECK_STR_BUFFER_LENGTH(buf, buflen, len, "str.strip2");
    4325           1 :                 strcpy(*buf, s);
    4326           1 :                 return MAL_SUCCEED;
    4327             :         } else {
    4328          21 :                 if ((msg = trimchars(buf, buflen, &n3, s2, n2, "str.strip2")) != MAL_SUCCEED)
    4329             :                         return msg;
    4330          21 :                 len = strlen(s);
    4331          21 :                 n = lstrip(s, len, *(int **) buf, n3);
    4332          21 :                 s += n;
    4333          21 :                 len -= n;
    4334          21 :                 n = rstrip(s, len, *(int **) buf, n3);
    4335             : 
    4336          21 :                 n++;
    4337          21 :                 CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.strip2");
    4338          21 :                 strcpy_len(*buf, s, n);
    4339          21 :                 return MAL_SUCCEED;
    4340             :         }
    4341             : }
    4342             : 
    4343             : /* remove the longest string containing only characters from arg2 from
    4344             :  * either side of arg1 */
    4345             : static str
    4346          19 : STRStrip2(str *res, const str *arg1, const str *arg2)
    4347             : {
    4348          19 :         str buf = NULL, msg = MAL_SUCCEED;
    4349          19 :         const char *s = *arg1, *s2 = *arg2;
    4350             : 
    4351          36 :         if (strNil(s) || strNil(s2)) {
    4352           3 :                 *res = GDKstrdup(str_nil);
    4353             :         } else {
    4354          16 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH * sizeof(int);
    4355             : 
    4356          16 :                 *res = NULL;
    4357          16 :                 if (!(buf = GDKmalloc(buflen)))
    4358           0 :                         throw(MAL, "str.strip2", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4359          16 :                 if ((msg = str_strip2(&buf, &buflen, s, s2)) != MAL_SUCCEED) {
    4360           0 :                         GDKfree(buf);
    4361           0 :                         return msg;
    4362             :                 }
    4363          16 :                 *res = GDKstrdup(buf);
    4364             :         }
    4365             : 
    4366          19 :         GDKfree(buf);
    4367          19 :         if (!*res)
    4368           0 :                 msg = createException(MAL, "str.strip2",
    4369             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4370             :         return msg;
    4371             : }
    4372             : 
    4373             : str
    4374          14 : str_ltrim2(str *buf, size_t *buflen, const char *s, const char *s2)
    4375             : {
    4376          14 :         str msg = MAL_SUCCEED;
    4377          14 :         size_t len, n, n2, n3, nallocate;
    4378             : 
    4379          14 :         if ((n2 = strlen(s2)) == 0) {
    4380           1 :                 len = strlen(s) + 1;
    4381           1 :                 CHECK_STR_BUFFER_LENGTH(buf, buflen, len, "str.ltrim2");
    4382           1 :                 strcpy(*buf, s);
    4383           1 :                 return MAL_SUCCEED;
    4384             :         } else {
    4385          13 :                 if ((msg = trimchars(buf, buflen, &n3, s2, n2, "str.ltrim2")) != MAL_SUCCEED)
    4386             :                         return msg;
    4387          13 :                 len = strlen(s);
    4388          13 :                 n = lstrip(s, len, *(int **) buf, n3);
    4389          13 :                 nallocate = len - n + 1;
    4390             : 
    4391          13 :                 CHECK_STR_BUFFER_LENGTH(buf, buflen, nallocate, "str.ltrim2");
    4392          13 :                 strcpy_len(*buf, s + n, nallocate);
    4393          13 :                 return MAL_SUCCEED;
    4394             :         }
    4395             : }
    4396             : 
    4397             : /* remove the longest string containing only characters from arg2 from
    4398             :  * the start (left) of arg1 */
    4399             : static str
    4400           8 : STRLtrim2(str *res, const str *arg1, const str *arg2)
    4401             : {
    4402           8 :         str buf = NULL, msg = MAL_SUCCEED;
    4403           8 :         const char *s = *arg1, *s2 = *arg2;
    4404             : 
    4405          16 :         if (strNil(s) || strNil(s2)) {
    4406           0 :                 *res = GDKstrdup(str_nil);
    4407             :         } else {
    4408           8 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH * sizeof(int);
    4409             : 
    4410           8 :                 *res = NULL;
    4411           8 :                 if (!(buf = GDKmalloc(buflen)))
    4412           0 :                         throw(MAL, "str.ltrim2", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4413           8 :                 if ((msg = str_ltrim2(&buf, &buflen, s, s2)) != MAL_SUCCEED) {
    4414           0 :                         GDKfree(buf);
    4415           0 :                         return msg;
    4416             :                 }
    4417           8 :                 *res = GDKstrdup(buf);
    4418             :         }
    4419             : 
    4420           8 :         GDKfree(buf);
    4421           8 :         if (!*res)
    4422           0 :                 msg = createException(MAL, "str.ltrim2",
    4423             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4424             :         return msg;
    4425             : }
    4426             : 
    4427             : str
    4428          13 : str_rtrim2(str *buf, size_t *buflen, const char *s, const char *s2)
    4429             : {
    4430          13 :         str msg = MAL_SUCCEED;
    4431          13 :         size_t len, n, n2, n3;
    4432             : 
    4433          13 :         if ((n2 = strlen(s2)) == 0) {
    4434           2 :                 len = strlen(s) + 1;
    4435           2 :                 CHECK_STR_BUFFER_LENGTH(buf, buflen, len, "str.rtrim2");
    4436           2 :                 strcpy(*buf, s);
    4437           2 :                 return MAL_SUCCEED;
    4438             :         } else {
    4439          11 :                 if ((msg = trimchars(buf, buflen, &n3, s2, n2, "str.ltrim2")) != MAL_SUCCEED)
    4440             :                         return msg;
    4441          11 :                 len = strlen(s);
    4442          11 :                 n = rstrip(s, len, *(int **) buf, n3);
    4443          11 :                 n++;
    4444             : 
    4445          11 :                 CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.rtrim2");
    4446          11 :                 strcpy_len(*buf, s, n);
    4447          11 :                 return MAL_SUCCEED;
    4448             :         }
    4449             : }
    4450             : 
    4451             : /* remove the longest string containing only characters from arg2 from
    4452             :  * the end (right) of arg1 */
    4453             : static str
    4454           7 : STRRtrim2(str *res, const str *arg1, const str *arg2)
    4455             : {
    4456           7 :         str buf = NULL, msg = MAL_SUCCEED;
    4457           7 :         const char *s = *arg1, *s2 = *arg2;
    4458             : 
    4459          14 :         if (strNil(s) || strNil(s2)) {
    4460           0 :                 *res = GDKstrdup(str_nil);
    4461             :         } else {
    4462           7 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH * sizeof(int);
    4463             : 
    4464           7 :                 *res = NULL;
    4465           7 :                 if (!(buf = GDKmalloc(buflen)))
    4466           0 :                         throw(MAL, "str.rtrim2", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4467           7 :                 if ((msg = str_rtrim2(&buf, &buflen, s, s2)) != MAL_SUCCEED) {
    4468           0 :                         GDKfree(buf);
    4469           0 :                         return msg;
    4470             :                 }
    4471           7 :                 *res = GDKstrdup(buf);
    4472             :         }
    4473             : 
    4474           7 :         GDKfree(buf);
    4475           7 :         if (!*res)
    4476           0 :                 msg = createException(MAL, "str.rtrim2",
    4477             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4478             :         return msg;
    4479             : }
    4480             : 
    4481             : static str
    4482          60 : pad(str *buf, size_t *buflen, const char *s, const char *pad, int len, int left,
    4483             :         const char *malfunc)
    4484             : {
    4485          60 :         size_t slen, padlen, repeats, residual, i, nlen;
    4486          60 :         char *res;
    4487             : 
    4488          60 :         if (len < 0)
    4489             :                 len = 0;
    4490             : 
    4491          60 :         slen = (size_t) UTF8_strlen(s);
    4492          60 :         if (slen > (size_t) len) {
    4493             :                 /* truncate */
    4494          20 :                 pad = UTF8_strtail(s, len);
    4495          20 :                 slen = pad - s + 1;
    4496             : 
    4497          20 :                 CHECK_STR_BUFFER_LENGTH(buf, buflen, slen, malfunc);
    4498          20 :                 strcpy_len(*buf, s, slen);
    4499          20 :                 return MAL_SUCCEED;
    4500             :         }
    4501             : 
    4502          40 :         padlen = (size_t) UTF8_strlen(pad);
    4503          40 :         if (slen == (size_t) len || padlen == 0) {
    4504             :                 /* nothing to do (no padding if there is no pad string) */
    4505           0 :                 slen = strlen(s) + 1;
    4506           0 :                 CHECK_STR_BUFFER_LENGTH(buf, buflen, slen, malfunc);
    4507           0 :                 strcpy(*buf, s);
    4508           0 :                 return MAL_SUCCEED;
    4509             :         }
    4510             : 
    4511          40 :         repeats = ((size_t) len - slen) / padlen;
    4512          40 :         residual = ((size_t) len - slen) % padlen;
    4513          40 :         if (residual > 0)
    4514          20 :                 residual = (size_t) (UTF8_strtail(pad, (int) residual) - pad);
    4515          40 :         padlen = strlen(pad);
    4516          40 :         slen = strlen(s);
    4517             : 
    4518          40 :         nlen = slen + repeats * padlen + residual + 1;
    4519          40 :         CHECK_STR_BUFFER_LENGTH(buf, buflen, nlen, malfunc);
    4520          40 :         res = *buf;
    4521          40 :         if (left) {
    4522          87 :                 for (i = 0; i < repeats; i++)
    4523          67 :                         memcpy(res + i * padlen, pad, padlen);
    4524          20 :                 if (residual > 0)
    4525          10 :                         memcpy(res + repeats * padlen, pad, residual);
    4526          20 :                 if (slen > 0)
    4527          20 :                         memcpy(res + repeats * padlen + residual, s, slen);
    4528             :         } else {
    4529          20 :                 if (slen > 0)
    4530          20 :                         memcpy(res, s, slen);
    4531          87 :                 for (i = 0; i < repeats; i++)
    4532          67 :                         memcpy(res + slen + i * padlen, pad, padlen);
    4533          20 :                 if (residual > 0)
    4534          10 :                         memcpy(res + slen + repeats * padlen, pad, residual);
    4535             :         }
    4536          40 :         res[repeats * padlen + residual + slen] = 0;
    4537          40 :         return MAL_SUCCEED;
    4538             : }
    4539             : 
    4540             : str
    4541           8 : str_lpad(str *buf, size_t *buflen, const char *s, int len)
    4542             : {
    4543           4 :         return pad(buf, buflen, s, " ", len, 1, "str.lpad");
    4544             : }
    4545             : 
    4546             : /* Fill up 'arg1' to length 'len' by prepending whitespaces.
    4547             :  * If 'arg1' is already longer than 'len', then it's truncated on the right
    4548             :  * (NB: this is the PostgreSQL definition).
    4549             :  *
    4550             :  * Example: lpad('hi', 5)
    4551             :  * Result: '   hi'
    4552             :  */
    4553             : static str
    4554           4 : STRLpad(str *res, const str *arg1, const int *len)
    4555             : {
    4556           4 :         str buf = NULL, msg = MAL_SUCCEED;
    4557           4 :         const char *s = *arg1;
    4558           4 :         int l = *len;
    4559             : 
    4560           8 :         if (strNil(s) || is_int_nil(l)) {
    4561           0 :                 *res = GDKstrdup(str_nil);
    4562             :         } else {
    4563           4 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    4564             : 
    4565           4 :                 *res = NULL;
    4566           4 :                 if (!(buf = GDKmalloc(buflen)))
    4567           0 :                         throw(MAL, "str.lpad", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4568           4 :                 if ((msg = str_lpad(&buf, &buflen, s, l)) != MAL_SUCCEED) {
    4569           0 :                         GDKfree(buf);
    4570           0 :                         return msg;
    4571             :                 }
    4572           4 :                 *res = GDKstrdup(buf);
    4573             :         }
    4574             : 
    4575           4 :         GDKfree(buf);
    4576           4 :         if (!*res)
    4577           0 :                 msg = createException(MAL, "str.lpad", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4578             :         return msg;
    4579             : }
    4580             : 
    4581             : str
    4582           8 : str_rpad(str *buf, size_t *buflen, const char *s, int len)
    4583             : {
    4584           4 :         return pad(buf, buflen, s, " ", len, 0, "str.lpad");
    4585             : }
    4586             : 
    4587             : /* Fill up 'arg1' to length 'len' by appending whitespaces.
    4588             :  * If 'arg1' is already longer than 'len', then it's truncated (on the right)
    4589             :  * (NB: this is the PostgreSQL definition).
    4590             :  *
    4591             :  * Example: rpad('hi', 5)
    4592             :  * Result: 'hi   '
    4593             :  */
    4594             : static str
    4595           4 : STRRpad(str *res, const str *arg1, const int *len)
    4596             : {
    4597           4 :         str buf = NULL, msg = MAL_SUCCEED;
    4598           4 :         const char *s = *arg1;
    4599           4 :         int l = *len;
    4600             : 
    4601           8 :         if (strNil(s) || is_int_nil(l)) {
    4602           0 :                 *res = GDKstrdup(str_nil);
    4603             :         } else {
    4604           4 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    4605             : 
    4606           4 :                 *res = NULL;
    4607           4 :                 if (!(buf = GDKmalloc(buflen)))
    4608           0 :                         throw(MAL, "str.rpad", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4609           4 :                 if ((msg = str_rpad(&buf, &buflen, s, l)) != MAL_SUCCEED) {
    4610           0 :                         GDKfree(buf);
    4611           0 :                         return msg;
    4612             :                 }
    4613           4 :                 *res = GDKstrdup(buf);
    4614             :         }
    4615             : 
    4616           4 :         GDKfree(buf);
    4617           4 :         if (!*res)
    4618           0 :                 msg = createException(MAL, "str.rpad", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4619             :         return msg;
    4620             : }
    4621             : 
    4622             : str
    4623          22 : str_lpad3(str *buf, size_t *buflen, const char *s, int len, const char *s2)
    4624             : {
    4625          16 :         return pad(buf, buflen, s, s2, len, 1, "str.lpad2");
    4626             : }
    4627             : 
    4628             : /* Fill up 'arg1' to length 'len' by prepending characters from 'arg2'
    4629             :  * If 'arg1' is already longer than 'len', then it's truncated on the right
    4630             :  * (NB: this is the PostgreSQL definition).
    4631             :  *
    4632             :  * Example: lpad('hi', 5, 'xy')
    4633             :  * Result: xyxhi
    4634             :  */
    4635             : static str
    4636           6 : STRLpad3(str *res, const str *arg1, const int *len, const str *arg2)
    4637             : {
    4638           6 :         str buf = NULL, msg = MAL_SUCCEED;
    4639           6 :         const char *s = *arg1, *s2 = *arg2;
    4640           6 :         int l = *len;
    4641             : 
    4642          18 :         if (strNil(s) || strNil(s2) || is_int_nil(l)) {
    4643           0 :                 *res = GDKstrdup(str_nil);
    4644             :         } else {
    4645           6 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    4646             : 
    4647           6 :                 *res = NULL;
    4648           6 :                 if (!(buf = GDKmalloc(buflen)))
    4649           0 :                         throw(MAL, "str.lpad2", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4650           6 :                 if ((msg = str_lpad3(&buf, &buflen, s, l, s2)) != MAL_SUCCEED) {
    4651           0 :                         GDKfree(buf);
    4652           0 :                         return msg;
    4653             :                 }
    4654           6 :                 *res = GDKstrdup(buf);
    4655             :         }
    4656             : 
    4657           6 :         GDKfree(buf);
    4658           6 :         if (!*res)
    4659           0 :                 msg = createException(MAL, "str.lpad2",
    4660             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4661             :         return msg;
    4662             : }
    4663             : 
    4664             : str
    4665          22 : str_rpad3(str *buf, size_t *buflen, const char *s, int len, const char *s2)
    4666             : {
    4667          16 :         return pad(buf, buflen, s, s2, len, 0, "str.rpad2");
    4668             : }
    4669             : 
    4670             : /* Fill up 'arg1' to length 'len' by appending characters from 'arg2'
    4671             :  * If 'arg1' is already longer than 'len', then it's truncated (on the right)
    4672             :  * (NB: this is the PostgreSQL definition).
    4673             :  *
    4674             :  * Example: rpad('hi', 5, 'xy')
    4675             :  * Result: hixyx
    4676             :  */
    4677             : static str
    4678           6 : STRRpad3(str *res, const str *arg1, const int *len, const str *arg2)
    4679             : {
    4680           6 :         str buf = NULL, msg = MAL_SUCCEED;
    4681           6 :         const char *s = *arg1, *s2 = *arg2;
    4682           6 :         int l = *len;
    4683             : 
    4684          18 :         if (strNil(s) || strNil(s2) || is_int_nil(l)) {
    4685           0 :                 *res = GDKstrdup(str_nil);
    4686             :         } else {
    4687           6 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    4688             : 
    4689           6 :                 *res = NULL;
    4690           6 :                 if (!(buf = GDKmalloc(buflen)))
    4691           0 :                         throw(MAL, "str.rpad2", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4692           6 :                 if ((msg = str_rpad3(&buf, &buflen, s, l, s2)) != MAL_SUCCEED) {
    4693           0 :                         GDKfree(buf);
    4694           0 :                         return msg;
    4695             :                 }
    4696           6 :                 *res = GDKstrdup(buf);
    4697             :         }
    4698             : 
    4699           6 :         GDKfree(buf);
    4700           6 :         if (!*res)
    4701           0 :                 msg = createException(MAL, "str.rpad2",
    4702             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4703             :         return msg;
    4704             : }
    4705             : 
    4706             : str
    4707      100535 : str_substitute(str *buf, size_t *buflen, const char *s, const char *src,
    4708             :                            const char *dst, bit repeat)
    4709             : {
    4710      100535 :         size_t lsrc = strlen(src), ldst = strlen(dst), n, l = strlen(s);
    4711      100535 :         char *b, *fnd;
    4712      100535 :         const char *pfnd;
    4713             : 
    4714      100535 :         if (!lsrc || !l) {                      /* s/src is an empty string, there's nothing to substitute */
    4715           7 :                 l++;
    4716           7 :                 CHECK_STR_BUFFER_LENGTH(buf, buflen, l, "str.substitute");
    4717           7 :                 strcpy(*buf, s);
    4718           7 :                 return MAL_SUCCEED;
    4719             :         }
    4720             : 
    4721      100528 :         n = l + ldst;
    4722      100528 :         if (repeat && ldst > lsrc)
    4723       77340 :                 n = (ldst * l) / lsrc;  /* max length */
    4724             : 
    4725      100528 :         n++;
    4726      100528 :         CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.substitute");
    4727      100528 :         b = *buf;
    4728      100528 :         pfnd = s;
    4729      105695 :         do {
    4730      105695 :                 fnd = strstr(pfnd, src);
    4731      105695 :                 if (fnd == NULL)
    4732             :                         break;
    4733        5168 :                 n = fnd - pfnd;
    4734        5168 :                 if (n > 0) {
    4735        4401 :                         strcpy_len(b, pfnd, n + 1);
    4736        4401 :                         b += n;
    4737             :                 }
    4738        5168 :                 if (ldst > 0) {
    4739         406 :                         strcpy_len(b, dst, ldst + 1);
    4740         405 :                         b += ldst;
    4741             :                 }
    4742        5167 :                 if (*fnd == 0)
    4743             :                         break;
    4744        5167 :                 pfnd = fnd + lsrc;
    4745        5167 :         } while (repeat);
    4746      100527 :         strcpy(b, pfnd);
    4747      100527 :         return MAL_SUCCEED;
    4748             : }
    4749             : 
    4750             : static str
    4751         197 : STRSubstitute(str *res, const str *arg1, const str *arg2, const str *arg3,
    4752             :                           const bit *g)
    4753             : {
    4754         197 :         str buf = NULL, msg = MAL_SUCCEED;
    4755         197 :         const char *s = *arg1, *s2 = *arg2, *s3 = *arg3;
    4756             : 
    4757         590 :         if (strNil(s) || strNil(s2) || strNil(s3)) {
    4758           2 :                 *res = GDKstrdup(str_nil);
    4759             :         } else {
    4760         195 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    4761             : 
    4762         195 :                 *res = NULL;
    4763         195 :                 if (!(buf = GDKmalloc(buflen)))
    4764           0 :                         throw(MAL, "str.substitute", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4765         195 :                 if ((msg = str_substitute(&buf, &buflen, s, s2, s3, *g)) != MAL_SUCCEED) {
    4766           0 :                         GDKfree(buf);
    4767           0 :                         return msg;
    4768             :                 }
    4769         195 :                 *res = GDKstrdup(buf);
    4770             :         }
    4771             : 
    4772         197 :         GDKfree(buf);
    4773         197 :         if (!*res)
    4774           0 :                 msg = createException(MAL, "str.substitute",
    4775             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4776             :         return msg;
    4777             : }
    4778             : 
    4779             : static str
    4780           9 : STRascii(int *ret, const str *s)
    4781             : {
    4782           9 :         return str_wchr_at(ret, *s, 0);
    4783             : }
    4784             : 
    4785             : str
    4786        4256 : str_substring_tail(str *buf, size_t *buflen, const char *s, int start)
    4787             : {
    4788        4256 :         if (start < 1)
    4789             :                 start = 1;
    4790        4256 :         start--;
    4791        4248 :         return str_tail(buf, buflen, s, start);
    4792             : }
    4793             : /* STRsubstringTail: wrapper around str_substring_tail(). If *arg1 or *start is nil, *res becomes a copy of str_nil; otherwise a scratch buffer of INITIAL_STR_BUFFER_LENGTH bytes is allocated, filled by the helper, and *res receives a GDKstrdup() copy of it. Returns MAL_SUCCEED, the helper's error, or an HY013 allocation-failure exception. */
    4794             : static str
    4795           8 : STRsubstringTail(str *res, const str *arg1, const int *start)
    4796             : {
    4797           8 :         str buf = NULL, msg = MAL_SUCCEED;
    4798           8 :         const char *s = *arg1;
    4799           8 :         int st = *start;
    4800             : 
    4801          16 :         if (strNil(s) || is_int_nil(st)) {
    4802           0 :                 *res = GDKstrdup(str_nil);
    4803             :         } else {
    4804           8 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    4805             : 
    4806           8 :                 *res = NULL;
    4807           8 :                 if (!(buf = GDKmalloc(buflen)))
    4808           0 :                         throw(MAL, "str.substringTail", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4809           8 :                 if ((msg = str_substring_tail(&buf, &buflen, s, st)) != MAL_SUCCEED) {
    4810           0 :                         GDKfree(buf);
    4811           0 :                         return msg;
    4812             :                 }
    4813           8 :                 *res = GDKstrdup(buf);
    4814             :         }
    4815             : 
    4816           8 :         GDKfree(buf); /* buf is still NULL here on the nil path */
    4817           8 :         if (!*res) /* either GDKstrdup() above failed */
    4818           0 :                 msg = createException(MAL, "str.substringTail",
    4819             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4820             :         return msg;
    4821             : }
    4822             : /* str_sub_string: clamp `start` to a minimum of 1, turn it into a 0-based offset, and delegate to str_Sub_String() with that offset and length `l`; the result lands in *buf (capacity tracked in *buflen). Returns whatever str_Sub_String() returns. */
    4823             : str
    4824     3802797 : str_sub_string(str *buf, size_t *buflen, const char *s, int start, int l)
    4825             : {
    4826     3802797 :         if (start < 1)
    4827             :                 start = 1;
    4828     3802797 :         start--; /* 1-based position -> 0-based offset */
    4829     3802777 :         return str_Sub_String(buf, buflen, s, start, l);
    4830             : }
    4831             : /* STRsubstring: wrapper around str_sub_string(). If *arg1, *start or *ll is nil, *res becomes a copy of str_nil; otherwise a scratch buffer of INITIAL_STR_BUFFER_LENGTH bytes is allocated, filled by the helper, and *res receives a GDKstrdup() copy of it. Returns MAL_SUCCEED, the helper's error, or an HY013 allocation-failure exception. */
    4832             : static str
    4833          23 : STRsubstring(str *res, const str *arg1, const int *start, const int *ll)
    4834             : {
    4835          23 :         str buf = NULL, msg = MAL_SUCCEED;
    4836          23 :         const char *s = *arg1;
    4837          23 :         int st = *start, l = *ll;
    4838             : 
    4839          46 :         if (strNil(s) || is_int_nil(st) || is_int_nil(l)) {
    4840           3 :                 *res = GDKstrdup(str_nil);
    4841             :         } else {
    4842          20 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    4843             : 
    4844          20 :                 *res = NULL;
    4845          20 :                 if (!(buf = GDKmalloc(buflen)))
    4846           0 :                         throw(MAL, "str.substring", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4847          20 :                 if ((msg = str_sub_string(&buf, &buflen, s, st, l)) != MAL_SUCCEED) {
    4848           0 :                         GDKfree(buf);
    4849           0 :                         return msg;
    4850             :                 }
    4851          20 :                 *res = GDKstrdup(buf);
    4852             :         }
    4853             : 
    4854          23 :         GDKfree(buf); /* buf is still NULL here on the nil path */
    4855          23 :         if (!*res) /* either GDKstrdup() above failed */
    4856           0 :                 msg = createException(MAL, "str.substring",
    4857             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4858             :         return msg;
    4859             : }
    4860             : /* STRprefix: produce the leading part of *arg1 by calling str_Sub_String() with offset 0 and length *ll. If *arg1 or *ll is nil, *res becomes a copy of str_nil; otherwise the helper fills a scratch buffer and *res receives a GDKstrdup() copy of it. Returns MAL_SUCCEED, the helper's error, or an HY013 allocation-failure exception. */
    4861             : static str
    4862          20 : STRprefix(str *res, const str *arg1, const int *ll)
    4863             : {
    4864          20 :         str buf = NULL, msg = MAL_SUCCEED;
    4865          20 :         const char *s = *arg1;
    4866          20 :         int l = *ll;
    4867             : 
    4868          40 :         if (strNil(s) || is_int_nil(l)) {
    4869           0 :                 *res = GDKstrdup(str_nil);
    4870             :         } else {
    4871          20 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    4872             : 
    4873          20 :                 *res = NULL;
    4874          20 :                 if (!(buf = GDKmalloc(buflen)))
    4875           0 :                         throw(MAL, "str.prefix", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4876          20 :                 if ((msg = str_Sub_String(&buf, &buflen, s, 0, l)) != MAL_SUCCEED) {
    4877           0 :                         GDKfree(buf);
    4878           0 :                         return msg;
    4879             :                 }
    4880          20 :                 *res = GDKstrdup(buf);
    4881             :         }
    4882             : 
    4883          20 :         GDKfree(buf); /* buf is still NULL here on the nil path */
    4884          20 :         if (!*res) /* either GDKstrdup() above failed */
    4885           0 :                 msg = createException(MAL, "str.prefix",
    4886             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4887             :         return msg;
    4888             : }
    4889             : /* str_suffix: compute a start offset as strlen(s) - l and delegate to str_Sub_String() with that offset and length `l`. NOTE(review): strlen() counts bytes, while the offset is handed to the same helper that STRprefix uses -- if that helper counts characters the two units differ for multi-byte input; also the unsigned subtraction wraps when l > strlen(s) before the cast to int -- confirm both. */
    4890             : str
    4891           9 : str_suffix(str *buf, size_t *buflen, const char *s, int l)
    4892             : {
    4893           9 :         int start = (int) (strlen(s) - l);
    4894           9 :         return str_Sub_String(buf, buflen, s, start, l);
    4895             : }
    4896             : /* STRsuffix: wrapper around str_suffix(). If *arg1 or *ll is nil, *res becomes a copy of str_nil; otherwise a scratch buffer of INITIAL_STR_BUFFER_LENGTH bytes is allocated, filled by the helper, and *res receives a GDKstrdup() copy of it. Returns MAL_SUCCEED, the helper's error, or an HY013 allocation-failure exception. */
    4897             : static str
    4898           5 : STRsuffix(str *res, const str *arg1, const int *ll)
    4899             : {
    4900           5 :         str buf = NULL, msg = MAL_SUCCEED;
    4901           5 :         const char *s = *arg1;
    4902           5 :         int l = *ll;
    4903             : 
    4904          10 :         if (strNil(s) || is_int_nil(l)) {
    4905           0 :                 *res = GDKstrdup(str_nil);
    4906             :         } else {
    4907           5 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    4908             : 
    4909           5 :                 *res = NULL;
    4910           5 :                 if (!(buf = GDKmalloc(buflen)))
    4911           0 :                         throw(MAL, "str.suffix", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4912           5 :                 if ((msg = str_suffix(&buf, &buflen, s, l)) != MAL_SUCCEED) {
    4913           0 :                         GDKfree(buf);
    4914           0 :                         return msg;
    4915             :                 }
    4916           5 :                 *res = GDKstrdup(buf);
    4917             :         }
    4918             : 
    4919           5 :         GDKfree(buf); /* buf is still NULL here on the nil path */
    4920           5 :         if (!*res) /* either GDKstrdup() above failed */
    4921           0 :                 msg = createException(MAL, "str.suffix",
    4922             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    4923             :         return msg;
    4924             : }
    4925             : /* str_locate2: search for `needle` in `haystack`, beginning at 1-based position `start` (values <= 0 are treated as 1). Returns the str_search() result shifted by the starting position when str_search() reports a non-negative index, and 0 otherwise. */
    4926             : int
    4927        4244 : str_locate2(const char *needle, const char *haystack, int start)
    4928             : {
    4929        4244 :         int off, res;
    4930        4244 :         char *s;
    4931             : 
    4932        4244 :         off = start <= 0 ? 1 : start;
    4933        4244 :         s = UTF8_strtail(haystack, off - 1); /* skip the part of haystack before the start position */
    4934        4244 :         res = str_search(s, needle, str_strlen(needle));
    4935        4251 :         return res >= 0 ? res + off : 0; /* 0 signals "not found" */
    4936             : }
    4937             : /* STRlocate3: store in *ret the result of str_locate2(*needle, *haystack, *start), or int_nil when any of the three inputs is nil. Always returns MAL_SUCCEED. */
    4938             : static str
    4939       27274 : STRlocate3(int *ret, const str *needle, const str *haystack, const int *start)
    4940             : {
    4941       27274 :         const char *s = *needle, *s2 = *haystack;
    4942       27274 :         int st = *start;
    4943             : 
    4944       54614 :         *ret = (strNil(s) || strNil(s2) || is_int_nil(st)) ?
    4945       27274 :                 int_nil :
    4946          66 :                 str_locate2(s, s2, st);
    4947       27274 :         return MAL_SUCCEED;
    4948             : }
    4949             : /* STRlocate: two-argument form of the locate operation; stores str_locate2(*needle, *haystack, 1) in *ret, or int_nil when either string is nil. Always returns MAL_SUCCEED. */
    4950             : static str
    4951          16 : STRlocate(int *ret, const str *needle, const str *haystack)
    4952             : {
    4953          16 :         const char *s = *needle, *s2 = *haystack;
    4954             : 
    4955          45 :         *ret = (strNil(s) || strNil(s2)) ? int_nil : str_locate2(s, s2, 1);
    4956          16 :         return MAL_SUCCEED;
    4957             : }
    4958             : /* str_insert: build in *buf the string made of the first `strt` characters of `s`, then `s2`, then the part of `s` that starts `l` characters after `strt`. A negative `strt` is taken relative to the end of `s` (clamped to 0), a `strt` beyond the length is clamped to the length, and a negative `l` raises a 42000 exception. Returns MAL_SUCCEED on success. */
    4959             : str
    4960         223 : str_insert(str *buf, size_t *buflen, const char *s, int strt, int l,
    4961             :                    const char *s2)
    4962             : {
    4963         223 :         str v;
    4964         223 :         int l1 = UTF8_strlen(s);
    4965         223 :         size_t nextlen;
    4966             : 
    4967         223 :         if (l < 0)
    4968           0 :                 throw(MAL, "str.insert",
    4969             :                           SQLSTATE(42000)
    4970             :                           "The number of characters for insert function must be non negative");
    4971         223 :         if (strt < 0) {
    4972           0 :                 if (-strt <= l1)
    4973           0 :                         strt = l1 + strt;
    4974             :                 else
    4975             :                         strt = 0;
    4976             :         }
    4977         223 :         if (strt > l1)
    4978             :                 strt = l1;
    4979             : 
    4980         223 :         nextlen = strlen(s) + strlen(s2) + 1; /* upper bound: all of s, all of s2, and the terminator */
    4981         223 :         CHECK_STR_BUFFER_LENGTH(buf, buflen, nextlen, "str.insert");
    4982         223 :         v = *buf;
    4983         223 :         if (strt > 0)
    4984         216 :                 v = UTF8_strncpy(v, s, strt); /* presumably returns the position just past the copied prefix -- confirm */
    4985         223 :         strcpy(v, s2);
    4986         223 :         if (strt + l < l1) /* NOTE(review): strt + l is a signed int addition; a very large l could overflow -- confirm callers bound it */
    4987          10 :                 strcat(v, UTF8_strtail((char *) s, strt + l));
    4988             :         return MAL_SUCCEED;
    4989             : }
    4990             : /* STRinsert: wrapper around str_insert(). If any of *input, *start, *nchars or *input2 is nil, *res becomes a copy of str_nil; otherwise a scratch buffer of INITIAL_STR_BUFFER_LENGTH bytes is allocated, filled by the helper, and *res receives a GDKstrdup() copy of it. Returns MAL_SUCCEED, the helper's error, or an HY013 allocation-failure exception. */
    4991             : static str
    4992         225 : STRinsert(str *res, const str *input, const int *start, const int *nchars,
    4993             :                   const str *input2)
    4994             : {
    4995         225 :         str buf = NULL, msg = MAL_SUCCEED;
    4996         225 :         const char *s = *input, *s2 = *input2;
    4997         225 :         int st = *start, n = *nchars;
    4998             : 
    4999         449 :         if (strNil(s) || is_int_nil(st) || is_int_nil(n) || strNil(s2)) {
    5000           2 :                 *res = GDKstrdup(str_nil);
    5001             :         } else {
    5002         223 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    5003             : 
    5004         223 :                 *res = NULL;
    5005         223 :                 if (!(buf = GDKmalloc(buflen)))
    5006           0 :                         throw(MAL, "str.insert", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    5007         223 :                 if ((msg = str_insert(&buf, &buflen, s, st, n, s2)) != MAL_SUCCEED) {
    5008           0 :                         GDKfree(buf);
    5009           0 :                         return msg;
    5010             :                 }
    5011         223 :                 *res = GDKstrdup(buf);
    5012             :         }
    5013             : 
    5014         225 :         GDKfree(buf); /* buf is still NULL here on the nil path */
    5015         225 :         if (!*res) /* either GDKstrdup() above failed */
    5016           0 :                 msg = createException(MAL, "str.insert",
    5017             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    5018             :         return msg;
    5019             : }
    5020             : /* STRreplace: call STRSubstitute(ret, s1, s2, s3, &flag) with the flag fixed to TRUE and return its result; presumably the flag selects replacing every occurrence rather than only the first -- confirm against str_substitute. */
    5021             : static str
    5022         197 : STRreplace(str *ret, const str *s1, const str *s2, const str *s3)
    5023             : {
    5024         197 :         bit flag = TRUE;
    5025         197 :         return STRSubstitute(ret, s1, s2, s3, &flag);
    5026             : }
    5027             : /* str_repeat: write `c` back-to-back copies of `s` into *buf, making sure via CHECK_STR_BUFFER_LENGTH that c * strlen(s) + 1 bytes are available; for c <= 0 the loop does not run and the result is the empty string. Raises HY013 when strlen(s) >= INT_MAX. NOTE(review): a negative `c` is converted to a huge size_t in the size computation, and the product is not checked for overflow; the callers visible in this file reject negative counts first -- confirm other callers. */
    5028             : str
    5029          15 : str_repeat(str *buf, size_t *buflen, const char *s, int c)
    5030             : {
    5031          15 :         size_t l = strlen(s), nextlen;
    5032             : 
    5033          15 :         if (l >= INT_MAX)
    5034           0 :                 throw(MAL, "str.repeat", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    5035          15 :         nextlen = (size_t) c *l + 1;
    5036             : 
    5037          15 :         CHECK_STR_BUFFER_LENGTH(buf, buflen, nextlen, "str.repeat");
    5038          15 :         str t = *buf;
    5039          15 :         *t = 0; /* start from an empty string so c == 0 yields "" */
    5040      160043 :         for (int i = c; i > 0; i--, t += l)
    5041      160028 :                 strcpy(t, s); /* each copy overwrites the previous terminator */
    5042             :         return MAL_SUCCEED;
    5043             : }
    5044             : /* STRrepeat: wrapper around str_repeat(). If *arg1 is nil, or *c is nil or negative, *res becomes a copy of str_nil; otherwise a scratch buffer of INITIAL_STR_BUFFER_LENGTH bytes is allocated, filled by the helper, and *res receives a GDKstrdup() copy of it. Returns MAL_SUCCEED, the helper's error, or an HY013 allocation-failure exception. */
    5045             : static str
    5046          11 : STRrepeat(str *res, const str *arg1, const int *c)
    5047             : {
    5048          11 :         str buf = NULL, msg = MAL_SUCCEED;
    5049          11 :         const char *s = *arg1;
    5050          11 :         int cc = *c;
    5051             : 
    5052          21 :         if (strNil(s) || is_int_nil(cc) || cc < 0) {
    5053           1 :                 *res = GDKstrdup(str_nil);
    5054             :         } else {
    5055          10 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    5056             : 
    5057          10 :                 *res = NULL;
    5058          10 :                 if (!(buf = GDKmalloc(buflen)))
    5059           0 :                         throw(MAL, "str.repeat", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    5060          10 :                 if ((msg = str_repeat(&buf, &buflen, s, cc)) != MAL_SUCCEED) {
    5061           0 :                         GDKfree(buf);
    5062           0 :                         return msg;
    5063             :                 }
    5064          10 :                 *res = GDKstrdup(buf);
    5065             :         }
    5066             : 
    5067          11 :         GDKfree(buf); /* buf is still NULL here on the nil path */
    5068          11 :         if (!*res) /* either GDKstrdup() above failed */
    5069           0 :                 msg = createException(MAL, "str.repeat",
    5070             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    5071             :         return msg;
    5072             : }
    5073             : /* STRspace: produce a string of *ll space characters by calling str_repeat() on the one-character string " ". If *ll is nil or negative, *res becomes a copy of str_nil. Returns MAL_SUCCEED, the helper's error, or an HY013 allocation-failure exception. */
    5074             : static str
    5075           1 : STRspace(str *res, const int *ll)
    5076             : {
    5077           1 :         str buf = NULL, msg = MAL_SUCCEED;
    5078           1 :         int l = *ll;
    5079             : 
    5080           1 :         if (is_int_nil(l) || l < 0) {
    5081           0 :                 *res = GDKstrdup(str_nil);
    5082             :         } else {
    5083           1 :                 const char space[] = " ", *s = space;
    5084           1 :                 size_t buflen = INITIAL_STR_BUFFER_LENGTH;
    5085             : 
    5086           1 :                 *res = NULL;
    5087           1 :                 if (!(buf = GDKmalloc(buflen)))
    5088           0 :                         throw(MAL, "str.space", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    5089           1 :                 if ((msg = str_repeat(&buf, &buflen, s, l)) != MAL_SUCCEED) {
    5090           0 :                         GDKfree(buf);
    5091           0 :                         return msg;
    5092             :                 }
    5093           1 :                 *res = GDKstrdup(buf);
    5094             :         }
    5095             : 
    5096           1 :         GDKfree(buf); /* buf is still NULL here on the nil path */
    5097           1 :         if (!*res) /* either GDKstrdup() above failed */
    5098           0 :                 msg = createException(MAL, "str.space",
    5099             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
    5100             :         return msg;
    5101             : }
    5102             : /* STRasciify: convert *s from "UTF-8" to "ASCII//TRANSLIT" with iconv and store the newly allocated result in *r. A nil input yields a copy of str_nil. Errors: HY013 on allocation failure, a plain MAL exception when iconv_open() or iconv() fails (in the latter case *r is reset to NULL), and "ICONV library not available." when built without HAVE_ICONV. */
    5103             : static str
    5104           4 : STRasciify(str *r, const str *s)
    5105             : {
    5106             : #ifdef HAVE_ICONV
    5107             : 
    5108           4 :         if (strNil(*s)) {
    5109           0 :                 if ((*r = GDKstrdup(str_nil)) == NULL)
    5110           0 :                         throw(MAL, "str.asciify", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    5111             :                 else
    5112             :                         return MAL_SUCCEED;
    5113             :         }
    5114             : 
    5115           4 :         iconv_t cd;
    5116           4 :         const str f = "UTF-8", t = "ASCII//TRANSLIT";
    5117           4 :         str in = *s, out;
    5118           4 :         size_t in_len = strlen(in), out_len = in_len * 4; /* oversized, as a single UTF-8 character could turn into multiple ASCII characters; NOTE(review): no byte is reserved for the terminating NUL stored below, and the size is 0 for an empty input -- confirm the store cannot land past the allocation */
    5119             : 
    5120           4 :         if ((cd = iconv_open(t, f)) == (iconv_t) (-1))
    5121           0 :                 throw(MAL, "str.asciify", "ICONV: cannot convert from (%s) to (%s).", f, t);
    5122             : 
    5123           4 :         if ((*r = out = GDKmalloc(out_len)) == NULL) {
    5124           0 :                 iconv_close(cd);
    5125           0 :                 throw(MAL, "str.asciify", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    5126             :         }
    5127             : 
    5128           4 :         str o = out; /* iconv() advances o; out (and *r) keep pointing at the start of the buffer */
    5129             : 
    5130           4 :         if (iconv(cd, &in, &in_len, &o, &out_len) == (size_t) -1) {
    5131           0 :                 GDKfree(out);
    5132           0 :                 *r = NULL;
    5133           0 :                 iconv_close(cd);
    5134           0 :                 throw(MAL, "str.asciify", "Conversion failed, possibly due to system locale %s.", setlocale(0, NULL));
    5135             :         }
    5136             : 
    5137           4 :         *o = '\0'; /* terminate right after the last converted byte */
    5138           4 :         iconv_close(cd);
    5139           4 :         return MAL_SUCCEED;
    5140             : 
    5141             : #else
    5142             :         throw(MAL, "str.asciify", "ICONV library not available.");
    5143             : #endif
    5144             : }
    5145             : /* BBPnreclaim: variadic convenience helper; reads `nargs` BAT pointers from the argument list and passes each one to BBPreclaim() in order. Callers in this file also pass pointers that may be NULL, so BBPreclaim() is presumably NULL-tolerant -- confirm. */
    5146             : static inline void
    5147         154 : BBPnreclaim(int nargs, ...)
    5148             : {
    5149         154 :         va_list valist;
    5150         154 :         va_start(valist, nargs);
    5151         669 :         for (int i = 0; i < nargs; i++) {
    5152         514 :                 BAT *b = va_arg(valist, BAT *);
    5153         824 :                 BBPreclaim(b);
    5154             :         }
    5155         155 :         va_end(valist);
    5156         155 : }
    5157             : /* scanloop(TEST, KEEP_NULLS): statement macro that relies on names from the expansion site: b, anti, s, p, q, ncands, ci, bi, off, vals, cnt, timeoffset, counter and the label `bailout`. For every visited value v it appends the oid to vals[] when TEST holds, or when KEEP_NULLS is set and v starts with the byte '\200'. */
    5158             : /* scan select loop with or without candidates: a plain p..q range when s is absent or dense, canditer_next(ci) otherwise */
    5159             : #define scanloop(TEST, KEEP_NULLS)                                                                          \
    5160             :         do {                                                                                                                            \
    5161             :                 TRC_DEBUG(ALGO,                                                                                                 \
    5162             :                                   "scanselect(b=%s#"BUNFMT",anti=%d): "                                     \
    5163             :                                   "scanselect %s\n", BATgetId(b), BATcount(b),                        \
    5164             :                                   anti, #TEST);                                                                                 \
    5165             :                 if (!s || BATtdense(s)) {                                                                               \
    5166             :                         for (; p < q; p++) {                                                                         \
    5167             :                                 GDK_CHECK_TIMEOUT(timeoffset, counter,                                  \
    5168             :                                                                   GOTO_LABEL_TIMEOUT_HANDLER(bailout)); \
    5169             :                                 const char *restrict v = BUNtvar(bi, p - off);                  \
    5170             :                                 if ((TEST) || ((KEEP_NULLS) && *v == '\200'))                   \
    5171             :                                         vals[cnt++] = p;                                                                        \
    5172             :                         }                                                                                                                       \
    5173             :                 } else {                                                                                                                \
    5174             :                         for (; p < ncands; p++) {                                                                    \
    5175             :                                 GDK_CHECK_TIMEOUT(timeoffset, counter,                                  \
    5176             :                                                                   GOTO_LABEL_TIMEOUT_HANDLER(bailout)); \
    5177             :                                 oid o = canditer_next(ci);                                                              \
    5178             :                                 const char *restrict v = BUNtvar(bi, o - off);                  \
    5179             :                                 if ((TEST) || ((KEEP_NULLS) && *v == '\200'))                   \
    5180             :                                         vals[cnt++] = o;                                                                        \
    5181             :                         }                                                                                                                       \
    5182             :                 }                                                                                                                               \
    5183             :         } while (0)
    5184             : /* scanloop_anti(TEST, KEEP_NULLS): the macro body is textually the same as scanloop; any inversion of the match has to be expressed in the TEST the caller passes. It relies on the same names from the expansion site: b, anti, s, p, q, ncands, ci, bi, off, vals, cnt, timeoffset, counter and the label `bailout`. */
    5185             : /* anti variant of the scan select loop, with or without candidates */
    5186             : #define scanloop_anti(TEST, KEEP_NULLS)                                                                 \
    5187             :         do {                                                                                                                            \
    5188             :                 TRC_DEBUG(ALGO,                                                                                                 \
    5189             :                                   "scanselect(b=%s#"BUNFMT",anti=%d): "                                     \
    5190             :                                   "scanselect %s\n", BATgetId(b), BATcount(b),                        \
    5191             :                                   anti, #TEST);                                                                                 \
    5192             :                 if (!s || BATtdense(s)) {                                                                               \
    5193             :                         for (; p < q; p++) {                                                                         \
    5194             :                                 GDK_CHECK_TIMEOUT(timeoffset, counter,                                  \
    5195             :                                                                   GOTO_LABEL_TIMEOUT_HANDLER(bailout)); \
    5196             :                                 const char *restrict v = BUNtvar(bi, p - off);                  \
    5197             :                                 if ((TEST) || ((KEEP_NULLS) && *v == '\200'))                   \
    5198             :                                         vals[cnt++] = p;                                                                        \
    5199             :                         }                                                                                                                       \
    5200             :                 } else {                                                                                                                \
    5201             :                         for (; p < ncands; p++) {                                                                    \
    5202             :                                 GDK_CHECK_TIMEOUT(timeoffset, counter,                                  \
    5203             :                                                                   GOTO_LABEL_TIMEOUT_HANDLER(bailout)); \
    5204             :                                 oid o = canditer_next(ci);                                                              \
    5205             :                                 const char *restrict v = BUNtvar(bi, o - off);                  \
    5206             :                                 if ((TEST) || ((KEEP_NULLS) && *v == '\200'))                   \
    5207             :                                         vals[cnt++] = o;                                                                        \
    5208             :                         }                                                                                                                       \
    5209             :                 }                                                                                                                               \
    5210             :         } while (0)
    5211             : /* str_select: scan the string BAT `b` (range p..q, or the candidates in `ci` when `s` is present and not dense) and append to bn's tail array the oid of every non-nil value for which str_cmp(v, key, klen) is 0 (or non-zero when `anti` is set); with `keep_nulls`, values starting with '\200' are appended as well. The number of hits is stored in *rcnt. NOTE(review): when `key` is nil the function returns MAL_SUCCEED before *rcnt is written -- callers must pre-initialise it. */
    5212             : static str
    5213          18 : str_select(BAT *bn, BAT *b, BAT *s, struct canditer *ci, BUN p, BUN q,
    5214             :                                  BUN *rcnt, const char *key, bool anti,
    5215             :                                  int (*str_cmp)(const char *, const char *, int),
    5216             :                                  bool keep_nulls)
    5217             : {
    5218          18 :         if (strNil(key))
    5219             :                 return MAL_SUCCEED;
    5220             : 
    5221          18 :         BATiter bi = bat_iterator(b);
    5222          18 :         BUN cnt = 0, ncands = ci->ncand;
    5223          18 :         oid off = b->hseqbase, *restrict vals = Tloc(bn, 0);
    5224          18 :         str msg = MAL_SUCCEED;
    5225          18 :         int klen = str_strlen(key);
    5226             : 
    5227          18 :         size_t counter = 0;
    5228          18 :         lng timeoffset = 0; /* stays 0 unless the query context has both a start time and a timeout */
    5229          18 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    5230          18 :         if (qry_ctx != NULL)
    5231          18 :                 timeoffset = (qry_ctx->starttime
    5232          18 :                                           && qry_ctx->querytimeout) ? (qry_ctx->starttime +
    5233          18 :                                                                                                    qry_ctx->querytimeout) : 0;
    5234             : 
    5235          18 :         if (anti)                                       /* keep nulls ? (use false for now) */
    5236           0 :                 scanloop_anti(!strNil(v) && str_cmp(v, key, klen) != 0, keep_nulls);
    5237             :         else
    5238          54 :                 scanloop(!strNil(v) && str_cmp(v, key, klen) == 0, keep_nulls);
    5239             : 
    5240           0 :   bailout: /* also the jump target of the timeout handler used inside the scan macros; presumably that handler sets msg -- confirm */
    5241          18 :         bat_iterator_end(&bi);
    5242          18 :         *rcnt = cnt;
    5243          18 :         return msg;
    5244             : }
    5245             : 
    5246             : static str
    5247          18 : STRselect(bat *r_id, const bat *b_id, const bat *cb_id, const char *key,
    5248             :                           const bit anti, int (*str_cmp)(const char *, const char *, int),
    5249             :                           const str fname)
    5250             : {
    5251          18 :         str msg = MAL_SUCCEED;
    5252             : 
    5253          18 :         BAT *b, *cb = NULL, *r = NULL, *old_s = NULL;;
    5254          18 :         BUN p = 0, q = 0, rcnt = 0;
    5255          18 :         struct canditer ci;
    5256          18 :         bool with_strimps = false,
    5257          18 :                 with_strimps_anti = false;
    5258             : 
    5259          18 :         if (!(b = BATdescriptor(*b_id)))
    5260           0 :                 throw(MAL, fname, RUNTIME_OBJECT_MISSING);
    5261             : 
    5262          18 :         if (cb_id && !is_bat_nil(*cb_id) && !(cb = BATdescriptor(*cb_id))) {
    5263           0 :                 BBPreclaim(b);
    5264           0 :                 throw(MAL, fname, RUNTIME_OBJECT_MISSING);
    5265             :         }
    5266             : 
    5267          18 :         assert(ATOMstorage(b->ttype) == TYPE_str);
    5268             : 
    5269          18 :         if (BAThasstrimps(b)) {
    5270           0 :                 if (STRMPcreate(b, NULL) == GDK_SUCCEED) {
    5271           0 :                         BAT *tmp_s = STRMPfilter(b, cb, key, anti);
    5272           0 :                         if (tmp_s) {
    5273           0 :                                 old_s = cb;
    5274           0 :                                 cb = tmp_s;
    5275           0 :                                 if (!anti)
    5276             :                                         with_strimps = true;
    5277             :                                 else
    5278           0 :                                         with_strimps_anti = true;
    5279             :                         }
    5280             :                 } else {
    5281           0 :                         GDKclrerr();
    5282             :                 }
    5283             :         }
    5284             : 
    5285          36 :         MT_thread_setalgorithm(with_strimps ?
    5286          18 :                                                    "string_select: strcmp function using strimps" :
    5287             :                                                    (with_strimps_anti ?
    5288             :                                                         "string_select: strcmp function using strimps anti"
    5289             :                                                         : "string_select: strcmp function with no accelerator"));
    5290             : 
    5291          18 :         canditer_init(&ci, b, cb);
    5292          18 :         if (!(r = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT))) {
    5293           0 :                 BBPnreclaim(2, b, cb);
    5294           0 :                 throw(MAL, fname, SQLSTATE(HY013) MAL_MALLOC_FAIL);
    5295             :         }
    5296             : 
    5297          18 :         if (!cb || BATtdense(cb)) {
    5298           0 :                 if (cb) {
    5299          18 :                         assert(BATtdense(cb));
    5300          18 :                         p = (BUN) cb->tseqbase;
    5301          18 :                         q = p + BATcount(cb);
    5302          18 :                         if ((oid) p < b->hseqbase)
    5303             :                                 p = b->hseqbase;
    5304          18 :                         if ((oid) q > b->hseqbase + BATcount(b))
    5305             :                                 q = b->hseqbase + BATcount(b);
    5306             :                 } else {
    5307           0 :                         p = b->hseqbase;
    5308           0 :                         q = BATcount(b) + b->hseqbase;
    5309             :                 }
    5310             :         }
    5311             : 
    5312          36 :         msg = str_select(r, b, cb, &ci, p, q, &rcnt, key, anti
    5313          18 :                                          && !with_strimps_anti, str_cmp, with_strimps_anti);
    5314             : 
    5315          18 :         if (!msg) {
    5316          18 :                 BATsetcount(r, rcnt);
    5317          18 :                 r->tsorted = true;
    5318          18 :                 r->trevsorted = r->batCount <= 1;
    5319          18 :                 r->tkey = true;
    5320          18 :                 r->tnil = false;
    5321          18 :                 r->tnonil = true;
    5322          36 :                 r->tseqbase = rcnt == 0 ?
    5323          18 :                         0 : rcnt == 1 ?
    5324           8 :                         *(const oid *) Tloc(r, 0) : rcnt == b->batCount ? b->hseqbase : oid_nil;
    5325             : 
    5326          18 :                 if (with_strimps_anti) {
    5327           0 :                         BAT *rev;
    5328           0 :                         if (old_s) {
    5329           0 :                                 rev = BATdiffcand(old_s, r);
    5330             : #ifndef NDEBUG
    5331           0 :                                 BAT *is = BATintersectcand(old_s, r);
    5332           0 :                                 if (is) {
    5333           0 :                                         assert(is->batCount == r->batCount);
    5334           0 :                                         BBPreclaim(is);
    5335             :                                 }
    5336           0 :                                 assert(rev->batCount == old_s->batCount - r->batCount);
    5337             : #endif
    5338             :                         } else
    5339           0 :                                 rev = BATnegcands(b->batCount, r);
    5340             : 
    5341           0 :                         BBPreclaim(r);
    5342           0 :                         r = rev;
    5343           0 :                         if (r == NULL)
    5344           0 :                                 msg = createException(MAL, fname, SQLSTATE(HY013) MAL_MALLOC_FAIL);
    5345             :                 }
    5346             :         }
    5347             : 
    5348          18 :         if (r && !msg) {
    5349          18 :                 *r_id = r->batCacheid;
    5350          18 :                 BBPkeepref(r);
    5351             :         } else {
    5352           0 :                 BBPreclaim(r);
    5353             :         }
    5354             : 
    5355          18 :         BBPnreclaim(3, b, cb, old_s);
    5356          18 :         return msg;
    5357             : }
    5358             : /* Unpack a str.*select MAL call into the caller's variables: with 5 arguments ANTI is argument 4 and ICASE is false, otherwise ANTI is argument 5 and ICASE is true. */
    5359             : #define STRSELECT_MAPARGS(STK, PCI, R_ID, B_ID, CB_ID, KEY, ICASE, ANTI) \
    5360             :         do {                                                                                                                            \
    5361             :                 R_ID = getArgReference(STK, PCI, 0); /* argument 0: result slot */                                                   \
    5362             :                 B_ID = getArgReference(STK, PCI, 1); /* argument 1: input bat id */                                                   \
    5363             :                 CB_ID = getArgReference(STK, PCI, 2); /* argument 2: candidate bat id */                                                  \
    5364             :                 KEY = *getArgReference_str(STK, PCI, 3); /* argument 3: the string to search for */                                               \
    5365             :                 ICASE = PCI->argc != 5; /* NOTE(review): the value of argument 4 is never read; every non-5-argument call is treated as case-insensitive -- confirm this is intended */ \
    5366             :                 ANTI = PCI->argc == 5 ? *getArgReference_bit(STK, PCI, 4) :          \
    5367             :                         *getArgReference_bit(STK, PCI, 5); /* the anti flag is always the last argument */                                                     \
    5368             :         } while (0)
    5369             : 
    5370             : /** MAL entry point "str.startswithselect": unpacks the instruction arguments and delegates to STRselect with a prefix comparator.
    5371             :  * @r_id: argument 0, slot that receives the id of the result bat
    5372             :  * @b_id: argument 1, id of the input bat
    5373             :  * @cb_id: argument 2, id of the candidate bat for the input
    5374             :  * @key: argument 3, the prefix to test for
    5375             :  * @icase: true when the call has other than 5 arguments (see STRSELECT_MAPARGS); picks str_is_iprefix over str_is_prefix
    5376             :  * @anti: last argument, forwarded as STRselect's anti flag (this is a select, not a join)
    5377             :  */
    5378             : static str
    5379           6 : STRstartswithselect(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    5380             : {
    5381           6 :         (void) cntxt; /* unused */
    5382           6 :         (void) mb; /* unused */
    5383             : 
    5384           6 :         bat *r_id = NULL, *b_id = NULL, *cb_id = NULL;
    5385           6 :         char *key = NULL;
    5386           6 :         bit icase = 0, anti = 0;
    5387             : 
    5388           6 :         STRSELECT_MAPARGS(stk, pci, r_id, b_id, cb_id, key, icase, anti); /* assigns all six locals above */
    5389           6 :         return STRselect(r_id, b_id, cb_id, key, anti,
    5390             :                                          icase ? str_is_iprefix : str_is_prefix, "str.startswithselect");
    5391             : }
    5392             : 
    5393             : /** MAL entry point "str.endswithselect": unpacks the instruction arguments and delegates to STRselect with a suffix comparator.
    5394             :  * @r_id: argument 0, slot that receives the id of the result bat
    5395             :  * @b_id: argument 1, id of the input bat
    5396             :  * @cb_id: argument 2, id of the candidate bat for the input
    5397             :  * @key: argument 3, the suffix to test for
    5398             :  * @icase: true when the call has other than 5 arguments (see STRSELECT_MAPARGS); picks str_is_isuffix over str_is_suffix
    5399             :  * @anti: last argument, forwarded as STRselect's anti flag (this is a select, not a join)
    5400             :  */
    5401             : static str
    5402           6 : STRendswithselect(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    5403             : {
    5404           6 :         (void) cntxt; /* unused */
    5405           6 :         (void) mb; /* unused */
    5406             : 
    5407           6 :         bat *r_id = NULL, *b_id = NULL, *cb_id = NULL;
    5408           6 :         char *key = NULL;
    5409           6 :         bit icase = 0, anti = 0;
    5410             : 
    5411           6 :         STRSELECT_MAPARGS(stk, pci, r_id, b_id, cb_id, key, icase, anti); /* assigns all six locals above */
    5412           6 :         return STRselect(r_id, b_id, cb_id, key, anti,
    5413             :                                          icase ? str_is_isuffix : str_is_suffix, "str.endswithselect");
    5414             : }
    5415             : 
    5416             : /** MAL entry point "str.containsselect": unpacks the instruction arguments and delegates to STRselect with a substring comparator.
    5417             :  * @r_id: argument 0, slot that receives the id of the result bat
    5418             :  * @b_id: argument 1, id of the input bat
    5419             :  * @cb_id: argument 2, id of the candidate bat for the input
    5420             :  * @key: argument 3, the substring to test for
    5421             :  * @icase: true when the call has other than 5 arguments (see STRSELECT_MAPARGS); picks str_icontains over str_contains
    5422             :  * @anti: last argument, forwarded as STRselect's anti flag (this is a select, not a join)
    5423             :  */
    5424             : static str
    5425           6 : STRcontainsselect(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    5426             : {
    5427           6 :         (void) cntxt; /* unused */
    5428           6 :         (void) mb; /* unused */
    5429             : 
    5430           6 :         bat *r_id = NULL, *b_id = NULL, *cb_id = NULL;
    5431           6 :         char *key = NULL;
    5432           6 :         bit icase = 0, anti = 0;
    5433             : 
    5434           6 :         STRSELECT_MAPARGS(stk, pci, r_id, b_id, cb_id, key, icase, anti); /* assigns all six locals above */
    5435           6 :         return STRselect(r_id, b_id, cb_id, key, anti,
    5436             :                                          icase ? str_icontains : str_contains, "str.containsselect");
    5437             : }
    5438             : /* Helpers for the join loops below. Both evaluate their first argument textually, so pass plain identifiers only. */
    5439             : #define APPEND(b, o) (((oid *) b->theap->base)[b->batCount++] = (o)) /* store o at index batCount and bump the count; no capacity check, the caller must have grown b first */
    5440             : #define VALUE(s, x)  (s##vars + VarHeapVal(s##vals, (x), s##i.width)) /* token-pastes s into <s>vars, <s>vals and <s>i, which must exist in the calling scope; yields the address at offset VarHeapVal(...) inside <s>vars */
    5441             : /* Set the tail properties of B as used for an empty result: no nils, key, sorted in both directions, sequence base 0. B is expanded unparenthesized, so pass a plain pointer variable. */
    5442             : #define set_empty_bat_props(B)                                  \
    5443             :         do {                                                                            \
    5444             :                 B->tnil = false;                                             \
    5445             :                 B->tnonil = true;                                            \
    5446             :                 B->tkey = true;                                                      \
    5447             :                 B->tsorted = true;                                           \
    5448             :                 B->trevsorted = true;                                        \
    5449             :                 B->tseqbase = 0;                                             \
    5450             :         } while (0)
    5451             : /* Nested-loop join for "contains": for each non-nil right value vr, scan the left candidates (narrowed by STRMPfilter when with_strimps is set) and append every (lo, ro) for which STR_CMP evaluates to 0. STR_LEN is assigned to vr_len once per right value. Relies on the enclosing function for l, r, cl, cr, rl, rr, lci, rci, lbase, rbase, lastl, matches, rskipped, newcap, msg, fname, anti, with_strimps, the timeout variables and an "exit" label. */
    5452             : #define CONTAINS_JOIN_LOOP(STR_CMP, STR_LEN)                                                    \
    5453             :         do {                                                                                                                            \
    5454             :                 canditer_init(&rci, r, cr);                                                                         \
    5455             :                 for (BUN ridx = 0; ridx < rci.ncand; ridx++) {                                       \
    5456             :                         BAT *filtered_sl = NULL; /* strimps-filtered left candidates for this ro; reclaimed below */ \
    5457             :                         GDK_CHECK_TIMEOUT(timeoffset, counter, GOTO_LABEL_TIMEOUT_HANDLER(exit)); \
    5458             :                         ro = canditer_next(&rci);                                                                   \
    5459             :                         vr = VALUE(r, ro - rbase);                                                                      \
    5460             :                         matches = 0; /* pairs emitted for this ro */                                                                                           \
    5461             :                         if (!strNil(vr)) {                                                                                      \
    5462             :                                 vr_len = STR_LEN;                                                                               \
    5463             :                                 if (with_strimps)                                                                               \
    5464             :                                         filtered_sl = STRMPfilter(l, cl, vr, anti);                     \
    5465             :                                 if (filtered_sl)                                                                                \
    5466             :                                         canditer_init(&lci, l, filtered_sl);                                \
    5467             :                                 else /* no strimps, or the filter returned NULL: scan the unfiltered candidates */                                                                                                   \
    5468             :                                         canditer_init(&lci, l, cl);                                                 \
    5469             :                                 for (BUN lidx = 0; lidx < lci.ncand; lidx++) {                       \
    5470             :                                         lo = canditer_next(&lci);                                                   \
    5471             :                                         vl = VALUE(l, lo - lbase);                                                      \
    5472             :                                         if (strNil(vl))                                                                         \
    5473             :                                                 continue;                                                                               \
    5474             :                                         if (STR_CMP) /* non-zero means no match here */                                                                           \
    5475             :                                                 continue;                                                                               \
    5476             :                                         if (BATcount(rl) == BATcapacity(rl)) { /* results full: grow rl (and rr) together */                         \
    5477             :                                                 newcap = BATgrows(rl);                                                  \
    5478             :                                                 BATsetcount(rl, BATcount(rl));                                  \
    5479             :                                                 if (rr)                                                                                 \
    5480             :                                                         BATsetcount(rr, BATcount(rr));                          \
    5481             :                                                 if (BATextend(rl, newcap) != GDK_SUCCEED ||             \
    5482             :                                                         (rr && BATextend(rr, newcap) != GDK_SUCCEED)) { \
    5483             :                                                         msg = createException(MAL, fname, SQLSTATE(HY013) MAL_MALLOC_FAIL);     \
    5484             :                                                         BBPreclaim(filtered_sl); goto exit; /* filtered_sl goes out of scope at exit, so release it here instead of leaking it */ \
    5485             :                                                 }                                                                                               \
    5486             :                                                 assert(!rr || BATcapacity(rl) == BATcapacity(rr)); \
    5487             :                                         }                                                                                                       \
    5488             :                                         if (BATcount(rl) > 0) { /* maintain order/key properties relative to the previous pair */                                                             \
    5489             :                                                 if (lastl + 1 != lo) /* left oids are not consecutive */                                                   \
    5490             :                                                         rl->tseqbase = oid_nil;                                              \
    5491             :                                                 if (matches == 0) { /* first match for a new ro */                                                            \
    5492             :                                                         if (rr)                                                                         \
    5493             :                                                                 rr->trevsorted = false;                                      \
    5494             :                                                         if (lastl > lo) {                                                    \
    5495             :                                                                 rl->tsorted = false;                                 \
    5496             :                                                                 rl->tkey = false;                                            \
    5497             :                                                         } else if (lastl < lo) {                                     \
    5498             :                                                                 rl->trevsorted = false;                                      \
    5499             :                                                         } else { /* same lo as the previous pair */                                                                       \
    5500             :                                                                 rl->tkey = false;                                            \
    5501             :                                                         }                                                                                       \
    5502             :                                                 }                                                                                               \
    5503             :                                         }                                                                                                       \
    5504             :                                         APPEND(rl, lo);                                                                         \
    5505             :                                         if (rr)                                                                                         \
    5506             :                                                 APPEND(rr, ro);                                                                 \
    5507             :                                         lastl = lo;                                                                                     \
    5508             :                                         matches++;                                                                                      \
    5509             :                                 }                                                                                                               \
    5510             :                                 BBPreclaim(filtered_sl); /* also reached with NULL when no filter was applied */                                                               \
    5511             :                         }                                                                                                                       \
    5512             :                         if (rr) {                                                                                                       \
    5513             :                                 if (matches > 1) { /* ro was emitted more than once */                                                                          \
    5514             :                                         rr->tkey = false;                                                                    \
    5515             :                                         rr->tseqbase = oid_nil;                                                              \
    5516             :                                         rl->trevsorted = false;                                                              \
    5517             :                                 } else if (matches == 0) { /* remember a gap once output has started */                                                             \
    5518             :                                         rskipped = BATcount(rr) > 0;                                         \
    5519             :                                 } else if (rskipped) { /* single match following a gap */                                                                 \
    5520             :                                         rr->tseqbase = oid_nil;                                                              \
    5521             :                                 }                                                                                                               \
    5522             :                         } else if (matches > 1) {                                                                    \
    5523             :                                 rl->trevsorted = false;                                                                      \
    5524             :                         }                                                                                                                       \
    5525             :                 }                                                                                                                               \
    5526             :         } while (0)
    5527             : /* Generic nested-loop string join: for each non-nil right value vr, scan all left candidates and append every (lo, ro) for which STR_CMP is non-zero (note: CONTAINS_JOIN_LOOP skips on non-zero instead). STR_LEN is assigned to vr_len once per right value; FNAME names the function in the allocation-failure exception. Relies on the enclosing function for l, r, cl, cr, rl, rr, lci, rci, lbase, rbase, last_lo, matches, rskipped, newcap, msg, the timeout variables and an "exit" label. */
    5528             : #define STR_JOIN_NESTED_LOOP(STR_CMP, STR_LEN, FNAME)                                   \
    5529             :         do {                                                                                                                            \
    5530             :                 canditer_init(&rci, r, cr);                                                                         \
    5531             :                 for (BUN ridx = 0; ridx < rci.ncand; ridx++) {                                       \
    5532             :                         GDK_CHECK_TIMEOUT(timeoffset, counter, GOTO_LABEL_TIMEOUT_HANDLER(exit)); \
    5533             :                         ro = canditer_next(&rci);                                                                   \
    5534             :                         vr = VALUE(r, ro - rbase);                                                                      \
    5535             :                         matches = 0; /* pairs emitted for this ro */                                                                                           \
    5536             :                         if (!strNil(vr)) {                                                                                      \
    5537             :                                 vr_len = STR_LEN;                                                                               \
    5538             :                                 canditer_init(&lci, l, cl); /* restart the left scan for every right value */                                                        \
    5539             :                                 for (BUN lidx = 0; lidx < lci.ncand; lidx++) {                       \
    5540             :                                         lo = canditer_next(&lci);                                                   \
    5541             :                                         vl = VALUE(l, lo - lbase);                                                      \
    5542             :                                         if (strNil(vl))                                                                         \
    5543             :                                                 continue;                                                                               \
    5544             :                                         if (!(STR_CMP)) /* zero means no match here */                                                                        \
    5545             :                                                 continue;                                                                               \
    5546             :                                         if (BATcount(rl) == BATcapacity(rl)) { /* results full: grow rl (and rr) together */                         \
    5547             :                                                 newcap = BATgrows(rl);                                                  \
    5548             :                                                 BATsetcount(rl, BATcount(rl));                                  \
    5549             :                                                 if (rr)                                                                                 \
    5550             :                                                         BATsetcount(rr, BATcount(rr));                          \
    5551             :                                                 if (BATextend(rl, newcap) != GDK_SUCCEED ||             \
    5552             :                                                         (rr && BATextend(rr, newcap) != GDK_SUCCEED)) { \
    5553             :                                                         msg = createException(MAL, FNAME, SQLSTATE(HY013) MAL_MALLOC_FAIL); \
    5554             :                                                         goto exit;                                                                      \
    5555             :                                                 }                                                                                               \
    5556             :                                                 assert(!rr || BATcapacity(rl) == BATcapacity(rr)); \
    5557             :                                         }                                                                                                       \
    5558             :                                         if (BATcount(rl) > 0) { /* maintain order/key properties relative to the previous pair */                                                             \
    5559             :                                                 if (last_lo + 1 != lo) /* left oids are not consecutive */                                                 \
    5560             :                                                         rl->tseqbase = oid_nil;                                              \
    5561             :                                                 if (matches == 0) { /* first match for a new ro */                                                            \
    5562             :                                                         if (rr)                                                                         \
    5563             :                                                                 rr->trevsorted = false;                                      \
    5564             :                                                         if (last_lo > lo) {                                                  \
    5565             :                                                                 rl->tsorted = false;                                 \
    5566             :                                                                 rl->tkey = false;                                            \
    5567             :                                                         } else if (last_lo < lo) {                                   \
    5568             :                                                                 rl->trevsorted = false;                                      \
    5569             :                                                         } else { /* same lo as the previous pair */                                                                       \
    5570             :                                                                 rl->tkey = false;                                            \
    5571             :                                                         }                                                                                       \
    5572             :                                                 }                                                                                               \
    5573             :                                         }                                                                                                       \
    5574             :                                         APPEND(rl, lo);                                                                         \
    5575             :                                         if (rr)                                                                                         \
    5576             :                                                 APPEND(rr, ro);                                                                 \
    5577             :                                         last_lo = lo;                                                                           \
    5578             :                                         matches++;                                                                                      \
    5579             :                                 }                                                                                                               \
    5580             :                         }                                                                                                                       \
    5581             :                         if (rr) {                                                                                                       \
    5582             :                                 if (matches > 1) { /* ro was emitted more than once */                                                                          \
    5583             :                                         rr->tkey = false;                                                                    \
    5584             :                                         rr->tseqbase = oid_nil;                                                              \
    5585             :                                         rl->trevsorted = false;                                                              \
    5586             :                                 } else if (matches == 0) { /* remember a gap once output has started */                                                             \
    5587             :                                         rskipped = BATcount(rr) > 0;                                         \
    5588             :                                 } else if (rskipped) { /* single match following a gap */                                                                 \
    5589             :                                         rr->tseqbase = oid_nil;                                                              \
    5590             :                                 }                                                                                                               \
    5591             :                         } else if (matches > 1) {                                                                    \
    5592             :                                 rl->trevsorted = false;                                                                      \
    5593             :                         }                                                                                                                       \
    5594             :                 }                                                                                                                               \
    5595             :         } while (0)
    5596             : /*
                        * STARTSWITH_SORTED_LOOP(STR_CMP, STR_LEN, FNAME)
                        *
                        * Join loop over the candidates of sorted_l/sorted_cl (left) and
                        * sorted_r/sorted_cr (right).  The macro expands in place and relies on
                        * locals of the enclosing function: lci, rci, lx, rx, n, lo, ro, last_lo,
                        * vl, vr, vr_len, cmp, matches, newcap, rskipped, l, r, lbase, rbase,
                        * rl, rr, msg, timeoffset, counter, an `exit' label, and the VALUE and
                        * APPEND macros.
                        *
                        *   STR_CMP  expression evaluated after vl and vr are set; stored in cmp
                        *   STR_LEN  expression stored in vr_len once per right value
                        *   FNAME    function name used in the allocation-failure exception
                        *
                        * Steps:
                        *  1. advance past the leading nil values on both sides;
                        *  2. for each remaining right value, scan left values starting at lx:
                        *     cmp < 0 advances lx and continues, cmp > 0 ends the scan, and
                        *     cmp == 0 appends lo to rl and, when rr is non-NULL, ro to rr;
                        *  3. tsorted/trevsorted/tkey/tseqbase of rl and rr are cleared as soon
                        *     as the appended oids show that they no longer hold.
                        *
                        * When BATextend fails, msg is set and control jumps to `exit'.
                        *
                        * NOTE(review): the inner scan reads lci with canditer_next_dense()
                        * while the nil-skipping loop uses canditer_next(); presumably lci is
                        * known to be dense at this point -- confirm.
                        */
    5597             : #define STARTSWITH_SORTED_LOOP(STR_CMP, STR_LEN, FNAME)                                 \
    5598             :         do {                                                                                                                            \
    5599             :                 canditer_init(&rci, sorted_r, sorted_cr);                                           \
    5600             :                 canditer_init(&lci, sorted_l, sorted_cl);                                           \
    5601             :                 for (lx = 0; lx < lci.ncand; lx++) {                                                 \
    5602             :                         lo = canditer_next(&lci);                                                                   \
    5603             :                         vl = VALUE(l, lo - lbase);                                                                      \
    5604             :                         if (!strNil(vl))                                                                                        \
    5605             :                                 break; /* lx now indexes the first non-nil left value */ \
    5606             :                 }                                                                                                                               \
    5607             :                 for (rx = 0; rx < rci.ncand; rx++) {                                                 \
    5608             :                         ro = canditer_next(&rci);                                                                   \
    5609             :                         vr = VALUE(r, ro - rbase);                                                                      \
    5610             :                         if (!strNil(vr)) {                                                                                      \
    5611             :                                 canditer_setidx(&rci, rx); /* so the main loop re-reads this candidate */ \
    5612             :                                 break;                                                                                                  \
    5613             :                         }                                                                                                                       \
    5614             :                 }                                                                                                                               \
    5615             :                 for (; rx < rci.ncand; rx++) {                                                                       \
    5616             :                         GDK_CHECK_TIMEOUT(timeoffset, counter, GOTO_LABEL_TIMEOUT_HANDLER(exit)); \
    5617             :                         ro = canditer_next(&rci);                                                                   \
    5618             :                         vr = VALUE(r, ro - rbase);                                                                      \
    5619             :                         vr_len = STR_LEN;                                                                                       \
    5620             :                         matches = 0;                                                                                            \
    5621             :                         for (canditer_setidx(&lci, lx), n = lx; n < lci.ncand; n++) { \
    5622             :                                 lo = canditer_next_dense(&lci);                                                     \
    5623             :                                 vl = VALUE(l, lo - lbase);                                                              \
    5624             :                                 cmp = STR_CMP;                                                                                  \
    5625             :                                 if (cmp < 0) {                                                                                       \
    5626             :                                         lx++; /* later right values start their scan after this one */ \
    5627             :                                         continue;                                                                                       \
    5628             :                                 }                                                                                                               \
    5629             :                                 else if (cmp > 0)                                                                            \
    5630             :                                         break; /* stop scanning the left side for this right value */ \
    5631             :                                 if (BATcount(rl) == BATcapacity(rl)) {                                  \
    5632             :                                         newcap = BATgrows(rl);                                                          \
    5633             :                                         BATsetcount(rl, BATcount(rl));                                          \
    5634             :                                         if (rr)                                                                                         \
    5635             :                                                 BATsetcount(rr, BATcount(rr));                                  \
    5636             :                                         if (BATextend(rl, newcap) != GDK_SUCCEED ||                     \
    5637             :                                                 (rr && BATextend(rr, newcap) != GDK_SUCCEED)) { \
    5638             :                                                 msg = createException(MAL, FNAME, SQLSTATE(HY013) MAL_MALLOC_FAIL); \
    5639             :                                                 goto exit;                                                                              \
    5640             :                                         }                                                                                                       \
    5641             :                                         assert(!rr || BATcapacity(rl) == BATcapacity(rr));      \
    5642             :                                 }                                                                                                               \
    5643             :                                 if (BATcount(rl) > 0) {                                                                      \
    5644             :                                         if (last_lo + 1 != lo)                                                          \
    5645             :                                                 rl->tseqbase = oid_nil;                                                      \
    5646             :                                         if (matches == 0) {                                                                     \
    5647             :                                                 if (rr)                                                                                 \
    5648             :                                                         rr->trevsorted = false;                                              \
    5649             :                                                 if (last_lo > lo) {                                                          \
    5650             :                                                         rl->tsorted = false;                                         \
    5651             :                                                         rl->tkey = false;                                                    \
    5652             :                                                 } else if (last_lo < lo) {                                           \
    5653             :                                                         rl->trevsorted = false;                                              \
    5654             :                                                 } else {                                                                                \
    5655             :                                                         rl->tkey = false;                                                    \
    5656             :                                                 }                                                                                               \
    5657             :                                         }                                                                                                       \
    5658             :                                 }                                                                                                               \
    5659             :                                 APPEND(rl, lo);                                                                                 \
    5660             :                                 if (rr)                                                                                                 \
    5661             :                                         APPEND(rr, ro);                                                                         \
    5662             :                                 last_lo = lo;                                                                                   \
    5663             :                                 matches++;                                                                                              \
    5664             :                         }                                                                                                                       \
    5665             :                         if (rr) {                                                                                                       \
    5666             :                                 if (matches > 1) {                                                                           \
    5667             :                                         rr->tkey = false;                                                                    \
    5668             :                                         rr->tseqbase = oid_nil;                                                              \
    5669             :                                         rl->trevsorted = false;                                                              \
    5670             :                                 } else if (matches == 0) {                                                              \
    5671             :                                         rskipped = BATcount(rr) > 0;                                         \
    5672             :                                 } else if (rskipped) {                                                                  \
    5673             :                                         rr->tseqbase = oid_nil;                                                              \
    5674             :                                 }                                                                                                               \
    5675             :                         } else if (matches > 1) {                                                                    \
    5676             :                                 rl->trevsorted = false;                                                                      \
    5677             :                         }                                                                                                                       \
    5678             :                 }                                                                                                                               \
    5679             :         } while (0)
    5680             : /*
                        * Store in dst the reversal of the UTF-8 string src.
                        *
                        * Multi-byte sequences (lead bytes 0xC0-0xDF, 0xE0-0xEF and 0xF0-0xF7)
                        * are copied as a unit, so the bytes inside one sequence keep their
                        * original order while the sequences themselves are reversed.
                        *
                        * src  NUL-terminated input; the nil string is copied through as str_nil
                        * len  byte length of src (the final assert expects it to be consumed
                        *      exactly)
                        * dst  output buffer; dst[len] is written, so it must hold len + 1 bytes
                        *
                        * Malformed UTF-8 is only detected by the asserts.
                        */
    5681             : static void
    5682         533 : do_strrev(char *dst, const char *src, size_t len)
    5683             : {
    5684         533 :         dst[len] = 0; /* terminate first; the loop fills dst from the back */
    5685         533 :         if (strNil(src)) {
    5686           8 :                 assert(len == strlen(str_nil));
    5687           8 :                 strcpy(dst, str_nil);
    5688           8 :                 return;
    5689             :         }
    5690        4321 :         while (*src) {
    5691        3796 :                 if ((*src & 0xF8) == 0xF0) { /* 4-byte sequence */
    5692           0 :                         assert(len >= 4);
    5693           0 :                         dst[len - 4] = *src++;
    5694           0 :                         assert((*src & 0xC0) == 0x80);
    5695           0 :                         dst[len - 3] = *src++;
    5696           0 :                         assert((*src & 0xC0) == 0x80);
    5697           0 :                         dst[len - 2] = *src++;
    5698           0 :                         assert((*src & 0xC0) == 0x80);
    5699           0 :                         dst[len - 1] = *src++;
    5700           0 :                         len -= 4;
    5701        3796 :                 } else if ((*src & 0xF0) == 0xE0) { /* 3-byte sequence */
    5702           0 :                         assert(len >= 3);
    5703           0 :                         dst[len - 3] = *src++;
    5704           0 :                         assert((*src & 0xC0) == 0x80);
    5705           0 :                         dst[len - 2] = *src++;
    5706           0 :                         assert((*src & 0xC0) == 0x80);
    5707           0 :                         dst[len - 1] = *src++;
    5708           0 :                         len -= 3;
    5709        3796 :                 } else if ((*src & 0xE0) == 0xC0) { /* 2-byte sequence */
    5710           0 :                         assert(len >= 2);
    5711           0 :                         dst[len - 2] = *src++;
    5712           0 :                         assert((*src & 0xC0) == 0x80);
    5713           0 :                         dst[len - 1] = *src++;
    5714           0 :                         len -= 2;
    5715             :                 } else { /* single byte; the assert below requires it to be ASCII */
    5716        3796 :                         assert(len >= 1);
    5717        3796 :                         assert((*src & 0x80) == 0);
    5718        3796 :                         dst[--len] = *src++;
    5719             :                 }
    5720             :         }
    5721         525 :         assert(len == 0); /* every byte of dst[0..len) has been written */
    5722             : }
    5723             : /*
                        * Build a new TRANSIENT string BAT, with the same hseqbase as b, that
                        * holds every value of b reversed by do_strrev().
                        *
                        * A single scratch buffer is reused for all values; it starts at 1024
                        * bytes and is regrown to len + 1024 whenever a value does not fit.
                        *
                        * Returns the new BAT, or NULL when an allocation or BUNappend fails
                        * (the partial result and the scratch buffer are released first).
                        */
    5724             : static BAT *
    5725          24 : batstr_strrev(BAT *b)
    5726             : {
    5727          24 :         BAT *bn = NULL;
    5728          24 :         BATiter bi;
    5729          24 :         BUN p, q;
    5730          24 :         const char *src;
    5731          24 :         size_t len;
    5732          24 :         char *dst;
    5733          24 :         size_t dstlen;
    5734             : 
    5735          24 :         dstlen = 1024;
    5736          24 :         dst = GDKmalloc(dstlen);
    5737          24 :         if (dst == NULL)
    5738             :                 return NULL;
    5739             : 
    5740          24 :         assert(b->ttype == TYPE_str);
    5741             : 
    5742          24 :         bn = COLnew(b->hseqbase, TYPE_str, BATcount(b), TRANSIENT);
    5743          24 :         if (bn == NULL) {
    5744           0 :                 GDKfree(dst);
    5745           0 :                 return NULL;
    5746             :         }
    5747             : 
    5748          24 :         bi = bat_iterator(b);
    5749         559 :         BATloop(b, p, q) {
    5750         535 :                 src = (const char *) BUNtail(bi, p);
    5751         533 :                 len = strlen(src);
    5752         533 :                 if (len >= dstlen) { /* need len + 1 bytes for the terminator */
    5753           0 :                         char *ndst;
    5754           0 :                         dstlen = len + 1024;
    5755           0 :                         ndst = GDKrealloc(dst, dstlen);
    5756           0 :                         if (ndst == NULL) { /* dst is still the old buffer and is freed below */
    5757           0 :                                 bat_iterator_end(&bi);
    5758           0 :                                 BBPreclaim(bn);
    5759           0 :                                 GDKfree(dst);
    5760           0 :                                 return NULL;
    5761             :                         }
    5762             :                         dst = ndst;
    5763             :                 }
    5764         533 :                 do_strrev(dst, src, len);
    5765         535 :                 if (BUNappend(bn, dst, false) != GDK_SUCCEED) {
    5766           0 :                         bat_iterator_end(&bi);
    5767           0 :                         BBPreclaim(bn);
    5768           0 :                         GDKfree(dst);
    5769           0 :                         return NULL;
    5770             :                 }
    5771             :         }
    5772             : 
    5773          24 :         bat_iterator_end(&bi);
    5774          24 :         GDKfree(dst);
    5775          24 :         return bn;
    5776             : }
    5777             : /*
                        * Build a new TRANSIENT string BAT, with the same hseqbase as b, that
                        * holds the result of STRlower() for every value of b.
                        *
                        * Returns the new BAT, or NULL when COLnew, STRlower or BUNappend fails;
                        * the partial result is released before returning NULL.
                        *
                        * NOTE(review): the value returned by STRlower() is only tested and then
                        * dropped; if it is an allocated exception string it is leaked on that
                        * path -- confirm against STRlower's contract.
                        */
    5778             : static BAT *
    5779          18 : batstr_strlower(BAT *b)
    5780             : {
    5781          18 :         BAT *bn = NULL;
    5782          18 :         BATiter bi;
    5783          18 :         BUN p, q;
    5784             : 
    5785          18 :         assert(b->ttype == TYPE_str);
    5786             : 
    5787          18 :         bn = COLnew(b->hseqbase, TYPE_str, BATcount(b), TRANSIENT);
    5788          18 :         if (bn == NULL)
    5789             :                 return NULL;
    5790             : 
    5791          18 :         bi = bat_iterator(b);
    5792          98 :         BATloop(b, p, q) {
    5793          80 :                 str vb = BUNtail(bi, p), vb_low = NULL;
    5794          80 :                 if (STRlower(&vb_low, &vb)) { /* non-NULL return signals failure */
    5795           0 :                         bat_iterator_end(&bi);
    5796           0 :                         BBPreclaim(bn);
    5797           0 :                         return NULL;
    5798             :                 }
    5799          80 :                 if (BUNappend(bn, vb_low, false) != GDK_SUCCEED) {
    5800           0 :                         GDKfree(vb_low);
    5801           0 :                         bat_iterator_end(&bi);
    5802           0 :                         BBPreclaim(bn);
    5803           0 :                         return NULL;
    5804             :                 }
    5805          79 :                 GDKfree(vb_low); /* vb_low is released here after every successful append */
    5806             :         }
    5807          18 :         bat_iterator_end(&bi);
    5808          18 :         return bn;
    5809             : }
    5810             : /*
                        * Nested-loop string join of l and r.
                        *
                        * rl, rr   result BATs receiving the matching left/right oids; rr may be
                        *          NULL, in which case only rl is filled
                        * l, r     string input BATs
                        * cl, cr   optional candidate lists (may be NULL)
                        * anti     when set, pairs are selected on str_cmp(...) != 0 instead of
                        *          str_cmp(...) == 0
                        * str_cmp  comparison called as str_cmp(vl, vr, vr_len)
                        * fname    name used in trace output and exceptions
                        *
                        * The iteration itself is the STR_JOIN_NESTED_LOOP macro, which expands
                        * in place; locals that look unused here (lci, rci, lvals, rvals, lvars,
                        * rvars, lo, ro, last_lo, matches, newcap, rskipped, counter, ...) are
                        * presumably consumed by that macro.
                        *
                        * Returns MAL_SUCCEED, or the exception stored in msg before a jump to
                        * the `exit' label.  Both BAT iterators are released on every path.
                        */
    5811             : static str
    5812          14 : str_join_nested(BAT *rl, BAT *rr, BAT *l, BAT *r, BAT *cl, BAT *cr,
    5813             :                                 bit anti, int (*str_cmp)(const char *, const char *, int), str fname)
    5814             : {
    5815          14 :         str msg = MAL_SUCCEED;
    5816             : 
    5817          14 :         lng timeoffset = 0; /* start time + query timeout, or 0 when either is unset */
    5818          14 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    5819          14 :         if (qry_ctx != NULL)
    5820          14 :                 timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ?
    5821          14 :                                 (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
    5822             : 
    5823          14 :         TRC_DEBUG(ALGO,
    5824             :                           "(%s, %s, l=%s#" BUNFMT "[%s]%s%s,"
    5825             :                           "r=%s#" BUNFMT "[%s]%s%s,sl=%s#" BUNFMT "%s%s,"
    5826             :                           "sr=%s#" BUNFMT "%s%s)\n",
    5827             :                           fname, "nested loop",
    5828             :                           BATgetId(l), BATcount(l), ATOMname(l->ttype),
    5829             :                           l->tsorted ? "-sorted" : "",
    5830             :                           l->trevsorted ? "-revsorted" : "",
    5831             :                           BATgetId(r), BATcount(r), ATOMname(r->ttype),
    5832             :                           r->tsorted ? "-sorted" : "",
    5833             :                           r->trevsorted ? "-revsorted" : "",
    5834             :                           cl ? BATgetId(cl) : "NULL", cl ? BATcount(cl) : 0,
    5835             :                           cl && cl->tsorted ? "-sorted" : "",
    5836             :                           cl && cl->trevsorted ? "-revsorted" : "",
    5837             :                           cr ? BATgetId(cr) : "NULL", cr ? BATcount(cr) : 0,
    5838             :                           cr && cr->tsorted ? "-sorted" : "",
    5839             :                           cr && cr->trevsorted ? "-revsorted" : "");
    5840             : 
    5841          42 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
    5842          14 :         assert(ATOMtype(l->ttype) == TYPE_str);
    5843             : 
    5844          14 :         BATiter li = bat_iterator(l);
    5845          14 :         BATiter ri = bat_iterator(r);
    5846          14 :         assert(ri.vh && r->ttype);
    5847             : 
    5848          14 :         struct canditer lci, rci;
    5849          14 :         oid lbase = l->hseqbase,
    5850          14 :                 rbase = r->hseqbase,
    5851          14 :                 lo, ro, last_lo = 0;
    5852          14 :         const char *lvals = (const char *) li.base,
    5853          14 :                 *rvals = (const char *) ri.base,
    5854          14 :                 *lvars = li.vh->base,
    5855          14 :                 *rvars = ri.vh->base,
    5856             :                 *vl, *vr;
    5857          14 :         BUN matches, newcap;
    5858          14 :         int rskipped = 0, vr_len = 0;
    5859          14 :         size_t counter = 0;
    5860             : 
    5861          14 :         if (anti) /* anti-join: select the pairs for which str_cmp reports a difference */
    5862           0 :                 STR_JOIN_NESTED_LOOP((str_cmp(vl, vr, vr_len) != 0), str_strlen(vr), fname);
    5863             :         else
    5864         734 :                 STR_JOIN_NESTED_LOOP((str_cmp(vl, vr, vr_len) == 0), str_strlen(vr), fname);
    5865             : 
    5866          14 :         assert(!rr || BATcount(rl) == BATcount(rr));
    5867          14 :         BATsetcount(rl, BATcount(rl));
    5868          14 :         if (rr)
    5869          14 :                 BATsetcount(rr, BATcount(rr));
    5870             : 
    5871          14 :         if (BATcount(rl) > 0) {
    5872          13 :                 if (BATtdense(rl)) /* still dense: the sequence starts at the first stored oid */
    5873           2 :                         rl->tseqbase = ((oid *) rl->theap->base)[0];
    5874          13 :                 if (rr && BATtdense(rr))
    5875           4 :                         rr->tseqbase = ((oid *) rr->theap->base)[0];
    5876             :         } else { /* empty result */
    5877           1 :                 rl->tseqbase = 0;
    5878           1 :                 if (rr)
    5879           1 :                         rr->tseqbase = 0;
    5880             :         }
    5881             : 
    5882          14 :         TRC_DEBUG(ALGO,
    5883             :                           "(%s, l=%s,r=%s)=(%s#" BUNFMT "%s%s,%s#" BUNFMT "%s%s\n",
    5884             :                           fname,
    5885             :                           BATgetId(l), BATgetId(r), BATgetId(rl), BATcount(rl),
    5886             :                           rl->tsorted ? "-sorted" : "",
    5887             :                           rl->trevsorted ? "-revsorted" : "",
    5888             :                           rr ? BATgetId(rr) : "NULL", rr ? BATcount(rr) : 0, /* a null pointer must not reach %s */
    5889             :                           rr && rr->tsorted ? "-sorted" : "",
    5890             :                           rr && rr->trevsorted ? "-revsorted" : "");
    5891             : 
    5892          14 : exit:
    5893          14 :         bat_iterator_end(&li);
    5894          14 :         bat_iterator_end(&ri);
    5895          14 :         return msg;
    5896             : }
    5897             : /*
                        * Join of l and r driven by the CONTAINS_JOIN_LOOP macro.
                        *
                        * rl, rr   result BATs receiving the matching left/right oids; rr may be
                        *          NULL, in which case only rl is filled
                        * l, r     string input BATs
                        * cl, cr   optional candidate lists (may be NULL)
                        * anti     selects which comparison outcome is handed to the macro
                        * str_cmp  comparison called as str_cmp(vl, vr, vr_len)
                        * fname    name used in trace output
                        *
                        * If l already has strimps, STRMPcreate() is called on it; when that
                        * fails the error is cleared and with_strimps is reset to false.
                        *
                        * The iteration itself is the CONTAINS_JOIN_LOOP macro, which expands in
                        * place; locals that look unused here (with_strimps, lci, rci, lvals,
                        * rvals, lvars, rvars, lo, ro, lastl, matches, newcap, rskipped,
                        * counter, ...) are presumably consumed by that macro.
                        *
                        * NOTE(review): the non-anti branch passes `str_cmp(...) != 0' and the
                        * anti branch `== 0'; presumably the macro's first argument is the
                        * condition under which a pair is skipped -- confirm against the macro
                        * definition.
                        *
                        * Returns msg, which is MAL_SUCCEED unless changed before a jump to the
                        * `exit' label.  Both BAT iterators are released on every path.
                        */
    5898             : static str
    5899          32 : contains_join(BAT *rl, BAT *rr, BAT *l, BAT *r, BAT *cl, BAT *cr, bit anti,
    5900             :                           int (*str_cmp)(const char *, const char *, int), const str fname)
    5901             : {
    5902          32 :         str msg = MAL_SUCCEED;
    5903             : 
    5904          32 :         lng timeoffset = 0; /* start time + query timeout, or 0 when either is unset */
    5905          32 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    5906          32 :         if (qry_ctx != NULL)
    5907          32 :                 timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ?
    5908          32 :                         (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
    5909             : 
    5910          32 :         TRC_DEBUG(ALGO,
    5911             :                           "(%s, l=%s#" BUNFMT "[%s]%s%s,"
    5912             :                           "r=%s#" BUNFMT "[%s]%s%s,sl=%s#" BUNFMT "%s%s,"
    5913             :                           "sr=%s#" BUNFMT "%s%s)\n",
    5914             :                           fname,
    5915             :                           BATgetId(l), BATcount(l), ATOMname(l->ttype),
    5916             :                           l->tsorted ? "-sorted" : "",
    5917             :                           l->trevsorted ? "-revsorted" : "",
    5918             :                           BATgetId(r), BATcount(r), ATOMname(r->ttype),
    5919             :                           r->tsorted ? "-sorted" : "",
    5920             :                           r->trevsorted ? "-revsorted" : "",
    5921             :                           cl ? BATgetId(cl) : "NULL", cl ? BATcount(cl) : 0,
    5922             :                           cl && cl->tsorted ? "-sorted" : "",
    5923             :                           cl && cl->trevsorted ? "-revsorted" : "",
    5924             :                           cr ? BATgetId(cr) : "NULL", cr ? BATcount(cr) : 0,
    5925             :                           cr && cr->tsorted ? "-sorted" : "",
    5926             :                           cr && cr->trevsorted ? "-revsorted" : "");
    5927             : 
    5928          32 :         bool with_strimps = false;
    5929             : 
    5930          32 :         if (BAThasstrimps(l)) {
    5931           8 :                 with_strimps = true;
    5932           8 :                 if (STRMPcreate(l, NULL) != GDK_SUCCEED) { /* best effort: continue without strimps */
    5933           0 :                         GDKclrerr();
    5934           0 :                         with_strimps = false;
    5935             :                 }
    5936             :         }
    5937             : 
    5938          96 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
    5939          32 :         assert(ATOMtype(l->ttype) == TYPE_str);
    5940             : 
    5941          32 :         BATiter li = bat_iterator(l);
    5942          32 :         BATiter ri = bat_iterator(r);
    5943          32 :         assert(ri.vh && r->ttype);
    5944             : 
    5945          32 :         struct canditer lci, rci;
    5946          32 :         oid lbase = l->hseqbase,
    5947          32 :                 rbase = r->hseqbase,
    5948          32 :                 lo, ro, lastl = 0;
    5949          32 :         const char *lvals = (const char *) li.base,
    5950          32 :                 *rvals = (const char *) ri.base,
    5951          32 :                 *lvars = li.vh->base,
    5952          32 :                 *rvars = ri.vh->base,
    5953             :                 *vl, *vr;
    5954          32 :         int rskipped = 0, vr_len = 0;
    5955          32 :         BUN matches, newcap;
    5956          32 :         size_t counter = 0;
    5957             : 
    5958          32 :         if (anti)
    5959           0 :                 CONTAINS_JOIN_LOOP(str_cmp(vl, vr, vr_len) == 0, str_strlen(vr));
    5960             :         else
    5961       30207 :                 CONTAINS_JOIN_LOOP(str_cmp(vl, vr, vr_len) != 0, str_strlen(vr));
    5962             : 
    5963          32 :         assert(!rr || BATcount(rl) == BATcount(rr));
    5964          32 :         BATsetcount(rl, BATcount(rl));
    5965          32 :         if (rr)
    5966          32 :                 BATsetcount(rr, BATcount(rr));
    5967          32 :         if (BATcount(rl) > 0) {
    5968          29 :                 if (BATtdense(rl)) /* still dense: the sequence starts at the first stored oid */
    5969           7 :                         rl->tseqbase = ((oid *) rl->theap->base)[0];
    5970          29 :                 if (rr && BATtdense(rr))
    5971           5 :                         rr->tseqbase = ((oid *) rr->theap->base)[0];
    5972             :         } else { /* empty result */
    5973           3 :                 rl->tseqbase = 0;
    5974           3 :                 if (rr)
    5975           3 :                         rr->tseqbase = 0;
    5976             :         }
    5977             : 
    5978          32 :         TRC_DEBUG(ALGO,
    5979             :                           "(%s, l=%s,r=%s)=(%s#" BUNFMT "%s%s,%s#" BUNFMT "%s%s\n",
    5980             :                           fname,
    5981             :                           BATgetId(l), BATgetId(r), BATgetId(rl), BATcount(rl),
    5982             :                           rl->tsorted ? "-sorted" : "",
    5983             :                           rl->trevsorted ? "-revsorted" : "",
    5984             :                           rr ? BATgetId(rr) : "NULL", rr ? BATcount(rr) : 0, /* a null pointer must not reach %s */
    5985             :                           rr && rr->tsorted ? "-sorted" : "",
    5986             :                           rr && rr->trevsorted ? "-revsorted" : "");
    5987          32 : exit:
    5988          32 :         bat_iterator_end(&li);
    5989          32 :         bat_iterator_end(&ri);
    5990          32 :         return msg;
    5991             : }
    5992             : 
    5993             : static str
    5994          26 : startswith_join(BAT **rl_ptr, BAT **rr_ptr, BAT *l, BAT *r, BAT *cl, BAT *cr,
    5995             :                                 bit anti, int (*str_cmp)(const char *, const char *, int), str fname)
    5996             : {
    5997          26 :         str msg = MAL_SUCCEED;
    5998          26 :         gdk_return rc;
    5999             : 
    6000          26 :         lng timeoffset = 0;
    6001          26 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    6002          26 :         if (qry_ctx != NULL)
    6003          26 :                 timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ?
    6004          26 :                         (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
    6005             : 
    6006          26 :         assert(*rl_ptr && *rr_ptr);
    6007             : 
    6008          26 :         BAT *sorted_l = NULL, *sorted_r = NULL,
    6009          26 :                 *sorted_cl = NULL, *sorted_cr = NULL,
    6010          26 :                 *ord_sorted_l = NULL, *ord_sorted_r = NULL,
    6011          26 :                 *proj_rl = NULL, *proj_rr = NULL,
    6012          26 :                 *rl = *rl_ptr, *rr = *rr_ptr;
    6013             : 
    6014          26 :         TRC_DEBUG(ALGO,
    6015             :                           "(%s, %s, l=%s#" BUNFMT "[%s]%s%s,"
    6016             :                           "r=%s#" BUNFMT "[%s]%s%s,sl=%s#" BUNFMT "%s%s,"
    6017             :                           "sr=%s#" BUNFMT "%s%s)\n",
    6018             :                           fname, "sorted inputs",
    6019             :                           BATgetId(l), BATcount(l), ATOMname(l->ttype),
    6020             :                           l->tsorted ? "-sorted" : "",
    6021             :                           l->trevsorted ? "-revsorted" : "",
    6022             :                           BATgetId(r), BATcount(r), ATOMname(r->ttype),
    6023             :                           r->tsorted ? "-sorted" : "",
    6024             :                           r->trevsorted ? "-revsorted" : "",
    6025             :                           cl ? BATgetId(cl) : "NULL", cl ? BATcount(cl) : 0,
    6026             :                           cl && cl->tsorted ? "-sorted" : "",
    6027             :                           cl && cl->trevsorted ? "-revsorted" : "",
    6028             :                           cr ? BATgetId(cr) : "NULL", cr ? BATcount(cr) : 0,
    6029             :                           cr && cr->tsorted ? "-sorted" : "",
    6030             :                           cr && cr->trevsorted ? "-revsorted" : "");
    6031             : 
    6032          26 :         bool l_sorted = BATordered(l);
    6033          26 :         bool r_sorted = BATordered(r);
    6034             : 
    6035          26 :         if (l_sorted == FALSE) {
    6036          26 :                 rc = BATsort(&sorted_l, &ord_sorted_l, NULL,
    6037             :                                          l, NULL, NULL, false, false, false);
    6038          26 :                 if (rc != GDK_SUCCEED) {
    6039           0 :                         throw(MAL, fname, "Sorting left input failed");
    6040             :                 } else {
    6041          26 :                         if (cl) {
    6042           0 :                                 rc = BATsort(&sorted_cl, NULL, NULL,
    6043             :                                                          cl, ord_sorted_l, NULL, false, false, false);
    6044           0 :                                 if (rc != GDK_SUCCEED) {
    6045           0 :                                         BBPnreclaim(2, sorted_l, ord_sorted_l);
    6046           0 :                                         throw(MAL, fname, "Sorting left candidates input failed");
    6047             :                                 }
    6048             :                         }
    6049             :                 }
    6050             :         } else {
    6051           0 :                 sorted_l = l;
    6052           0 :                 sorted_cl = cl;
    6053             :         }
    6054             : 
    6055          26 :         if (r_sorted == FALSE) {
    6056          18 :                 rc = BATsort(&sorted_r, &ord_sorted_r, NULL,
    6057             :                                          r, NULL, NULL, false, false, false);
    6058          18 :                 if (rc != GDK_SUCCEED) {
    6059           0 :                         BBPnreclaim(3, sorted_l, ord_sorted_l, sorted_cl);
    6060           0 :                         throw(MAL, fname, "Sorting right input failed");
    6061             :                 } else {
    6062          18 :                         if (cr) {
    6063           0 :                                 rc = BATsort(&sorted_cr, NULL, NULL,
    6064             :                                                          cr, ord_sorted_r, NULL, false, false, false);
    6065           0 :                                 if (rc != GDK_SUCCEED) {
    6066           0 :                                         BBPnreclaim(5, sorted_l, ord_sorted_l, sorted_cl, sorted_r, ord_sorted_r);
    6067           0 :                                         throw(MAL, fname, "Sorting right candidates input failed");
    6068             :                                 }
    6069             :                         }
    6070             :                 }
    6071             :         } else {
    6072           8 :                 sorted_r = r;
    6073           8 :                 sorted_cr = cr;
    6074             :         }
    6075             : 
    6076          26 :         assert(BATordered(sorted_l) && BATordered(sorted_r));
    6077             : 
    6078          26 :         BATiter li = bat_iterator(sorted_l);
    6079          26 :         BATiter ri = bat_iterator(sorted_r);
    6080          26 :         assert(ri.vh && r->ttype);
    6081             : 
    6082          26 :         struct canditer lci, rci;
    6083          26 :         oid lbase = sorted_l->hseqbase,
    6084          26 :                 rbase = sorted_r->hseqbase,
    6085          26 :                 lo, ro, last_lo = 0;
    6086          26 :         const char *lvals = (const char *) li.base,
    6087          26 :                 *rvals = (const char *) ri.base,
    6088          26 :                 *lvars = li.vh->base,
    6089          26 :                 *rvars = ri.vh->base,
    6090             :                 *vl, *vr;
    6091          26 :         BUN matches, newcap, n = 0, rx = 0, lx = 0;
    6092          26 :         int rskipped = 0, vr_len = 0, cmp = 0;
    6093          26 :         size_t counter = 0;
    6094             : 
    6095          26 :         if (anti)
    6096           0 :                 STR_JOIN_NESTED_LOOP(str_cmp(vl, vr, vr_len) != 0, str_strlen(vr), fname);
    6097             :         else
    6098        1325 :                 STARTSWITH_SORTED_LOOP(str_cmp(vl, vr, vr_len), str_strlen(vr), fname);
    6099             : 
    6100          26 :         assert(!rr || BATcount(rl) == BATcount(rr));
    6101          26 :         BATsetcount(rl, BATcount(rl));
    6102          26 :         if (rr)
    6103          26 :                 BATsetcount(rr, BATcount(rr));
    6104             : 
    6105          26 :         if (BATcount(rl) > 0) {
    6106          18 :                 if (BATtdense(rl))
    6107          11 :                         rl->tseqbase = ((oid *) rl->theap->base)[0];
    6108          18 :                 if (rr && BATtdense(rr))
    6109           7 :                         rr->tseqbase = ((oid *) rr->theap->base)[0];
    6110             :         } else {
    6111           8 :                 rl->tseqbase = 0;
    6112           8 :                 if (rr)
    6113           8 :                         rr->tseqbase = 0;
    6114             :         }
    6115             : 
    6116          26 :         if (l_sorted == FALSE) {
    6117          26 :                 proj_rl = BATproject(rl, ord_sorted_l);
    6118          26 :                 if (!proj_rl) {
    6119           0 :                         msg = createException(MAL, fname, "Project left pre-sort order failed");
    6120           0 :                         goto exit;
    6121             :                 } else {
    6122          26 :                         BBPreclaim(rl);
    6123          26 :                         *rl_ptr = proj_rl;
    6124             :                 }
    6125             :         }
    6126             : 
    6127          26 :         if (rr && r_sorted == FALSE) {
    6128          18 :                 proj_rr = BATproject(rr, ord_sorted_r);
    6129          18 :                 if (!proj_rr) {
    6130           0 :                         BBPreclaim(proj_rl);
    6131           0 :                         msg = createException(MAL, fname, "Project right pre-sort order failed");
    6132           0 :                         goto exit;
    6133             :                 } else {
    6134          18 :                         BBPreclaim(rr);
    6135          18 :                         *rr_ptr = proj_rr;
    6136             :                 }
    6137             :         }
    6138             : 
    6139          26 :         TRC_DEBUG(ALGO,
    6140             :                           "(%s, l=%s,r=%s)=(%s#" BUNFMT "%s%s,%s#" BUNFMT "%s%s\n",
    6141             :                           fname,
    6142             :                           BATgetId(l), BATgetId(r), BATgetId(rl), BATcount(rl),
    6143             :                           rl->tsorted ? "-sorted" : "",
    6144             :                           rl->trevsorted ? "-revsorted" : "",
    6145             :                           rr ? BATgetId(rr) : NULL, rr ? BATcount(rr) : 0,
    6146             :                           rr && rr->tsorted ? "-sorted" : "",
    6147             :                           rr && rr->trevsorted ? "-revsorted" : "");
    6148             : 
    6149          26 : exit:
    6150          26 :         if (l_sorted == FALSE)
    6151          26 :                 BBPnreclaim(3, sorted_l, ord_sorted_l, sorted_cl);
    6152             : 
    6153          26 :         if (r_sorted == FALSE)
    6154          18 :                 BBPnreclaim(3, sorted_r, ord_sorted_r, sorted_cr);
    6155             : 
    6156          26 :         bat_iterator_end(&li);
    6157          26 :         bat_iterator_end(&ri);
    6158          26 :         return msg;
    6159             : }
    6160             : 
    /*
     * STRjoin - shared driver behind str.startswithjoin, str.endswithjoin and
     * str.containsjoin.
     *
     * rl_id, rr_id  receive the ids of the result oid BATs; rr_id may be NULL,
     *               in which case only the left result is produced.
     * l_id, r_id    ids of the two string input BATs.
     * cl_id, cr_id  ids of optional candidate lists (0 or bat nil: none).
     * anti          forwarded unchanged to the selected join implementation.
     * icase         when set, the sorted path lower-cases both inputs first.
     * str_cmp       comparison callback used by contains_join and by the
     *               nested-loop join; the sorted path always passes
     *               str_is_prefix instead.
     * fname         MAL function name; used in exceptions and, via strcmp,
     *               to select the algorithm.
     *
     * Returns MAL_SUCCEED or an exception string.  Input and candidate
     * descriptors taken here are reclaimed on every path; the result BATs are
     * handed over with BBPkeepref only on success.
     */
    6161             : static str
    6162          72 : STRjoin(bat *rl_id, bat *rr_id, const bat l_id, const bat r_id,
    6163             :                 const bat cl_id, const bat cr_id, const bit anti, bool icase,
    6164             :                 int (*str_cmp)(const char *, const char *, int), const str fname)
    6165             : {
    6166          72 :         str msg = MAL_SUCCEED;
    6167             : 
    6168          72 :         BAT *rl = NULL, *rr = NULL, *l = NULL, *r = NULL, *cl = NULL, *cr = NULL;
    6169             : 
    6170          72 :         if (!(l = BATdescriptor(l_id)) || !(r = BATdescriptor(r_id))) {
    6171           0 :                 BBPnreclaim(2, l, r);
    6172           0 :                 throw(MAL, fname, RUNTIME_OBJECT_MISSING);
    6173             :         }
    6174             : 
    6175          72 :         if ((cl_id && !is_bat_nil(cl_id) && (cl = BATdescriptor(cl_id)) == NULL) ||
    6176          72 :                 (cr_id && !is_bat_nil(cr_id) && (cr = BATdescriptor(cr_id)) == NULL)) {
    6177           0 :                 BBPnreclaim(4, l, r, cl, cr);
    6178           0 :                 throw(MAL, fname, RUNTIME_OBJECT_MISSING);
    6179             :         }
    6180             : 
                               /* Both result BATs start with the left input's count as capacity. */
    6181          72 :         rl = COLnew(0, TYPE_oid, BATcount(l), TRANSIENT);
    6182          72 :         if (rr_id)
    6183          72 :                 rr = COLnew(0, TYPE_oid, BATcount(l), TRANSIENT);
    6184             : 
    6185          72 :         if (!rl || (rr_id && !rr)) {
    6186           0 :                 BBPnreclaim(6, l, r, cl, cr, rl, rr);
    6187           0 :                 throw(MAL, fname, SQLSTATE(HY013) MAL_MALLOC_FAIL);
    6188             :         }
    6189             : 
    6190          72 :         set_empty_bat_props(rl);
    6191          72 :         if (rr_id)
    6192          72 :                 set_empty_bat_props(rr);
    6193             : 
    6194         216 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
    6195          72 :         assert(ATOMtype(l->ttype) == TYPE_str);
    6196             : 
                               /* nl/nr are the BATs actually joined; the sorted path below may
                                * replace them with lower-cased and/or reversed copies. */
    6197          72 :         BAT *nl = l, *nr = r;
    6198             : 
    6199          72 :         if (strcmp(fname, "str.containsjoin") == 0) {
    6200          32 :                 msg = contains_join(rl, rr, l, r, cl, cr, anti, str_cmp, fname);
    6201          32 :                 if (msg) {
    6202           0 :                         BBPnreclaim(6, rl, rr, l, r, cl, cr);
    6203           0 :                         return msg;
    6204             :                 }
    6205             :         } else {
    6206          40 :                 struct canditer lci, rci;
    6207          40 :                 canditer_init(&lci, l, cl);
    6208          40 :                 canditer_init(&rci, r, cr);
    6209          40 :                 BUN lcnt = lci.ncand, rcnt = rci.ncand;
                                       /* Cost model: candidate product for the nested loop versus
                                        * 0.8 * (n*log2(n) + m*log2(m)) for sorting both sides first.
                                        * log2(0) is -inf and 0 * -inf is NaN, whose conversion to an
                                        * integer type is undefined, so an empty side contributes a
                                        * cost of zero instead. */
    6210          40 :                 BUN nl_cost = lci.ncand * rci.ncand,
    6211          40 :                         sorted_cost =
    6212          40 :                         (BUN) floor(0.8 * ((lcnt ? lcnt*log2((double)lcnt) : 0.0)
    6213          40 :                                                            + (rcnt ? rcnt*log2((double)rcnt) : 0.0)));
    6214             : 
                                       /* An empty side cannot produce matches: take the nested loop
                                        * and skip the sort/lower/reverse preparation altogether. */
    6215          40 :                 if (lcnt == 0 || rcnt == 0 || nl_cost < sorted_cost) {
    6216          14 :                         msg = str_join_nested(rl, rr, nl, nr, cl, cr, anti, str_cmp, fname);
    6217             :                 } else {
    6218          26 :                         BAT *l_low = NULL, *r_low = NULL, *l_rev = NULL, *r_rev = NULL;
    6219          26 :                         if (icase) {
    6220           9 :                                 l_low = batstr_strlower(nl);
    6221           9 :                                 if (l_low == NULL) {
    6222           0 :                                         BBPnreclaim(6, rl, rr, nl, nr, cl, cr);
    6223           0 :                                         throw(MAL, fname, "Failed lowering strings of left input");
    6224             :                                 }
    6225           9 :                                 r_low = batstr_strlower(nr);
    6226           9 :                                 if (r_low == NULL) {
    6227           0 :                                         BBPnreclaim(7, rl, rr, nl, nr, cl, cr, l_low);
    6228           0 :                                         throw(MAL, fname, "Failed lowering strings of right input");
    6229             :                                 }
    6230           9 :                                 BBPnreclaim(2, nl, nr);
    6231           9 :                                 nl = l_low;
    6232           9 :                                 nr = r_low;
    6233             :                         }
                                               /* For the ends-with join both inputs are reversed so that the
                                                * same prefix-based sorted join can be used. */
    6234          26 :                         if (strcmp(fname, "str.endswithjoin") == 0) {
    6235          12 :                                 l_rev = batstr_strrev(nl);
    6236          12 :                                 if (l_rev == NULL) {
    6237           0 :                                         BBPnreclaim(6, rl, rr, nl, nr, cl, cr);
    6238           0 :                                         throw(MAL, fname, "Failed reversing strings of left input");
    6239             :                                 }
    6240          12 :                                 r_rev = batstr_strrev(nr);
    6241          12 :                                 if (r_rev == NULL) {
    6242           0 :                                         BBPnreclaim(7, rl, rr, nl, nr, cl, cr, l_rev);
    6243           0 :                                         throw(MAL, fname, "Failed reversing strings of right input");
    6244             :                                 }
    6245          12 :                                 BBPnreclaim(2, nl, nr);
    6246          12 :                                 nl = l_rev;
    6247          12 :                                 nr = r_rev;
    6248             :                         }
                                               /* rl/rr go in by address, so the callee can substitute
                                                * different result BATs; str_cmp is not forwarded here. */
    6249          26 :                         msg = startswith_join(&rl, &rr, nl, nr, cl, cr, anti, str_is_prefix, fname);
    6250             :                 }
    6251             :         }
    6252             : 
    6253          72 :         if (!msg) {
    6254          72 :                 *rl_id = rl->batCacheid;
    6255          72 :                 BBPkeepref(rl);
    6256          72 :                 if (rr_id) {
    6257          72 :                         *rr_id = rr->batCacheid;
    6258          72 :                         BBPkeepref(rr);
    6259             :                 }
    6260             :         } else {
    6261           0 :                 BBPnreclaim(2, rl, rr);
    6262             :         }
    6263             : 
    6264          72 :         BBPnreclaim(4, nl, nr, cl, cr);
    6265          72 :         return msg;
    6266             : }
    6267             : 
    /*
     * STRJOIN_MAPARGS - bind the MAL argument slots of a str.*join pattern to
     * the caller's pointer variables.
     *
     * Slot layout as read by this macro:
     *   0          left result (RL_ID)
     *   1          right result (RR_ID); when PCI->retc == 1 there is no such
     *              slot and RR_ID is set to 0
     *   next two   L_ID, R_ID
     *   next one   IC_ID, unless there are exactly 7 non-return arguments, in
     *              which case IC_ID is NULL and no slot is consumed
     *   next two   CL_ID, CR_ID
     *   last       ANTI
     *
     * Every slot is fetched through the STK/PCI macro parameters (never through
     * caller variables that merely happen to be named stk/pci), and ANTI is
     * located relative to PCI->argc so it stays correct when PCI->retc == 1.
     * For two results this is the same slot as before: 8 without, 9 with the
     * ignore-case argument.
     */
    6268             : #define STRJOIN_MAPARGS(STK, PCI, RL_ID, RR_ID, L_ID, R_ID, CL_ID, CR_ID, IC_ID, ANTI) \
    6269             :         do {                                                                                                                            \
    6270             :                 RL_ID = getArgReference(STK, PCI, 0);                                                   \
    6271             :                 RR_ID = PCI->retc == 1 ? 0 : getArgReference(STK, PCI, 1);           \
    6272             :                 int i = PCI->retc == 1 ? 1 : 2;                                                              \
    6273             :                 L_ID = getArgReference(STK, PCI, i++);                                                  \
    6274             :                 R_ID = getArgReference(STK, PCI, i++);                                                  \
    6275             :                 IC_ID = PCI->argc - PCI->retc == 7 ?                                                      \
    6276             :                         NULL : getArgReference(STK, PCI, i++);                                          \
    6277             :                 CL_ID = getArgReference(STK, PCI, i++);                                         \
    6278             :                 CR_ID = getArgReference(STK, PCI, i++);                                         \
    6279             :                 ANTI = getArgReference(STK, PCI,                                                        \
    6280             :                         PCI->argc - 1);                                                                 \
    6281             :         } while (0)
    6282             : 
    /*
     * ignorecase - read the ignore-case flag that a str.*join call passes as
     * a BAT.
     *
     * ic_id  points to the id of the BAT holding the flag.
     * icase  output; set to the bit stored at position 0 of that BAT.
     * fname  function name used in the exception when the BAT cannot be
     *        obtained.
     *
     * Returns MAL_SUCCEED, or a SQLSTATE(HY002) RUNTIME_OBJECT_MISSING
     * exception when BATdescriptor() fails.  The descriptor is reclaimed
     * before returning.
     *
     * NOTE(review): the one-element requirement is enforced only by assert();
     * when assertions are compiled out, position 0 is read without a size
     * check - confirm that callers always supply exactly one value.
     */
    6283             : static inline str
    6284          44 : ignorecase(const bat *ic_id, bool *icase, str fname)
    6285             : {
    6286          44 :         BAT *c = NULL;
    6287             : 
    6288          44 :         if ((c = BATdescriptor(*ic_id)) == NULL)
    6289           0 :                 throw(MAL, fname, SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
    6290             : 
    6291          44 :         assert(BATcount(c) == 1);
    6292             : 
    6293          44 :         BATiter bi = bat_iterator(c);
    6294          44 :         *icase = *(bit *) BUNtloc(bi, 0);
    6295          44 :         bat_iterator_end(&bi);
    6296             : 
    6297          44 :         BBPreclaim(c);
    6298          44 :         return MAL_SUCCEED;
    6299             : }
    6300             : 
    6301             : /**
                     :  * MAL entry point for str.startswithjoin.  Unpacks the instruction
                     :  * arguments with STRJOIN_MAPARGS and delegates to STRjoin with fname
                     :  * "str.startswithjoin".  cntxt and mb are unused.
                     :  *
                     :  * Arguments, in the order STRJOIN_MAPARGS reads them from the stack:
    6302             :  * @rl_id: slot receiving the bat id of the left result
    6303             :  * @rr_id: slot receiving the bat id of the right result
    6304             :  * @l_id: bat id of the left input
    6305             :  * @r_id: bat id of the right input
    6306             :  * @ic_id: bat id of the ignore-case flag; present and read (through
                     :  *         ignorecase()) only when there are 8 non-return arguments
    6307             :  * @cl_id: bat id of the left candidate list
    6308             :  * @cr_id: bat id of the right candidate list
    6309             :  * @anti: anti-join flag, passed on to STRjoin
                     :  *
                     :  * The comparison callback given to STRjoin is str_is_iprefix when the
                     :  * ignore-case flag is set and str_is_prefix otherwise.  Returns the
                     :  * exception from ignorecase() if it fails, else STRjoin's result.
    6310             :  */
    6311             : static str
    6312          20 : STRstartswithjoin(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    6313             : {
    6314          20 :         (void)cntxt;
    6315          20 :         (void)mb;
    6316             : 
    6317          20 :         str msg = MAL_SUCCEED;
    6318          20 :         bat *rl_id = NULL, *rr_id = NULL, *l_id = NULL, *r_id = NULL,
    6319          20 :                 *cl_id = NULL, *cr_id = NULL, *ic_id = NULL;
    6320          20 :         bit *anti = NULL;
    6321          20 :         bool icase = false;
    6322             : 
    6323          40 :         STRJOIN_MAPARGS(stk, pci, rl_id, rr_id, l_id, r_id, cl_id, cr_id, ic_id, anti);
    6324             : 
    6325          20 :         if (pci->argc - pci->retc == 8)
    6326          16 :                 msg = ignorecase(ic_id, &icase, "str.startswithjoin");
    6327             : 
    6328          36 :         return msg ? msg : STRjoin(rl_id, rr_id, *l_id, *r_id,
    6329             :                                                            cl_id ? *cl_id : 0,
    6330             :                                                            cr_id ? *cr_id : 0,
    6331          32 :                                                            *anti, icase, icase ? str_is_iprefix : str_is_prefix,
    6332             :                                                            "str.startswithjoin");
    6333             : }
    6334             : 
    6335             : /**
                     :  * MAL entry point for str.endswithjoin.  Unpacks the instruction
                     :  * arguments with STRJOIN_MAPARGS and delegates to STRjoin with fname
                     :  * "str.endswithjoin".  cntxt and mb are unused.
                     :  *
                     :  * Arguments, in the order STRJOIN_MAPARGS reads them from the stack:
    6336             :  * @rl_id: slot receiving the bat id of the left result
    6337             :  * @rr_id: slot receiving the bat id of the right result
    6338             :  * @l_id: bat id of the left input
    6339             :  * @r_id: bat id of the right input
    6340             :  * @ic_id: bat id of the ignore-case flag; present and read (through
                     :  *         ignorecase()) only when there are 8 non-return arguments
    6341             :  * @cl_id: bat id of the left candidate list
    6342             :  * @cr_id: bat id of the right candidate list
    6343             :  * @anti: anti-join flag, passed on to STRjoin
                     :  *
                     :  * The comparison callback given to STRjoin is str_is_isuffix when the
                     :  * ignore-case flag is set and str_is_suffix otherwise.  Returns the
                     :  * exception from ignorecase() if it fails, else STRjoin's result.
    6344             :  */
    6345             : static str
    6346          20 : STRendswithjoin(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    6347             : {
    6348          20 :         (void) cntxt;
    6349          20 :         (void) mb;
    6350             : 
    6351          20 :         str msg = MAL_SUCCEED;
    6352          20 :         bat *rl_id = NULL, *rr_id = NULL, *l_id = NULL, *r_id = NULL,
    6353          20 :                 *cl_id = NULL, *cr_id = NULL, *ic_id = NULL;
    6354          20 :         bit *anti = NULL;
    6355          20 :         bool icase = false;
    6356             : 
    6357          40 :         STRJOIN_MAPARGS(stk, pci, rl_id, rr_id, l_id, r_id, cl_id, cr_id, ic_id, anti);
    6358             : 
    6359          20 :         if (pci->argc - pci->retc == 8)
    6360          16 :                 msg = ignorecase(ic_id, &icase, "str.endswithjoin");
    6361             : 
    6362          36 :         return msg ? msg : STRjoin(rl_id, rr_id, *l_id, *r_id,
    6363             :                                                            cl_id ? *cl_id : 0, cr_id ? *cr_id : 0,
    6364          32 :                                                            *anti, icase, icase ? str_is_isuffix : str_is_suffix,
    6365             :                                                            "str.endswithjoin");
    6366             : }
    6367             : 
    6368             : /**
                     :  * MAL entry point for str.containsjoin.  Unpacks the instruction
                     :  * arguments with STRJOIN_MAPARGS and delegates to STRjoin with fname
                     :  * "str.containsjoin".  cntxt and mb are unused.
                     :  *
                     :  * Arguments, in the order STRJOIN_MAPARGS reads them from the stack:
    6369             :  * @rl_id: slot receiving the bat id of the left result
    6370             :  * @rr_id: slot receiving the bat id of the right result
    6371             :  * @l_id: bat id of the left input
    6372             :  * @r_id: bat id of the right input
    6373             :  * @ic_id: bat id of the ignore-case flag; present and read (through
                     :  *         ignorecase()) only when there are 8 non-return arguments
    6374             :  * @cl_id: bat id of the left candidate list
    6375             :  * @cr_id: bat id of the right candidate list
    6376             :  * @anti: anti-join flag, passed on to STRjoin
                     :  *
                     :  * The comparison callback given to STRjoin is str_icontains when the
                     :  * ignore-case flag is set and str_contains otherwise.  Returns the
                     :  * exception from ignorecase() if it fails, else STRjoin's result.
    6377             :  */
    6378             : static str
    6379          32 : STRcontainsjoin(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    6380             : {
    6381          32 :         (void) cntxt;
    6382          32 :         (void) mb;
    6383             : 
    6384          32 :         str msg = MAL_SUCCEED;
    6385          32 :         bat *rl_id = NULL, *rr_id = NULL, *l_id = NULL, *r_id = NULL,
    6386          32 :                 *cl_id = NULL, *cr_id = NULL, *ic_id = NULL;
    6387          32 :         bit *anti = NULL;
    6388          32 :         bool icase = false;
    6389             : 
    6390          64 :         STRJOIN_MAPARGS(stk, pci, rl_id, rr_id, l_id, r_id, cl_id, cr_id, ic_id, anti);
    6391             : 
    6392          32 :         if (pci->argc - pci->retc == 8)
    6393          12 :                 msg = ignorecase(ic_id, &icase, "str.containsjoin");
    6394             : 
    6395          44 :         return msg ? msg : STRjoin(rl_id, rr_id, *l_id, *r_id,
    6396             :                                                            cl_id ? *cl_id : 0, cr_id ? *cr_id : 0,
    6397          56 :                                                            *anti, icase, icase ? str_icontains : str_contains,
    6398             :                                                            "str.containsjoin");
    6399             : }
    6400             : 
    6401             : #include "mel.h"
    6402             : mel_func str_init_funcs[] = {
    6403             :  command("str", "str", STRtostr, false, "Noop routine.", args(1,2, arg("",str),arg("s",str))),
    6404             :  command("str", "string", STRTail, false, "Return the tail s[offset..n]\nof a string s[0..n].", args(1,3, arg("",str),arg("s",str),arg("offset",int))),
    6405             :  command("str", "string3", STRSubString, false, "Return substring s[offset..offset+count] of a string s[0..n]", args(1,4, arg("",str),arg("s",str),arg("offset",int),arg("count",int))),
    6406             :  command("str", "length", STRLength, false, "Return the length of a string.", args(1,2, arg("",int),arg("s",str))),
    6407             :  command("str", "nbytes", STRBytes, false, "Return the string length in bytes.", args(1,2, arg("",int),arg("s",str))),
    6408             :  command("str", "unicodeAt", STRWChrAt, false, "get a unicode character\n(as an int) from a string position.", args(1,3, arg("",int),arg("s",str),arg("index",int))),
    6409             :  command("str", "unicode", STRFromWChr, false, "convert a unicode to a character.", args(1,2, arg("",str),arg("wchar",int))),
    6410             :  pattern("str", "startswith", STRstartswith, false, "Check if string starts with substring.", args(1,3, arg("",bit),arg("s",str),arg("prefix",str))),
    6411             :  pattern("str", "startswith", STRstartswith, false, "Check if string starts with substring, icase flag.", args(1,4, arg("",bit),arg("s",str),arg("prefix",str),arg("icase",bit))),
    6412             :  pattern("str", "endswith", STRendswith, false, "Check if string ends with substring.", args(1,3, arg("",bit),arg("s",str),arg("suffix",str))),
    6413             :  pattern("str", "endswith", STRendswith, false, "Check if string ends with substring, icase flag.", args(1,4, arg("",bit),arg("s",str),arg("suffix",str),arg("icase",bit))),
    6414             :  pattern("str", "contains", STRcontains, false, "Check if string haystack contains string needle.", args(1,3, arg("",bit),arg("haystack",str),arg("needle",str))),
    6415             :  pattern("str", "contains", STRcontains, false, "Check if string haystack contains string needle, icase flag.", args(1,4, arg("",bit),arg("haystack",str),arg("needle",str),arg("icase",bit))),
    6416             :  command("str", "toLower", STRlower, false, "Convert a string to lower case.", args(1,2, arg("",str),arg("s",str))),
    6417             :  command("str", "toUpper", STRupper, false, "Convert a string to upper case.", args(1,2, arg("",str),arg("s",str))),
    6418             :  pattern("str", "search", STRstr_search, false, "Search for a substring. Returns\nposition, -1 if not found.", args(1,3, arg("",int),arg("s",str),arg("c",str))),
    6419             :  pattern("str", "search", STRstr_search, false, "Search for a substring, icase flag. Returns\nposition, -1 if not found.", args(1,4, arg("",int),arg("s",str),arg("c",str),arg("icase",bit))),
    6420             :  pattern("str", "r_search", STRrevstr_search, false, "Reverse search for a substring. Returns\nposition, -1 if not found.", args(1,3, arg("",int),arg("s",str),arg("c",str))),
    6421             :  pattern("str", "r_search", STRrevstr_search, false, "Reverse search for a substring, icase flag. Returns\nposition, -1 if not found.", args(1,4, arg("",int),arg("s",str),arg("c",str),arg("icase",bit))),
    6422             :  command("str", "splitpart", STRsplitpart, false, "Split string on delimiter. Returns\ngiven field (counting from one.)", args(1,4, arg("",str),arg("s",str),arg("needle",str),arg("field",int))),
    6423             :  command("str", "trim", STRStrip, false, "Strip whitespaces around a string.", args(1,2, arg("",str),arg("s",str))),
    6424             :  command("str", "ltrim", STRLtrim, false, "Strip whitespaces from start of a string.", args(1,2, arg("",str),arg("s",str))),
    6425             :  command("str", "rtrim", STRRtrim, false, "Strip whitespaces from end of a string.", args(1,2, arg("",str),arg("s",str))),
    6426             :  command("str", "trim2", STRStrip2, false, "Remove the longest string containing only characters from the second string around the first string.", args(1,3, arg("",str),arg("s",str),arg("s2",str))),
    6427             :  command("str", "ltrim2", STRLtrim2, false, "Remove the longest string containing only characters from the second string from the start of the first string.", args(1,3, arg("",str),arg("s",str),arg("s2",str))),
    6428             :  command("str", "rtrim2", STRRtrim2, false, "Remove the longest string containing only characters from the second string from the end of the first string.", args(1,3, arg("",str),arg("s",str),arg("s2",str))),
    6429             :  command("str", "lpad", STRLpad, false, "Fill up a string to the given length prepending the whitespace character.", args(1,3, arg("",str),arg("s",str),arg("len",int))),
    6430             :  command("str", "rpad", STRRpad, false, "Fill up a string to the given length appending the whitespace character.", args(1,3, arg("",str),arg("s",str),arg("len",int))),
    6431             :  command("str", "lpad3", STRLpad3, false, "Fill up the first string to the given length prepending characters of the second string.", args(1,4, arg("",str),arg("s",str),arg("len",int),arg("s2",str))),
    6432             :  command("str", "rpad3", STRRpad3, false, "Fill up the first string to the given length appending characters of the second string.", args(1,4, arg("",str),arg("s",str),arg("len",int),arg("s2",str))),
    6433             :  command("str", "substitute", STRSubstitute, false, "Substitute first occurrence of 'src' by\n'dst'.  Iff repeated = true this is\nrepeated while 'src' can be found in the\nresult string. In order to prevent\nrecursion and result strings of unlimited\nsize, repeating is only done iff src is\nnot a substring of dst.", args(1,5, arg("",str),arg("s",str),arg("src",str),arg("dst",str),arg("rep",bit))),
    6434             :  command("str", "like", STRlikewrap, false, "SQL pattern match function", args(1,3, arg("",bit),arg("s",str),arg("pat",str))),
    6435             :  command("str", "like3", STRlikewrap3, false, "SQL pattern match function", args(1,4, arg("",bit),arg("s",str),arg("pat",str),arg("esc",str))),
    6436             :  command("str", "ascii", STRascii, false, "Return unicode of head of string", args(1,2, arg("",int),arg("s",str))),
    6437             :  command("str", "substring", STRsubstringTail, false, "Extract the tail of a string", args(1,3, arg("",str),arg("s",str),arg("start",int))),
    6438             :  command("str", "substring3", STRsubstring, false, "Extract a substring from str starting at start, for length len", args(1,4, arg("",str),arg("s",str),arg("start",int),arg("len",int))),
    6439             :  command("str", "prefix", STRprefix, false, "Extract the prefix of a given length", args(1,3, arg("",str),arg("s",str),arg("l",int))),
    6440             :  command("str", "suffix", STRsuffix, false, "Extract the suffix of a given length", args(1,3, arg("",str),arg("s",str),arg("l",int))),
    6441             :  command("str", "stringleft", STRprefix, false, "", args(1,3, arg("",str),arg("s",str),arg("l",int))),
    6442             :  command("str", "stringright", STRsuffix, false, "", args(1,3, arg("",str),arg("s",str),arg("l",int))),
    6443             :  command("str", "locate", STRlocate, false, "Locate the start position of a string", args(1,3, arg("",int),arg("s1",str),arg("s2",str))),
    6444             :  command("str", "locate3", STRlocate3, false, "Locate the start position of a string", args(1,4, arg("",int),arg("s1",str),arg("s2",str),arg("start",int))),
    6445             :  command("str", "insert", STRinsert, false, "Insert a string into another", args(1,5, arg("",str),arg("s",str),arg("start",int),arg("l",int),arg("s2",str))),
    6446             :  command("str", "replace", STRreplace, false, "Insert a string into another", args(1,4, arg("",str),arg("s",str),arg("pat",str),arg("s2",str))),
    6447             :  command("str", "repeat", STRrepeat, false, "", args(1,3, arg("",str),arg("s2",str),arg("c",int))),
    6448             :  command("str", "space", STRspace, false, "", args(1,2, arg("",str),arg("l",int))),
    6449             :  command("str", "epilogue", STRepilogue, false, "", args(1,1, arg("",void))),
    6450             :  command("str", "asciify", STRasciify, false, "Transform string from UTF8 to ASCII", args(1, 2, arg("out",str), arg("in",str))),
    6451             :  pattern("str", "startswithselect", STRstartswithselect, false, "Select all head values of the first input BAT for which the\ntail value starts with the given prefix.", args(1,5, batarg("",oid),batarg("b",str),batarg("s",oid),arg("prefix",str),arg("anti",bit))),
    6452             :  pattern("str", "startswithselect", STRstartswithselect, false, "Select all head values of the first input BAT for which the\ntail value starts with the given prefix + icase.", args(1,6, batarg("",oid),batarg("b",str),batarg("s",oid),arg("prefix",str),arg("caseignore",bit),arg("anti",bit))),
    6453             :  pattern("str", "endswithselect", STRendswithselect, false, "Select all head values of the first input BAT for which the\ntail value end with the given suffix.", args(1,5, batarg("",oid),batarg("b",str),batarg("s",oid),arg("suffix",str),arg("anti",bit))),
    6454             :  pattern("str", "endswithselect", STRendswithselect, false, "Select all head values of the first input BAT for which the\ntail value end with the given suffix + icase.", args(1,6, batarg("",oid),batarg("b",str),batarg("s",oid),arg("suffix",str),arg("caseignore",bit),arg("anti",bit))),
    6455             :  pattern("str", "containsselect", STRcontainsselect, false, "Select all head values of the first input BAT for which the\ntail value contains the given needle.", args(1,5, batarg("",oid),batarg("b",str),batarg("s",oid),arg("needle",str),arg("anti",bit))),
    6456             :  pattern("str", "containsselect", STRcontainsselect, false, "Select all head values of the first input BAT for which the\ntail value contains the given needle + icase.", args(1,6, batarg("",oid),batarg("b",str),batarg("s",oid),arg("needle",str),arg("caseignore",bit),arg("anti",bit))),
    6457             :  pattern("str", "startswithjoin", STRstartswithjoin, false, "Join the string bat L with the prefix bat R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows.", args(2,9, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
    6458             :  pattern("str", "startswithjoin", STRstartswithjoin, false, "Join the string bat L with the prefix bat R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows + icase.", args(2,10, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
    6459             :  pattern("str", "startswithjoin", STRstartswithjoin, false, "The same as STRstartswithjoin, but only produce one output.", args(1,8,batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))),
    6460             :  pattern("str", "startswithjoin", STRstartswithjoin, false, "The same as STRstartswithjoin, but only produce one output + icase.", args(1,9,batarg("",oid),batarg("l",str),batarg("r",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))),
    6461             :  pattern("str", "endswithjoin", STRendswithjoin, false, "Join the string bat L with the suffix bat R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows.", args(2,9, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
    6462             :  pattern("str", "endswithjoin", STRendswithjoin, false, "Join the string bat L with the suffix bat R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows + icase.", args(2,10, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
    6463             :  pattern("str", "endswithjoin", STRendswithjoin, false, "The same as STRendswithjoin, but only produce one output.", args(1,8,batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))),
    6464             :  pattern("str", "endswithjoin", STRendswithjoin, false, "The same as STRendswithjoin, but only produce one output + icase.", args(1,9,batarg("",oid),batarg("l",str),batarg("r",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))),
    6465             :  pattern("str", "containsjoin", STRcontainsjoin, false, "Join the string bat L with the bat R if L contains the string of R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows.", args(2,9, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
    6466             :  pattern("str", "containsjoin", STRcontainsjoin, false, "Join the string bat L with the bat R if L contains the string of R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows + icase.", args(2,10, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
    6467             :  pattern("str", "containsjoin", STRcontainsjoin, false, "The same as STRcontainsjoin, but only produce one output.", args(1,8,batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))),
    6468             :  pattern("str", "containsjoin", STRcontainsjoin, false, "The same as STRcontainsjoin, but only produce one output + icase.", args(1,9,batarg("",oid),batarg("l",str),batarg("r",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))),
    6469             :  { .imp=NULL }
    6470             : };
    6471             : #include "mal_import.h"
    /*
     * MSVC-only preamble for the startup function that follows.  `read` is
     * undefined and then used as the attribute of the #pragma section
     * directive, which declares the ".CRT$XCU" section.
     * NOTE(review): presumably an earlier header defines `read` as a macro,
     * which would otherwise be expanded inside the pragma -- TODO confirm.
     * NOTE(review): LIB_STARTUP_FUNC is defined outside this view; presumably
     * its MSVC variant is what makes use of this section -- confirm.
     */
    6472             : #ifdef _MSC_VER
    6473             : #undef read
    6474             : #pragma section(".CRT$XCU",read)
    6475             : #endif
    /*
     * Startup hook of the "str" module: hands the module name "str", the
     * str_init_funcs table and STRprelude to mal_module2(); the second and
     * the last argument are passed as NULL.
     * NOTE(review): LIB_STARTUP_FUNC and mal_module2 are declared elsewhere;
     * presumably the macro arranges for init_str_mal to run when the library
     * is loaded and mal_module2 registers the table with the MAL interpreter
     * -- confirm against their definitions.
     */
    6476         329 : LIB_STARTUP_FUNC(init_str_mal)
    6477         329 : { mal_module2("str", NULL, str_init_funcs, STRprelude, NULL); }

Generated by: LCOV version 1.14