Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * N.J. Nes, M.L. Kersten
15 : * The String Module
16 : * Strings can be created in many ways. Already in the built-in
17 : * operations each atom can be cast to a string using the str(atom)
18 : * mil command. The string module gives the possibility of
19 : * constructing a string as a substring of a given string (s). There
20 : * are two such construction functions. The first is the substring
21 : * from some position (offset) until the end of the string. The second
22 : * starts again at the given offset position but only copies count
23 : * number of bytes. The functions fail when the position and count
24 : * fall out of bounds. A negative position indicates that the position
25 : * is computed from the end of the source string.
26 : *
27 : * The strings can be compared using the "=" and "!=" operators.
28 : *
29 : * The operator "+" concatenates a string and an atom. The atom will
30 : * be converted to a string using the atom to string c function. The
31 : * string and the result of the conversion are concatenated to form a
32 : * new string. This string is returned.
33 : *
34 : * The length function returns the length of the string. The length is
35 : * the number of characters in the string. The maximum string length
36 : * handled by the kernel is 32-bits long.
37 : *
38 : * chrAt() returns the character at position index in the string
39 : * s. The function will fail when the index is out of range. The range
40 : * is from 0 to length(s)-1.
41 : *
42 : * The startsWith and endsWith functions test if the string s starts
43 : * with or ends with the given prefix or suffix.
44 : *
45 : * The toLower and toUpper functions cast the string to lower or upper
46 : * case characters.
47 : *
48 : * The search(str,chr) function searches for the first occurrence of a
49 : * character from the beginning of the string. The search(chr,str)
50 : * searches for the last occurrence (or first from the end of the
51 : * string). The last search function locates the position of first
52 : * occurrence of the string s2 in string s. All search functions
53 : * return -1 when the search failed. Otherwise the position is
54 : * returned.
55 : *
56 : * All string functions fail when an incorrect string (NULL pointer)
57 : * is given. In the current implementation, a failure is signaled by
58 : * returning nil, since this facilitates the use of the string module
59 : * in bulk operations.
60 : *
61 : * All functions in the module have now been converted to
62 : * Unicode. Internally, we use UTF-8 to store strings as Unicode in
63 : * zero-terminated byte-sequences.
64 : */
65 : #include "monetdb_config.h"
66 : #include "str.h"
67 : #include <string.h>
68 : #ifdef HAVE_ICONV
69 : #include <iconv.h>
70 : #include <locale.h>
71 : #endif
72 : #include "mal_interpreter.h"
73 :
74 : #include "utf8.h"
75 :
76 : /*
77 : * UTF-8 Handling
78 : * UTF-8 is a way to store Unicode strings in zero-terminated byte
79 : * sequences, which you can e.g. strcmp() with old 8-bit Latin-1
80 : * strcmp() functions and which then gives the same results as doing
81 : * the strcmp() on equivalent Latin-1 and ASCII character strings
82 : * stored in simple one-byte sequences. These characteristics make
83 : * UTF-8 an attractive format for upgrading an ASCII-oriented computer
84 : * program towards one that supports Unicode. That is why we use UTF-8
85 : * in MonetDB.
86 : *
87 : * For MonetDB, UTF-8 mostly has no consequences, as strings stored in
88 : * BATs are regarded as data, and it does not matter for the database
89 : * kernel whether the zero-terminated byte sequence it is processing
90 : * has UTF-8 or Latin-1 semantics. This module is the only place where
91 : * explicit string functionality is located. We do have to adapt
92 : * the behavior of the length(), search(), substring() and the
93 : * like commands to the fact that one (Unicode) character is now
94 : * stored in a variable number of bytes (possibly > 1).
95 : *
96 : * One of the things that becomes more complex in Unicode is
97 : * uppercase/lowercase conversion. The tables below are the simple
98 : * one-to-one Unicode case mappings. We do not support the special
99 : * casing mappings (e.g. from one to two letters).
100 : *
101 : * References:
102 : * simple casing: http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
103 : * complex casing: http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt
104 : *
105 : * The Unicode case conversion implementation in MonetDB fills a
106 : * mapping BAT of int,int combinations, in which we perform
107 : * high-performance hash-lookup (all code inlined).
108 : */
109 :
110 : /* These tables were generated from the Unicode 13.0.0 spec. */
111 : static const struct UTF8_lower_upper {
112 : const unsigned int from, to;
113 : } UTF8_toUpper[] = { /* code points with non-null uppercase conversion */
114 : {0x0061, 0x0041,},
115 : {0x0062, 0x0042,},
116 : {0x0063, 0x0043,},
117 : {0x0064, 0x0044,},
118 : {0x0065, 0x0045,},
119 : {0x0066, 0x0046,},
120 : {0x0067, 0x0047,},
121 : {0x0068, 0x0048,},
122 : {0x0069, 0x0049,},
123 : {0x006A, 0x004A,},
124 : {0x006B, 0x004B,},
125 : {0x006C, 0x004C,},
126 : {0x006D, 0x004D,},
127 : {0x006E, 0x004E,},
128 : {0x006F, 0x004F,},
129 : {0x0070, 0x0050,},
130 : {0x0071, 0x0051,},
131 : {0x0072, 0x0052,},
132 : {0x0073, 0x0053,},
133 : {0x0074, 0x0054,},
134 : {0x0075, 0x0055,},
135 : {0x0076, 0x0056,},
136 : {0x0077, 0x0057,},
137 : {0x0078, 0x0058,},
138 : {0x0079, 0x0059,},
139 : {0x007A, 0x005A,},
140 : {0x00B5, 0x039C,},
141 : {0x00E0, 0x00C0,},
142 : {0x00E1, 0x00C1,},
143 : {0x00E2, 0x00C2,},
144 : {0x00E3, 0x00C3,},
145 : {0x00E4, 0x00C4,},
146 : {0x00E5, 0x00C5,},
147 : {0x00E6, 0x00C6,},
148 : {0x00E7, 0x00C7,},
149 : {0x00E8, 0x00C8,},
150 : {0x00E9, 0x00C9,},
151 : {0x00EA, 0x00CA,},
152 : {0x00EB, 0x00CB,},
153 : {0x00EC, 0x00CC,},
154 : {0x00ED, 0x00CD,},
155 : {0x00EE, 0x00CE,},
156 : {0x00EF, 0x00CF,},
157 : {0x00F0, 0x00D0,},
158 : {0x00F1, 0x00D1,},
159 : {0x00F2, 0x00D2,},
160 : {0x00F3, 0x00D3,},
161 : {0x00F4, 0x00D4,},
162 : {0x00F5, 0x00D5,},
163 : {0x00F6, 0x00D6,},
164 : {0x00F8, 0x00D8,},
165 : {0x00F9, 0x00D9,},
166 : {0x00FA, 0x00DA,},
167 : {0x00FB, 0x00DB,},
168 : {0x00FC, 0x00DC,},
169 : {0x00FD, 0x00DD,},
170 : {0x00FE, 0x00DE,},
171 : {0x00FF, 0x0178,},
172 : {0x0101, 0x0100,},
173 : {0x0103, 0x0102,},
174 : {0x0105, 0x0104,},
175 : {0x0107, 0x0106,},
176 : {0x0109, 0x0108,},
177 : {0x010B, 0x010A,},
178 : {0x010D, 0x010C,},
179 : {0x010F, 0x010E,},
180 : {0x0111, 0x0110,},
181 : {0x0113, 0x0112,},
182 : {0x0115, 0x0114,},
183 : {0x0117, 0x0116,},
184 : {0x0119, 0x0118,},
185 : {0x011B, 0x011A,},
186 : {0x011D, 0x011C,},
187 : {0x011F, 0x011E,},
188 : {0x0121, 0x0120,},
189 : {0x0123, 0x0122,},
190 : {0x0125, 0x0124,},
191 : {0x0127, 0x0126,},
192 : {0x0129, 0x0128,},
193 : {0x012B, 0x012A,},
194 : {0x012D, 0x012C,},
195 : {0x012F, 0x012E,},
196 : {0x0131, 0x0049,},
197 : {0x0133, 0x0132,},
198 : {0x0135, 0x0134,},
199 : {0x0137, 0x0136,},
200 : {0x013A, 0x0139,},
201 : {0x013C, 0x013B,},
202 : {0x013E, 0x013D,},
203 : {0x0140, 0x013F,},
204 : {0x0142, 0x0141,},
205 : {0x0144, 0x0143,},
206 : {0x0146, 0x0145,},
207 : {0x0148, 0x0147,},
208 : {0x014B, 0x014A,},
209 : {0x014D, 0x014C,},
210 : {0x014F, 0x014E,},
211 : {0x0151, 0x0150,},
212 : {0x0153, 0x0152,},
213 : {0x0155, 0x0154,},
214 : {0x0157, 0x0156,},
215 : {0x0159, 0x0158,},
216 : {0x015B, 0x015A,},
217 : {0x015D, 0x015C,},
218 : {0x015F, 0x015E,},
219 : {0x0161, 0x0160,},
220 : {0x0163, 0x0162,},
221 : {0x0165, 0x0164,},
222 : {0x0167, 0x0166,},
223 : {0x0169, 0x0168,},
224 : {0x016B, 0x016A,},
225 : {0x016D, 0x016C,},
226 : {0x016F, 0x016E,},
227 : {0x0171, 0x0170,},
228 : {0x0173, 0x0172,},
229 : {0x0175, 0x0174,},
230 : {0x0177, 0x0176,},
231 : {0x017A, 0x0179,},
232 : {0x017C, 0x017B,},
233 : {0x017E, 0x017D,},
234 : {0x017F, 0x0053,},
235 : {0x0180, 0x0243,},
236 : {0x0183, 0x0182,},
237 : {0x0185, 0x0184,},
238 : {0x0188, 0x0187,},
239 : {0x018C, 0x018B,},
240 : {0x0192, 0x0191,},
241 : {0x0195, 0x01F6,},
242 : {0x0199, 0x0198,},
243 : {0x019A, 0x023D,},
244 : {0x019E, 0x0220,},
245 : {0x01A1, 0x01A0,},
246 : {0x01A3, 0x01A2,},
247 : {0x01A5, 0x01A4,},
248 : {0x01A8, 0x01A7,},
249 : {0x01AD, 0x01AC,},
250 : {0x01B0, 0x01AF,},
251 : {0x01B4, 0x01B3,},
252 : {0x01B6, 0x01B5,},
253 : {0x01B9, 0x01B8,},
254 : {0x01BD, 0x01BC,},
255 : {0x01BF, 0x01F7,},
256 : {0x01C5, 0x01C4,},
257 : {0x01C6, 0x01C4,},
258 : {0x01C8, 0x01C7,},
259 : {0x01C9, 0x01C7,},
260 : {0x01CB, 0x01CA,},
261 : {0x01CC, 0x01CA,},
262 : {0x01CE, 0x01CD,},
263 : {0x01D0, 0x01CF,},
264 : {0x01D2, 0x01D1,},
265 : {0x01D4, 0x01D3,},
266 : {0x01D6, 0x01D5,},
267 : {0x01D8, 0x01D7,},
268 : {0x01DA, 0x01D9,},
269 : {0x01DC, 0x01DB,},
270 : {0x01DD, 0x018E,},
271 : {0x01DF, 0x01DE,},
272 : {0x01E1, 0x01E0,},
273 : {0x01E3, 0x01E2,},
274 : {0x01E5, 0x01E4,},
275 : {0x01E7, 0x01E6,},
276 : {0x01E9, 0x01E8,},
277 : {0x01EB, 0x01EA,},
278 : {0x01ED, 0x01EC,},
279 : {0x01EF, 0x01EE,},
280 : {0x01F2, 0x01F1,},
281 : {0x01F3, 0x01F1,},
282 : {0x01F5, 0x01F4,},
283 : {0x01F9, 0x01F8,},
284 : {0x01FB, 0x01FA,},
285 : {0x01FD, 0x01FC,},
286 : {0x01FF, 0x01FE,},
287 : {0x0201, 0x0200,},
288 : {0x0203, 0x0202,},
289 : {0x0205, 0x0204,},
290 : {0x0207, 0x0206,},
291 : {0x0209, 0x0208,},
292 : {0x020B, 0x020A,},
293 : {0x020D, 0x020C,},
294 : {0x020F, 0x020E,},
295 : {0x0211, 0x0210,},
296 : {0x0213, 0x0212,},
297 : {0x0215, 0x0214,},
298 : {0x0217, 0x0216,},
299 : {0x0219, 0x0218,},
300 : {0x021B, 0x021A,},
301 : {0x021D, 0x021C,},
302 : {0x021F, 0x021E,},
303 : {0x0223, 0x0222,},
304 : {0x0225, 0x0224,},
305 : {0x0227, 0x0226,},
306 : {0x0229, 0x0228,},
307 : {0x022B, 0x022A,},
308 : {0x022D, 0x022C,},
309 : {0x022F, 0x022E,},
310 : {0x0231, 0x0230,},
311 : {0x0233, 0x0232,},
312 : {0x023C, 0x023B,},
313 : {0x023F, 0x2C7E,},
314 : {0x0240, 0x2C7F,},
315 : {0x0242, 0x0241,},
316 : {0x0247, 0x0246,},
317 : {0x0249, 0x0248,},
318 : {0x024B, 0x024A,},
319 : {0x024D, 0x024C,},
320 : {0x024F, 0x024E,},
321 : {0x0250, 0x2C6F,},
322 : {0x0251, 0x2C6D,},
323 : {0x0252, 0x2C70,},
324 : {0x0253, 0x0181,},
325 : {0x0254, 0x0186,},
326 : {0x0256, 0x0189,},
327 : {0x0257, 0x018A,},
328 : {0x0259, 0x018F,},
329 : {0x025B, 0x0190,},
330 : {0x025C, 0xA7AB,},
331 : {0x0260, 0x0193,},
332 : {0x0261, 0xA7AC,},
333 : {0x0263, 0x0194,},
334 : {0x0265, 0xA78D,},
335 : {0x0266, 0xA7AA,},
336 : {0x0268, 0x0197,},
337 : {0x0269, 0x0196,},
338 : {0x026A, 0xA7AE,},
339 : {0x026B, 0x2C62,},
340 : {0x026C, 0xA7AD,},
341 : {0x026F, 0x019C,},
342 : {0x0271, 0x2C6E,},
343 : {0x0272, 0x019D,},
344 : {0x0275, 0x019F,},
345 : {0x027D, 0x2C64,},
346 : {0x0280, 0x01A6,},
347 : {0x0282, 0xA7C5,},
348 : {0x0283, 0x01A9,},
349 : {0x0287, 0xA7B1,},
350 : {0x0288, 0x01AE,},
351 : {0x0289, 0x0244,},
352 : {0x028A, 0x01B1,},
353 : {0x028B, 0x01B2,},
354 : {0x028C, 0x0245,},
355 : {0x0292, 0x01B7,},
356 : {0x029D, 0xA7B2,},
357 : {0x029E, 0xA7B0,},
358 : {0x0345, 0x0399,},
359 : {0x0371, 0x0370,},
360 : {0x0373, 0x0372,},
361 : {0x0377, 0x0376,},
362 : {0x037B, 0x03FD,},
363 : {0x037C, 0x03FE,},
364 : {0x037D, 0x03FF,},
365 : {0x03AC, 0x0386,},
366 : {0x03AD, 0x0388,},
367 : {0x03AE, 0x0389,},
368 : {0x03AF, 0x038A,},
369 : {0x03B1, 0x0391,},
370 : {0x03B2, 0x0392,},
371 : {0x03B3, 0x0393,},
372 : {0x03B4, 0x0394,},
373 : {0x03B5, 0x0395,},
374 : {0x03B6, 0x0396,},
375 : {0x03B7, 0x0397,},
376 : {0x03B8, 0x0398,},
377 : {0x03B9, 0x0399,},
378 : {0x03BA, 0x039A,},
379 : {0x03BB, 0x039B,},
380 : {0x03BC, 0x039C,},
381 : {0x03BD, 0x039D,},
382 : {0x03BE, 0x039E,},
383 : {0x03BF, 0x039F,},
384 : {0x03C0, 0x03A0,},
385 : {0x03C1, 0x03A1,},
386 : {0x03C2, 0x03A3,},
387 : {0x03C3, 0x03A3,},
388 : {0x03C4, 0x03A4,},
389 : {0x03C5, 0x03A5,},
390 : {0x03C6, 0x03A6,},
391 : {0x03C7, 0x03A7,},
392 : {0x03C8, 0x03A8,},
393 : {0x03C9, 0x03A9,},
394 : {0x03CA, 0x03AA,},
395 : {0x03CB, 0x03AB,},
396 : {0x03CC, 0x038C,},
397 : {0x03CD, 0x038E,},
398 : {0x03CE, 0x038F,},
399 : {0x03D0, 0x0392,},
400 : {0x03D1, 0x0398,},
401 : {0x03D5, 0x03A6,},
402 : {0x03D6, 0x03A0,},
403 : {0x03D7, 0x03CF,},
404 : {0x03D9, 0x03D8,},
405 : {0x03DB, 0x03DA,},
406 : {0x03DD, 0x03DC,},
407 : {0x03DF, 0x03DE,},
408 : {0x03E1, 0x03E0,},
409 : {0x03E3, 0x03E2,},
410 : {0x03E5, 0x03E4,},
411 : {0x03E7, 0x03E6,},
412 : {0x03E9, 0x03E8,},
413 : {0x03EB, 0x03EA,},
414 : {0x03ED, 0x03EC,},
415 : {0x03EF, 0x03EE,},
416 : {0x03F0, 0x039A,},
417 : {0x03F1, 0x03A1,},
418 : {0x03F2, 0x03F9,},
419 : {0x03F3, 0x037F,},
420 : {0x03F5, 0x0395,},
421 : {0x03F8, 0x03F7,},
422 : {0x03FB, 0x03FA,},
423 : {0x0430, 0x0410,},
424 : {0x0431, 0x0411,},
425 : {0x0432, 0x0412,},
426 : {0x0433, 0x0413,},
427 : {0x0434, 0x0414,},
428 : {0x0435, 0x0415,},
429 : {0x0436, 0x0416,},
430 : {0x0437, 0x0417,},
431 : {0x0438, 0x0418,},
432 : {0x0439, 0x0419,},
433 : {0x043A, 0x041A,},
434 : {0x043B, 0x041B,},
435 : {0x043C, 0x041C,},
436 : {0x043D, 0x041D,},
437 : {0x043E, 0x041E,},
438 : {0x043F, 0x041F,},
439 : {0x0440, 0x0420,},
440 : {0x0441, 0x0421,},
441 : {0x0442, 0x0422,},
442 : {0x0443, 0x0423,},
443 : {0x0444, 0x0424,},
444 : {0x0445, 0x0425,},
445 : {0x0446, 0x0426,},
446 : {0x0447, 0x0427,},
447 : {0x0448, 0x0428,},
448 : {0x0449, 0x0429,},
449 : {0x044A, 0x042A,},
450 : {0x044B, 0x042B,},
451 : {0x044C, 0x042C,},
452 : {0x044D, 0x042D,},
453 : {0x044E, 0x042E,},
454 : {0x044F, 0x042F,},
455 : {0x0450, 0x0400,},
456 : {0x0451, 0x0401,},
457 : {0x0452, 0x0402,},
458 : {0x0453, 0x0403,},
459 : {0x0454, 0x0404,},
460 : {0x0455, 0x0405,},
461 : {0x0456, 0x0406,},
462 : {0x0457, 0x0407,},
463 : {0x0458, 0x0408,},
464 : {0x0459, 0x0409,},
465 : {0x045A, 0x040A,},
466 : {0x045B, 0x040B,},
467 : {0x045C, 0x040C,},
468 : {0x045D, 0x040D,},
469 : {0x045E, 0x040E,},
470 : {0x045F, 0x040F,},
471 : {0x0461, 0x0460,},
472 : {0x0463, 0x0462,},
473 : {0x0465, 0x0464,},
474 : {0x0467, 0x0466,},
475 : {0x0469, 0x0468,},
476 : {0x046B, 0x046A,},
477 : {0x046D, 0x046C,},
478 : {0x046F, 0x046E,},
479 : {0x0471, 0x0470,},
480 : {0x0473, 0x0472,},
481 : {0x0475, 0x0474,},
482 : {0x0477, 0x0476,},
483 : {0x0479, 0x0478,},
484 : {0x047B, 0x047A,},
485 : {0x047D, 0x047C,},
486 : {0x047F, 0x047E,},
487 : {0x0481, 0x0480,},
488 : {0x048B, 0x048A,},
489 : {0x048D, 0x048C,},
490 : {0x048F, 0x048E,},
491 : {0x0491, 0x0490,},
492 : {0x0493, 0x0492,},
493 : {0x0495, 0x0494,},
494 : {0x0497, 0x0496,},
495 : {0x0499, 0x0498,},
496 : {0x049B, 0x049A,},
497 : {0x049D, 0x049C,},
498 : {0x049F, 0x049E,},
499 : {0x04A1, 0x04A0,},
500 : {0x04A3, 0x04A2,},
501 : {0x04A5, 0x04A4,},
502 : {0x04A7, 0x04A6,},
503 : {0x04A9, 0x04A8,},
504 : {0x04AB, 0x04AA,},
505 : {0x04AD, 0x04AC,},
506 : {0x04AF, 0x04AE,},
507 : {0x04B1, 0x04B0,},
508 : {0x04B3, 0x04B2,},
509 : {0x04B5, 0x04B4,},
510 : {0x04B7, 0x04B6,},
511 : {0x04B9, 0x04B8,},
512 : {0x04BB, 0x04BA,},
513 : {0x04BD, 0x04BC,},
514 : {0x04BF, 0x04BE,},
515 : {0x04C2, 0x04C1,},
516 : {0x04C4, 0x04C3,},
517 : {0x04C6, 0x04C5,},
518 : {0x04C8, 0x04C7,},
519 : {0x04CA, 0x04C9,},
520 : {0x04CC, 0x04CB,},
521 : {0x04CE, 0x04CD,},
522 : {0x04CF, 0x04C0,},
523 : {0x04D1, 0x04D0,},
524 : {0x04D3, 0x04D2,},
525 : {0x04D5, 0x04D4,},
526 : {0x04D7, 0x04D6,},
527 : {0x04D9, 0x04D8,},
528 : {0x04DB, 0x04DA,},
529 : {0x04DD, 0x04DC,},
530 : {0x04DF, 0x04DE,},
531 : {0x04E1, 0x04E0,},
532 : {0x04E3, 0x04E2,},
533 : {0x04E5, 0x04E4,},
534 : {0x04E7, 0x04E6,},
535 : {0x04E9, 0x04E8,},
536 : {0x04EB, 0x04EA,},
537 : {0x04ED, 0x04EC,},
538 : {0x04EF, 0x04EE,},
539 : {0x04F1, 0x04F0,},
540 : {0x04F3, 0x04F2,},
541 : {0x04F5, 0x04F4,},
542 : {0x04F7, 0x04F6,},
543 : {0x04F9, 0x04F8,},
544 : {0x04FB, 0x04FA,},
545 : {0x04FD, 0x04FC,},
546 : {0x04FF, 0x04FE,},
547 : {0x0501, 0x0500,},
548 : {0x0503, 0x0502,},
549 : {0x0505, 0x0504,},
550 : {0x0507, 0x0506,},
551 : {0x0509, 0x0508,},
552 : {0x050B, 0x050A,},
553 : {0x050D, 0x050C,},
554 : {0x050F, 0x050E,},
555 : {0x0511, 0x0510,},
556 : {0x0513, 0x0512,},
557 : {0x0515, 0x0514,},
558 : {0x0517, 0x0516,},
559 : {0x0519, 0x0518,},
560 : {0x051B, 0x051A,},
561 : {0x051D, 0x051C,},
562 : {0x051F, 0x051E,},
563 : {0x0521, 0x0520,},
564 : {0x0523, 0x0522,},
565 : {0x0525, 0x0524,},
566 : {0x0527, 0x0526,},
567 : {0x0529, 0x0528,},
568 : {0x052B, 0x052A,},
569 : {0x052D, 0x052C,},
570 : {0x052F, 0x052E,},
571 : {0x0561, 0x0531,},
572 : {0x0562, 0x0532,},
573 : {0x0563, 0x0533,},
574 : {0x0564, 0x0534,},
575 : {0x0565, 0x0535,},
576 : {0x0566, 0x0536,},
577 : {0x0567, 0x0537,},
578 : {0x0568, 0x0538,},
579 : {0x0569, 0x0539,},
580 : {0x056A, 0x053A,},
581 : {0x056B, 0x053B,},
582 : {0x056C, 0x053C,},
583 : {0x056D, 0x053D,},
584 : {0x056E, 0x053E,},
585 : {0x056F, 0x053F,},
586 : {0x0570, 0x0540,},
587 : {0x0571, 0x0541,},
588 : {0x0572, 0x0542,},
589 : {0x0573, 0x0543,},
590 : {0x0574, 0x0544,},
591 : {0x0575, 0x0545,},
592 : {0x0576, 0x0546,},
593 : {0x0577, 0x0547,},
594 : {0x0578, 0x0548,},
595 : {0x0579, 0x0549,},
596 : {0x057A, 0x054A,},
597 : {0x057B, 0x054B,},
598 : {0x057C, 0x054C,},
599 : {0x057D, 0x054D,},
600 : {0x057E, 0x054E,},
601 : {0x057F, 0x054F,},
602 : {0x0580, 0x0550,},
603 : {0x0581, 0x0551,},
604 : {0x0582, 0x0552,},
605 : {0x0583, 0x0553,},
606 : {0x0584, 0x0554,},
607 : {0x0585, 0x0555,},
608 : {0x0586, 0x0556,},
609 : {0x10D0, 0x1C90,},
610 : {0x10D1, 0x1C91,},
611 : {0x10D2, 0x1C92,},
612 : {0x10D3, 0x1C93,},
613 : {0x10D4, 0x1C94,},
614 : {0x10D5, 0x1C95,},
615 : {0x10D6, 0x1C96,},
616 : {0x10D7, 0x1C97,},
617 : {0x10D8, 0x1C98,},
618 : {0x10D9, 0x1C99,},
619 : {0x10DA, 0x1C9A,},
620 : {0x10DB, 0x1C9B,},
621 : {0x10DC, 0x1C9C,},
622 : {0x10DD, 0x1C9D,},
623 : {0x10DE, 0x1C9E,},
624 : {0x10DF, 0x1C9F,},
625 : {0x10E0, 0x1CA0,},
626 : {0x10E1, 0x1CA1,},
627 : {0x10E2, 0x1CA2,},
628 : {0x10E3, 0x1CA3,},
629 : {0x10E4, 0x1CA4,},
630 : {0x10E5, 0x1CA5,},
631 : {0x10E6, 0x1CA6,},
632 : {0x10E7, 0x1CA7,},
633 : {0x10E8, 0x1CA8,},
634 : {0x10E9, 0x1CA9,},
635 : {0x10EA, 0x1CAA,},
636 : {0x10EB, 0x1CAB,},
637 : {0x10EC, 0x1CAC,},
638 : {0x10ED, 0x1CAD,},
639 : {0x10EE, 0x1CAE,},
640 : {0x10EF, 0x1CAF,},
641 : {0x10F0, 0x1CB0,},
642 : {0x10F1, 0x1CB1,},
643 : {0x10F2, 0x1CB2,},
644 : {0x10F3, 0x1CB3,},
645 : {0x10F4, 0x1CB4,},
646 : {0x10F5, 0x1CB5,},
647 : {0x10F6, 0x1CB6,},
648 : {0x10F7, 0x1CB7,},
649 : {0x10F8, 0x1CB8,},
650 : {0x10F9, 0x1CB9,},
651 : {0x10FA, 0x1CBA,},
652 : {0x10FD, 0x1CBD,},
653 : {0x10FE, 0x1CBE,},
654 : {0x10FF, 0x1CBF,},
655 : {0x13F8, 0x13F0,},
656 : {0x13F9, 0x13F1,},
657 : {0x13FA, 0x13F2,},
658 : {0x13FB, 0x13F3,},
659 : {0x13FC, 0x13F4,},
660 : {0x13FD, 0x13F5,},
661 : {0x1C80, 0x0412,},
662 : {0x1C81, 0x0414,},
663 : {0x1C82, 0x041E,},
664 : {0x1C83, 0x0421,},
665 : {0x1C84, 0x0422,},
666 : {0x1C85, 0x0422,},
667 : {0x1C86, 0x042A,},
668 : {0x1C87, 0x0462,},
669 : {0x1C88, 0xA64A,},
670 : {0x1D79, 0xA77D,},
671 : {0x1D7D, 0x2C63,},
672 : {0x1D8E, 0xA7C6,},
673 : {0x1E01, 0x1E00,},
674 : {0x1E03, 0x1E02,},
675 : {0x1E05, 0x1E04,},
676 : {0x1E07, 0x1E06,},
677 : {0x1E09, 0x1E08,},
678 : {0x1E0B, 0x1E0A,},
679 : {0x1E0D, 0x1E0C,},
680 : {0x1E0F, 0x1E0E,},
681 : {0x1E11, 0x1E10,},
682 : {0x1E13, 0x1E12,},
683 : {0x1E15, 0x1E14,},
684 : {0x1E17, 0x1E16,},
685 : {0x1E19, 0x1E18,},
686 : {0x1E1B, 0x1E1A,},
687 : {0x1E1D, 0x1E1C,},
688 : {0x1E1F, 0x1E1E,},
689 : {0x1E21, 0x1E20,},
690 : {0x1E23, 0x1E22,},
691 : {0x1E25, 0x1E24,},
692 : {0x1E27, 0x1E26,},
693 : {0x1E29, 0x1E28,},
694 : {0x1E2B, 0x1E2A,},
695 : {0x1E2D, 0x1E2C,},
696 : {0x1E2F, 0x1E2E,},
697 : {0x1E31, 0x1E30,},
698 : {0x1E33, 0x1E32,},
699 : {0x1E35, 0x1E34,},
700 : {0x1E37, 0x1E36,},
701 : {0x1E39, 0x1E38,},
702 : {0x1E3B, 0x1E3A,},
703 : {0x1E3D, 0x1E3C,},
704 : {0x1E3F, 0x1E3E,},
705 : {0x1E41, 0x1E40,},
706 : {0x1E43, 0x1E42,},
707 : {0x1E45, 0x1E44,},
708 : {0x1E47, 0x1E46,},
709 : {0x1E49, 0x1E48,},
710 : {0x1E4B, 0x1E4A,},
711 : {0x1E4D, 0x1E4C,},
712 : {0x1E4F, 0x1E4E,},
713 : {0x1E51, 0x1E50,},
714 : {0x1E53, 0x1E52,},
715 : {0x1E55, 0x1E54,},
716 : {0x1E57, 0x1E56,},
717 : {0x1E59, 0x1E58,},
718 : {0x1E5B, 0x1E5A,},
719 : {0x1E5D, 0x1E5C,},
720 : {0x1E5F, 0x1E5E,},
721 : {0x1E61, 0x1E60,},
722 : {0x1E63, 0x1E62,},
723 : {0x1E65, 0x1E64,},
724 : {0x1E67, 0x1E66,},
725 : {0x1E69, 0x1E68,},
726 : {0x1E6B, 0x1E6A,},
727 : {0x1E6D, 0x1E6C,},
728 : {0x1E6F, 0x1E6E,},
729 : {0x1E71, 0x1E70,},
730 : {0x1E73, 0x1E72,},
731 : {0x1E75, 0x1E74,},
732 : {0x1E77, 0x1E76,},
733 : {0x1E79, 0x1E78,},
734 : {0x1E7B, 0x1E7A,},
735 : {0x1E7D, 0x1E7C,},
736 : {0x1E7F, 0x1E7E,},
737 : {0x1E81, 0x1E80,},
738 : {0x1E83, 0x1E82,},
739 : {0x1E85, 0x1E84,},
740 : {0x1E87, 0x1E86,},
741 : {0x1E89, 0x1E88,},
742 : {0x1E8B, 0x1E8A,},
743 : {0x1E8D, 0x1E8C,},
744 : {0x1E8F, 0x1E8E,},
745 : {0x1E91, 0x1E90,},
746 : {0x1E93, 0x1E92,},
747 : {0x1E95, 0x1E94,},
748 : {0x1E9B, 0x1E60,},
749 : {0x1EA1, 0x1EA0,},
750 : {0x1EA3, 0x1EA2,},
751 : {0x1EA5, 0x1EA4,},
752 : {0x1EA7, 0x1EA6,},
753 : {0x1EA9, 0x1EA8,},
754 : {0x1EAB, 0x1EAA,},
755 : {0x1EAD, 0x1EAC,},
756 : {0x1EAF, 0x1EAE,},
757 : {0x1EB1, 0x1EB0,},
758 : {0x1EB3, 0x1EB2,},
759 : {0x1EB5, 0x1EB4,},
760 : {0x1EB7, 0x1EB6,},
761 : {0x1EB9, 0x1EB8,},
762 : {0x1EBB, 0x1EBA,},
763 : {0x1EBD, 0x1EBC,},
764 : {0x1EBF, 0x1EBE,},
765 : {0x1EC1, 0x1EC0,},
766 : {0x1EC3, 0x1EC2,},
767 : {0x1EC5, 0x1EC4,},
768 : {0x1EC7, 0x1EC6,},
769 : {0x1EC9, 0x1EC8,},
770 : {0x1ECB, 0x1ECA,},
771 : {0x1ECD, 0x1ECC,},
772 : {0x1ECF, 0x1ECE,},
773 : {0x1ED1, 0x1ED0,},
774 : {0x1ED3, 0x1ED2,},
775 : {0x1ED5, 0x1ED4,},
776 : {0x1ED7, 0x1ED6,},
777 : {0x1ED9, 0x1ED8,},
778 : {0x1EDB, 0x1EDA,},
779 : {0x1EDD, 0x1EDC,},
780 : {0x1EDF, 0x1EDE,},
781 : {0x1EE1, 0x1EE0,},
782 : {0x1EE3, 0x1EE2,},
783 : {0x1EE5, 0x1EE4,},
784 : {0x1EE7, 0x1EE6,},
785 : {0x1EE9, 0x1EE8,},
786 : {0x1EEB, 0x1EEA,},
787 : {0x1EED, 0x1EEC,},
788 : {0x1EEF, 0x1EEE,},
789 : {0x1EF1, 0x1EF0,},
790 : {0x1EF3, 0x1EF2,},
791 : {0x1EF5, 0x1EF4,},
792 : {0x1EF7, 0x1EF6,},
793 : {0x1EF9, 0x1EF8,},
794 : {0x1EFB, 0x1EFA,},
795 : {0x1EFD, 0x1EFC,},
796 : {0x1EFF, 0x1EFE,},
797 : {0x1F00, 0x1F08,},
798 : {0x1F01, 0x1F09,},
799 : {0x1F02, 0x1F0A,},
800 : {0x1F03, 0x1F0B,},
801 : {0x1F04, 0x1F0C,},
802 : {0x1F05, 0x1F0D,},
803 : {0x1F06, 0x1F0E,},
804 : {0x1F07, 0x1F0F,},
805 : {0x1F10, 0x1F18,},
806 : {0x1F11, 0x1F19,},
807 : {0x1F12, 0x1F1A,},
808 : {0x1F13, 0x1F1B,},
809 : {0x1F14, 0x1F1C,},
810 : {0x1F15, 0x1F1D,},
811 : {0x1F20, 0x1F28,},
812 : {0x1F21, 0x1F29,},
813 : {0x1F22, 0x1F2A,},
814 : {0x1F23, 0x1F2B,},
815 : {0x1F24, 0x1F2C,},
816 : {0x1F25, 0x1F2D,},
817 : {0x1F26, 0x1F2E,},
818 : {0x1F27, 0x1F2F,},
819 : {0x1F30, 0x1F38,},
820 : {0x1F31, 0x1F39,},
821 : {0x1F32, 0x1F3A,},
822 : {0x1F33, 0x1F3B,},
823 : {0x1F34, 0x1F3C,},
824 : {0x1F35, 0x1F3D,},
825 : {0x1F36, 0x1F3E,},
826 : {0x1F37, 0x1F3F,},
827 : {0x1F40, 0x1F48,},
828 : {0x1F41, 0x1F49,},
829 : {0x1F42, 0x1F4A,},
830 : {0x1F43, 0x1F4B,},
831 : {0x1F44, 0x1F4C,},
832 : {0x1F45, 0x1F4D,},
833 : {0x1F51, 0x1F59,},
834 : {0x1F53, 0x1F5B,},
835 : {0x1F55, 0x1F5D,},
836 : {0x1F57, 0x1F5F,},
837 : {0x1F60, 0x1F68,},
838 : {0x1F61, 0x1F69,},
839 : {0x1F62, 0x1F6A,},
840 : {0x1F63, 0x1F6B,},
841 : {0x1F64, 0x1F6C,},
842 : {0x1F65, 0x1F6D,},
843 : {0x1F66, 0x1F6E,},
844 : {0x1F67, 0x1F6F,},
845 : {0x1F70, 0x1FBA,},
846 : {0x1F71, 0x1FBB,},
847 : {0x1F72, 0x1FC8,},
848 : {0x1F73, 0x1FC9,},
849 : {0x1F74, 0x1FCA,},
850 : {0x1F75, 0x1FCB,},
851 : {0x1F76, 0x1FDA,},
852 : {0x1F77, 0x1FDB,},
853 : {0x1F78, 0x1FF8,},
854 : {0x1F79, 0x1FF9,},
855 : {0x1F7A, 0x1FEA,},
856 : {0x1F7B, 0x1FEB,},
857 : {0x1F7C, 0x1FFA,},
858 : {0x1F7D, 0x1FFB,},
859 : {0x1F80, 0x1F88,},
860 : {0x1F81, 0x1F89,},
861 : {0x1F82, 0x1F8A,},
862 : {0x1F83, 0x1F8B,},
863 : {0x1F84, 0x1F8C,},
864 : {0x1F85, 0x1F8D,},
865 : {0x1F86, 0x1F8E,},
866 : {0x1F87, 0x1F8F,},
867 : {0x1F90, 0x1F98,},
868 : {0x1F91, 0x1F99,},
869 : {0x1F92, 0x1F9A,},
870 : {0x1F93, 0x1F9B,},
871 : {0x1F94, 0x1F9C,},
872 : {0x1F95, 0x1F9D,},
873 : {0x1F96, 0x1F9E,},
874 : {0x1F97, 0x1F9F,},
875 : {0x1FA0, 0x1FA8,},
876 : {0x1FA1, 0x1FA9,},
877 : {0x1FA2, 0x1FAA,},
878 : {0x1FA3, 0x1FAB,},
879 : {0x1FA4, 0x1FAC,},
880 : {0x1FA5, 0x1FAD,},
881 : {0x1FA6, 0x1FAE,},
882 : {0x1FA7, 0x1FAF,},
883 : {0x1FB0, 0x1FB8,},
884 : {0x1FB1, 0x1FB9,},
885 : {0x1FB3, 0x1FBC,},
886 : {0x1FBE, 0x0399,},
887 : {0x1FC3, 0x1FCC,},
888 : {0x1FD0, 0x1FD8,},
889 : {0x1FD1, 0x1FD9,},
890 : {0x1FE0, 0x1FE8,},
891 : {0x1FE1, 0x1FE9,},
892 : {0x1FE5, 0x1FEC,},
893 : {0x1FF3, 0x1FFC,},
894 : {0x214E, 0x2132,},
895 : {0x2170, 0x2160,},
896 : {0x2171, 0x2161,},
897 : {0x2172, 0x2162,},
898 : {0x2173, 0x2163,},
899 : {0x2174, 0x2164,},
900 : {0x2175, 0x2165,},
901 : {0x2176, 0x2166,},
902 : {0x2177, 0x2167,},
903 : {0x2178, 0x2168,},
904 : {0x2179, 0x2169,},
905 : {0x217A, 0x216A,},
906 : {0x217B, 0x216B,},
907 : {0x217C, 0x216C,},
908 : {0x217D, 0x216D,},
909 : {0x217E, 0x216E,},
910 : {0x217F, 0x216F,},
911 : {0x2184, 0x2183,},
912 : {0x24D0, 0x24B6,},
913 : {0x24D1, 0x24B7,},
914 : {0x24D2, 0x24B8,},
915 : {0x24D3, 0x24B9,},
916 : {0x24D4, 0x24BA,},
917 : {0x24D5, 0x24BB,},
918 : {0x24D6, 0x24BC,},
919 : {0x24D7, 0x24BD,},
920 : {0x24D8, 0x24BE,},
921 : {0x24D9, 0x24BF,},
922 : {0x24DA, 0x24C0,},
923 : {0x24DB, 0x24C1,},
924 : {0x24DC, 0x24C2,},
925 : {0x24DD, 0x24C3,},
926 : {0x24DE, 0x24C4,},
927 : {0x24DF, 0x24C5,},
928 : {0x24E0, 0x24C6,},
929 : {0x24E1, 0x24C7,},
930 : {0x24E2, 0x24C8,},
931 : {0x24E3, 0x24C9,},
932 : {0x24E4, 0x24CA,},
933 : {0x24E5, 0x24CB,},
934 : {0x24E6, 0x24CC,},
935 : {0x24E7, 0x24CD,},
936 : {0x24E8, 0x24CE,},
937 : {0x24E9, 0x24CF,},
938 : {0x2C30, 0x2C00,},
939 : {0x2C31, 0x2C01,},
940 : {0x2C32, 0x2C02,},
941 : {0x2C33, 0x2C03,},
942 : {0x2C34, 0x2C04,},
943 : {0x2C35, 0x2C05,},
944 : {0x2C36, 0x2C06,},
945 : {0x2C37, 0x2C07,},
946 : {0x2C38, 0x2C08,},
947 : {0x2C39, 0x2C09,},
948 : {0x2C3A, 0x2C0A,},
949 : {0x2C3B, 0x2C0B,},
950 : {0x2C3C, 0x2C0C,},
951 : {0x2C3D, 0x2C0D,},
952 : {0x2C3E, 0x2C0E,},
953 : {0x2C3F, 0x2C0F,},
954 : {0x2C40, 0x2C10,},
955 : {0x2C41, 0x2C11,},
956 : {0x2C42, 0x2C12,},
957 : {0x2C43, 0x2C13,},
958 : {0x2C44, 0x2C14,},
959 : {0x2C45, 0x2C15,},
960 : {0x2C46, 0x2C16,},
961 : {0x2C47, 0x2C17,},
962 : {0x2C48, 0x2C18,},
963 : {0x2C49, 0x2C19,},
964 : {0x2C4A, 0x2C1A,},
965 : {0x2C4B, 0x2C1B,},
966 : {0x2C4C, 0x2C1C,},
967 : {0x2C4D, 0x2C1D,},
968 : {0x2C4E, 0x2C1E,},
969 : {0x2C4F, 0x2C1F,},
970 : {0x2C50, 0x2C20,},
971 : {0x2C51, 0x2C21,},
972 : {0x2C52, 0x2C22,},
973 : {0x2C53, 0x2C23,},
974 : {0x2C54, 0x2C24,},
975 : {0x2C55, 0x2C25,},
976 : {0x2C56, 0x2C26,},
977 : {0x2C57, 0x2C27,},
978 : {0x2C58, 0x2C28,},
979 : {0x2C59, 0x2C29,},
980 : {0x2C5A, 0x2C2A,},
981 : {0x2C5B, 0x2C2B,},
982 : {0x2C5C, 0x2C2C,},
983 : {0x2C5D, 0x2C2D,},
984 : {0x2C5E, 0x2C2E,},
985 : {0x2C5F, 0x2C2F,},
986 : {0x2C61, 0x2C60,},
987 : {0x2C65, 0x023A,},
988 : {0x2C66, 0x023E,},
989 : {0x2C68, 0x2C67,},
990 : {0x2C6A, 0x2C69,},
991 : {0x2C6C, 0x2C6B,},
992 : {0x2C73, 0x2C72,},
993 : {0x2C76, 0x2C75,},
994 : {0x2C81, 0x2C80,},
995 : {0x2C83, 0x2C82,},
996 : {0x2C85, 0x2C84,},
997 : {0x2C87, 0x2C86,},
998 : {0x2C89, 0x2C88,},
999 : {0x2C8B, 0x2C8A,},
1000 : {0x2C8D, 0x2C8C,},
1001 : {0x2C8F, 0x2C8E,},
1002 : {0x2C91, 0x2C90,},
1003 : {0x2C93, 0x2C92,},
1004 : {0x2C95, 0x2C94,},
1005 : {0x2C97, 0x2C96,},
1006 : {0x2C99, 0x2C98,},
1007 : {0x2C9B, 0x2C9A,},
1008 : {0x2C9D, 0x2C9C,},
1009 : {0x2C9F, 0x2C9E,},
1010 : {0x2CA1, 0x2CA0,},
1011 : {0x2CA3, 0x2CA2,},
1012 : {0x2CA5, 0x2CA4,},
1013 : {0x2CA7, 0x2CA6,},
1014 : {0x2CA9, 0x2CA8,},
1015 : {0x2CAB, 0x2CAA,},
1016 : {0x2CAD, 0x2CAC,},
1017 : {0x2CAF, 0x2CAE,},
1018 : {0x2CB1, 0x2CB0,},
1019 : {0x2CB3, 0x2CB2,},
1020 : {0x2CB5, 0x2CB4,},
1021 : {0x2CB7, 0x2CB6,},
1022 : {0x2CB9, 0x2CB8,},
1023 : {0x2CBB, 0x2CBA,},
1024 : {0x2CBD, 0x2CBC,},
1025 : {0x2CBF, 0x2CBE,},
1026 : {0x2CC1, 0x2CC0,},
1027 : {0x2CC3, 0x2CC2,},
1028 : {0x2CC5, 0x2CC4,},
1029 : {0x2CC7, 0x2CC6,},
1030 : {0x2CC9, 0x2CC8,},
1031 : {0x2CCB, 0x2CCA,},
1032 : {0x2CCD, 0x2CCC,},
1033 : {0x2CCF, 0x2CCE,},
1034 : {0x2CD1, 0x2CD0,},
1035 : {0x2CD3, 0x2CD2,},
1036 : {0x2CD5, 0x2CD4,},
1037 : {0x2CD7, 0x2CD6,},
1038 : {0x2CD9, 0x2CD8,},
1039 : {0x2CDB, 0x2CDA,},
1040 : {0x2CDD, 0x2CDC,},
1041 : {0x2CDF, 0x2CDE,},
1042 : {0x2CE1, 0x2CE0,},
1043 : {0x2CE3, 0x2CE2,},
1044 : {0x2CEC, 0x2CEB,},
1045 : {0x2CEE, 0x2CED,},
1046 : {0x2CF3, 0x2CF2,},
1047 : {0x2D00, 0x10A0,},
1048 : {0x2D01, 0x10A1,},
1049 : {0x2D02, 0x10A2,},
1050 : {0x2D03, 0x10A3,},
1051 : {0x2D04, 0x10A4,},
1052 : {0x2D05, 0x10A5,},
1053 : {0x2D06, 0x10A6,},
1054 : {0x2D07, 0x10A7,},
1055 : {0x2D08, 0x10A8,},
1056 : {0x2D09, 0x10A9,},
1057 : {0x2D0A, 0x10AA,},
1058 : {0x2D0B, 0x10AB,},
1059 : {0x2D0C, 0x10AC,},
1060 : {0x2D0D, 0x10AD,},
1061 : {0x2D0E, 0x10AE,},
1062 : {0x2D0F, 0x10AF,},
1063 : {0x2D10, 0x10B0,},
1064 : {0x2D11, 0x10B1,},
1065 : {0x2D12, 0x10B2,},
1066 : {0x2D13, 0x10B3,},
1067 : {0x2D14, 0x10B4,},
1068 : {0x2D15, 0x10B5,},
1069 : {0x2D16, 0x10B6,},
1070 : {0x2D17, 0x10B7,},
1071 : {0x2D18, 0x10B8,},
1072 : {0x2D19, 0x10B9,},
1073 : {0x2D1A, 0x10BA,},
1074 : {0x2D1B, 0x10BB,},
1075 : {0x2D1C, 0x10BC,},
1076 : {0x2D1D, 0x10BD,},
1077 : {0x2D1E, 0x10BE,},
1078 : {0x2D1F, 0x10BF,},
1079 : {0x2D20, 0x10C0,},
1080 : {0x2D21, 0x10C1,},
1081 : {0x2D22, 0x10C2,},
1082 : {0x2D23, 0x10C3,},
1083 : {0x2D24, 0x10C4,},
1084 : {0x2D25, 0x10C5,},
1085 : {0x2D27, 0x10C7,},
1086 : {0x2D2D, 0x10CD,},
1087 : {0xA641, 0xA640,},
1088 : {0xA643, 0xA642,},
1089 : {0xA645, 0xA644,},
1090 : {0xA647, 0xA646,},
1091 : {0xA649, 0xA648,},
1092 : {0xA64B, 0xA64A,},
1093 : {0xA64D, 0xA64C,},
1094 : {0xA64F, 0xA64E,},
1095 : {0xA651, 0xA650,},
1096 : {0xA653, 0xA652,},
1097 : {0xA655, 0xA654,},
1098 : {0xA657, 0xA656,},
1099 : {0xA659, 0xA658,},
1100 : {0xA65B, 0xA65A,},
1101 : {0xA65D, 0xA65C,},
1102 : {0xA65F, 0xA65E,},
1103 : {0xA661, 0xA660,},
1104 : {0xA663, 0xA662,},
1105 : {0xA665, 0xA664,},
1106 : {0xA667, 0xA666,},
1107 : {0xA669, 0xA668,},
1108 : {0xA66B, 0xA66A,},
1109 : {0xA66D, 0xA66C,},
1110 : {0xA681, 0xA680,},
1111 : {0xA683, 0xA682,},
1112 : {0xA685, 0xA684,},
1113 : {0xA687, 0xA686,},
1114 : {0xA689, 0xA688,},
1115 : {0xA68B, 0xA68A,},
1116 : {0xA68D, 0xA68C,},
1117 : {0xA68F, 0xA68E,},
1118 : {0xA691, 0xA690,},
1119 : {0xA693, 0xA692,},
1120 : {0xA695, 0xA694,},
1121 : {0xA697, 0xA696,},
1122 : {0xA699, 0xA698,},
1123 : {0xA69B, 0xA69A,},
1124 : {0xA723, 0xA722,},
1125 : {0xA725, 0xA724,},
1126 : {0xA727, 0xA726,},
1127 : {0xA729, 0xA728,},
1128 : {0xA72B, 0xA72A,},
1129 : {0xA72D, 0xA72C,},
1130 : {0xA72F, 0xA72E,},
1131 : {0xA733, 0xA732,},
1132 : {0xA735, 0xA734,},
1133 : {0xA737, 0xA736,},
1134 : {0xA739, 0xA738,},
1135 : {0xA73B, 0xA73A,},
1136 : {0xA73D, 0xA73C,},
1137 : {0xA73F, 0xA73E,},
1138 : {0xA741, 0xA740,},
1139 : {0xA743, 0xA742,},
1140 : {0xA745, 0xA744,},
1141 : {0xA747, 0xA746,},
1142 : {0xA749, 0xA748,},
1143 : {0xA74B, 0xA74A,},
1144 : {0xA74D, 0xA74C,},
1145 : {0xA74F, 0xA74E,},
1146 : {0xA751, 0xA750,},
1147 : {0xA753, 0xA752,},
1148 : {0xA755, 0xA754,},
1149 : {0xA757, 0xA756,},
1150 : {0xA759, 0xA758,},
1151 : {0xA75B, 0xA75A,},
1152 : {0xA75D, 0xA75C,},
1153 : {0xA75F, 0xA75E,},
1154 : {0xA761, 0xA760,},
1155 : {0xA763, 0xA762,},
1156 : {0xA765, 0xA764,},
1157 : {0xA767, 0xA766,},
1158 : {0xA769, 0xA768,},
1159 : {0xA76B, 0xA76A,},
1160 : {0xA76D, 0xA76C,},
1161 : {0xA76F, 0xA76E,},
1162 : {0xA77A, 0xA779,},
1163 : {0xA77C, 0xA77B,},
1164 : {0xA77F, 0xA77E,},
1165 : {0xA781, 0xA780,},
1166 : {0xA783, 0xA782,},
1167 : {0xA785, 0xA784,},
1168 : {0xA787, 0xA786,},
1169 : {0xA78C, 0xA78B,},
1170 : {0xA791, 0xA790,},
1171 : {0xA793, 0xA792,},
1172 : {0xA794, 0xA7C4,},
1173 : {0xA797, 0xA796,},
1174 : {0xA799, 0xA798,},
1175 : {0xA79B, 0xA79A,},
1176 : {0xA79D, 0xA79C,},
1177 : {0xA79F, 0xA79E,},
1178 : {0xA7A1, 0xA7A0,},
1179 : {0xA7A3, 0xA7A2,},
1180 : {0xA7A5, 0xA7A4,},
1181 : {0xA7A7, 0xA7A6,},
1182 : {0xA7A9, 0xA7A8,},
1183 : {0xA7B5, 0xA7B4,},
1184 : {0xA7B7, 0xA7B6,},
1185 : {0xA7B9, 0xA7B8,},
1186 : {0xA7BB, 0xA7BA,},
1187 : {0xA7BD, 0xA7BC,},
1188 : {0xA7BF, 0xA7BE,},
1189 : {0xA7C1, 0xA7C0,},
1190 : {0xA7C3, 0xA7C2,},
1191 : {0xA7C8, 0xA7C7,},
1192 : {0xA7CA, 0xA7C9,},
1193 : {0xA7D1, 0xA7D0,},
1194 : {0xA7D7, 0xA7D6,},
1195 : {0xA7D9, 0xA7D8,},
1196 : {0xA7F6, 0xA7F5,},
1197 : {0xAB53, 0xA7B3,},
1198 : {0xAB70, 0x13A0,},
1199 : {0xAB71, 0x13A1,},
1200 : {0xAB72, 0x13A2,},
1201 : {0xAB73, 0x13A3,},
1202 : {0xAB74, 0x13A4,},
1203 : {0xAB75, 0x13A5,},
1204 : {0xAB76, 0x13A6,},
1205 : {0xAB77, 0x13A7,},
1206 : {0xAB78, 0x13A8,},
1207 : {0xAB79, 0x13A9,},
1208 : {0xAB7A, 0x13AA,},
1209 : {0xAB7B, 0x13AB,},
1210 : {0xAB7C, 0x13AC,},
1211 : {0xAB7D, 0x13AD,},
1212 : {0xAB7E, 0x13AE,},
1213 : {0xAB7F, 0x13AF,},
1214 : {0xAB80, 0x13B0,},
1215 : {0xAB81, 0x13B1,},
1216 : {0xAB82, 0x13B2,},
1217 : {0xAB83, 0x13B3,},
1218 : {0xAB84, 0x13B4,},
1219 : {0xAB85, 0x13B5,},
1220 : {0xAB86, 0x13B6,},
1221 : {0xAB87, 0x13B7,},
1222 : {0xAB88, 0x13B8,},
1223 : {0xAB89, 0x13B9,},
1224 : {0xAB8A, 0x13BA,},
1225 : {0xAB8B, 0x13BB,},
1226 : {0xAB8C, 0x13BC,},
1227 : {0xAB8D, 0x13BD,},
1228 : {0xAB8E, 0x13BE,},
1229 : {0xAB8F, 0x13BF,},
1230 : {0xAB90, 0x13C0,},
1231 : {0xAB91, 0x13C1,},
1232 : {0xAB92, 0x13C2,},
1233 : {0xAB93, 0x13C3,},
1234 : {0xAB94, 0x13C4,},
1235 : {0xAB95, 0x13C5,},
1236 : {0xAB96, 0x13C6,},
1237 : {0xAB97, 0x13C7,},
1238 : {0xAB98, 0x13C8,},
1239 : {0xAB99, 0x13C9,},
1240 : {0xAB9A, 0x13CA,},
1241 : {0xAB9B, 0x13CB,},
1242 : {0xAB9C, 0x13CC,},
1243 : {0xAB9D, 0x13CD,},
1244 : {0xAB9E, 0x13CE,},
1245 : {0xAB9F, 0x13CF,},
1246 : {0xABA0, 0x13D0,},
1247 : {0xABA1, 0x13D1,},
1248 : {0xABA2, 0x13D2,},
1249 : {0xABA3, 0x13D3,},
1250 : {0xABA4, 0x13D4,},
1251 : {0xABA5, 0x13D5,},
1252 : {0xABA6, 0x13D6,},
1253 : {0xABA7, 0x13D7,},
1254 : {0xABA8, 0x13D8,},
1255 : {0xABA9, 0x13D9,},
1256 : {0xABAA, 0x13DA,},
1257 : {0xABAB, 0x13DB,},
1258 : {0xABAC, 0x13DC,},
1259 : {0xABAD, 0x13DD,},
1260 : {0xABAE, 0x13DE,},
1261 : {0xABAF, 0x13DF,},
1262 : {0xABB0, 0x13E0,},
1263 : {0xABB1, 0x13E1,},
1264 : {0xABB2, 0x13E2,},
1265 : {0xABB3, 0x13E3,},
1266 : {0xABB4, 0x13E4,},
1267 : {0xABB5, 0x13E5,},
1268 : {0xABB6, 0x13E6,},
1269 : {0xABB7, 0x13E7,},
1270 : {0xABB8, 0x13E8,},
1271 : {0xABB9, 0x13E9,},
1272 : {0xABBA, 0x13EA,},
1273 : {0xABBB, 0x13EB,},
1274 : {0xABBC, 0x13EC,},
1275 : {0xABBD, 0x13ED,},
1276 : {0xABBE, 0x13EE,},
1277 : {0xABBF, 0x13EF,},
1278 : {0xFF41, 0xFF21,},
1279 : {0xFF42, 0xFF22,},
1280 : {0xFF43, 0xFF23,},
1281 : {0xFF44, 0xFF24,},
1282 : {0xFF45, 0xFF25,},
1283 : {0xFF46, 0xFF26,},
1284 : {0xFF47, 0xFF27,},
1285 : {0xFF48, 0xFF28,},
1286 : {0xFF49, 0xFF29,},
1287 : {0xFF4A, 0xFF2A,},
1288 : {0xFF4B, 0xFF2B,},
1289 : {0xFF4C, 0xFF2C,},
1290 : {0xFF4D, 0xFF2D,},
1291 : {0xFF4E, 0xFF2E,},
1292 : {0xFF4F, 0xFF2F,},
1293 : {0xFF50, 0xFF30,},
1294 : {0xFF51, 0xFF31,},
1295 : {0xFF52, 0xFF32,},
1296 : {0xFF53, 0xFF33,},
1297 : {0xFF54, 0xFF34,},
1298 : {0xFF55, 0xFF35,},
1299 : {0xFF56, 0xFF36,},
1300 : {0xFF57, 0xFF37,},
1301 : {0xFF58, 0xFF38,},
1302 : {0xFF59, 0xFF39,},
1303 : {0xFF5A, 0xFF3A,},
1304 : {0x10428, 0x10400,},
1305 : {0x10429, 0x10401,},
1306 : {0x1042A, 0x10402,},
1307 : {0x1042B, 0x10403,},
1308 : {0x1042C, 0x10404,},
1309 : {0x1042D, 0x10405,},
1310 : {0x1042E, 0x10406,},
1311 : {0x1042F, 0x10407,},
1312 : {0x10430, 0x10408,},
1313 : {0x10431, 0x10409,},
1314 : {0x10432, 0x1040A,},
1315 : {0x10433, 0x1040B,},
1316 : {0x10434, 0x1040C,},
1317 : {0x10435, 0x1040D,},
1318 : {0x10436, 0x1040E,},
1319 : {0x10437, 0x1040F,},
1320 : {0x10438, 0x10410,},
1321 : {0x10439, 0x10411,},
1322 : {0x1043A, 0x10412,},
1323 : {0x1043B, 0x10413,},
1324 : {0x1043C, 0x10414,},
1325 : {0x1043D, 0x10415,},
1326 : {0x1043E, 0x10416,},
1327 : {0x1043F, 0x10417,},
1328 : {0x10440, 0x10418,},
1329 : {0x10441, 0x10419,},
1330 : {0x10442, 0x1041A,},
1331 : {0x10443, 0x1041B,},
1332 : {0x10444, 0x1041C,},
1333 : {0x10445, 0x1041D,},
1334 : {0x10446, 0x1041E,},
1335 : {0x10447, 0x1041F,},
1336 : {0x10448, 0x10420,},
1337 : {0x10449, 0x10421,},
1338 : {0x1044A, 0x10422,},
1339 : {0x1044B, 0x10423,},
1340 : {0x1044C, 0x10424,},
1341 : {0x1044D, 0x10425,},
1342 : {0x1044E, 0x10426,},
1343 : {0x1044F, 0x10427,},
1344 : {0x104D8, 0x104B0,},
1345 : {0x104D9, 0x104B1,},
1346 : {0x104DA, 0x104B2,},
1347 : {0x104DB, 0x104B3,},
1348 : {0x104DC, 0x104B4,},
1349 : {0x104DD, 0x104B5,},
1350 : {0x104DE, 0x104B6,},
1351 : {0x104DF, 0x104B7,},
1352 : {0x104E0, 0x104B8,},
1353 : {0x104E1, 0x104B9,},
1354 : {0x104E2, 0x104BA,},
1355 : {0x104E3, 0x104BB,},
1356 : {0x104E4, 0x104BC,},
1357 : {0x104E5, 0x104BD,},
1358 : {0x104E6, 0x104BE,},
1359 : {0x104E7, 0x104BF,},
1360 : {0x104E8, 0x104C0,},
1361 : {0x104E9, 0x104C1,},
1362 : {0x104EA, 0x104C2,},
1363 : {0x104EB, 0x104C3,},
1364 : {0x104EC, 0x104C4,},
1365 : {0x104ED, 0x104C5,},
1366 : {0x104EE, 0x104C6,},
1367 : {0x104EF, 0x104C7,},
1368 : {0x104F0, 0x104C8,},
1369 : {0x104F1, 0x104C9,},
1370 : {0x104F2, 0x104CA,},
1371 : {0x104F3, 0x104CB,},
1372 : {0x104F4, 0x104CC,},
1373 : {0x104F5, 0x104CD,},
1374 : {0x104F6, 0x104CE,},
1375 : {0x104F7, 0x104CF,},
1376 : {0x104F8, 0x104D0,},
1377 : {0x104F9, 0x104D1,},
1378 : {0x104FA, 0x104D2,},
1379 : {0x104FB, 0x104D3,},
1380 : {0x10597, 0x10570,},
1381 : {0x10598, 0x10571,},
1382 : {0x10599, 0x10572,},
1383 : {0x1059A, 0x10573,},
1384 : {0x1059B, 0x10574,},
1385 : {0x1059C, 0x10575,},
1386 : {0x1059D, 0x10576,},
1387 : {0x1059E, 0x10577,},
1388 : {0x1059F, 0x10578,},
1389 : {0x105A0, 0x10579,},
1390 : {0x105A1, 0x1057A,},
1391 : {0x105A3, 0x1057C,},
1392 : {0x105A4, 0x1057D,},
1393 : {0x105A5, 0x1057E,},
1394 : {0x105A6, 0x1057F,},
1395 : {0x105A7, 0x10580,},
1396 : {0x105A8, 0x10581,},
1397 : {0x105A9, 0x10582,},
1398 : {0x105AA, 0x10583,},
1399 : {0x105AB, 0x10584,},
1400 : {0x105AC, 0x10585,},
1401 : {0x105AD, 0x10586,},
1402 : {0x105AE, 0x10587,},
1403 : {0x105AF, 0x10588,},
1404 : {0x105B0, 0x10589,},
1405 : {0x105B1, 0x1058A,},
1406 : {0x105B3, 0x1058C,},
1407 : {0x105B4, 0x1058D,},
1408 : {0x105B5, 0x1058E,},
1409 : {0x105B6, 0x1058F,},
1410 : {0x105B7, 0x10590,},
1411 : {0x105B8, 0x10591,},
1412 : {0x105B9, 0x10592,},
1413 : {0x105BB, 0x10594,},
1414 : {0x105BC, 0x10595,},
1415 : {0x10CC0, 0x10C80,},
1416 : {0x10CC1, 0x10C81,},
1417 : {0x10CC2, 0x10C82,},
1418 : {0x10CC3, 0x10C83,},
1419 : {0x10CC4, 0x10C84,},
1420 : {0x10CC5, 0x10C85,},
1421 : {0x10CC6, 0x10C86,},
1422 : {0x10CC7, 0x10C87,},
1423 : {0x10CC8, 0x10C88,},
1424 : {0x10CC9, 0x10C89,},
1425 : {0x10CCA, 0x10C8A,},
1426 : {0x10CCB, 0x10C8B,},
1427 : {0x10CCC, 0x10C8C,},
1428 : {0x10CCD, 0x10C8D,},
1429 : {0x10CCE, 0x10C8E,},
1430 : {0x10CCF, 0x10C8F,},
1431 : {0x10CD0, 0x10C90,},
1432 : {0x10CD1, 0x10C91,},
1433 : {0x10CD2, 0x10C92,},
1434 : {0x10CD3, 0x10C93,},
1435 : {0x10CD4, 0x10C94,},
1436 : {0x10CD5, 0x10C95,},
1437 : {0x10CD6, 0x10C96,},
1438 : {0x10CD7, 0x10C97,},
1439 : {0x10CD8, 0x10C98,},
1440 : {0x10CD9, 0x10C99,},
1441 : {0x10CDA, 0x10C9A,},
1442 : {0x10CDB, 0x10C9B,},
1443 : {0x10CDC, 0x10C9C,},
1444 : {0x10CDD, 0x10C9D,},
1445 : {0x10CDE, 0x10C9E,},
1446 : {0x10CDF, 0x10C9F,},
1447 : {0x10CE0, 0x10CA0,},
1448 : {0x10CE1, 0x10CA1,},
1449 : {0x10CE2, 0x10CA2,},
1450 : {0x10CE3, 0x10CA3,},
1451 : {0x10CE4, 0x10CA4,},
1452 : {0x10CE5, 0x10CA5,},
1453 : {0x10CE6, 0x10CA6,},
1454 : {0x10CE7, 0x10CA7,},
1455 : {0x10CE8, 0x10CA8,},
1456 : {0x10CE9, 0x10CA9,},
1457 : {0x10CEA, 0x10CAA,},
1458 : {0x10CEB, 0x10CAB,},
1459 : {0x10CEC, 0x10CAC,},
1460 : {0x10CED, 0x10CAD,},
1461 : {0x10CEE, 0x10CAE,},
1462 : {0x10CEF, 0x10CAF,},
1463 : {0x10CF0, 0x10CB0,},
1464 : {0x10CF1, 0x10CB1,},
1465 : {0x10CF2, 0x10CB2,},
1466 : {0x118C0, 0x118A0,},
1467 : {0x118C1, 0x118A1,},
1468 : {0x118C2, 0x118A2,},
1469 : {0x118C3, 0x118A3,},
1470 : {0x118C4, 0x118A4,},
1471 : {0x118C5, 0x118A5,},
1472 : {0x118C6, 0x118A6,},
1473 : {0x118C7, 0x118A7,},
1474 : {0x118C8, 0x118A8,},
1475 : {0x118C9, 0x118A9,},
1476 : {0x118CA, 0x118AA,},
1477 : {0x118CB, 0x118AB,},
1478 : {0x118CC, 0x118AC,},
1479 : {0x118CD, 0x118AD,},
1480 : {0x118CE, 0x118AE,},
1481 : {0x118CF, 0x118AF,},
1482 : {0x118D0, 0x118B0,},
1483 : {0x118D1, 0x118B1,},
1484 : {0x118D2, 0x118B2,},
1485 : {0x118D3, 0x118B3,},
1486 : {0x118D4, 0x118B4,},
1487 : {0x118D5, 0x118B5,},
1488 : {0x118D6, 0x118B6,},
1489 : {0x118D7, 0x118B7,},
1490 : {0x118D8, 0x118B8,},
1491 : {0x118D9, 0x118B9,},
1492 : {0x118DA, 0x118BA,},
1493 : {0x118DB, 0x118BB,},
1494 : {0x118DC, 0x118BC,},
1495 : {0x118DD, 0x118BD,},
1496 : {0x118DE, 0x118BE,},
1497 : {0x118DF, 0x118BF,},
1498 : {0x16E60, 0x16E40,},
1499 : {0x16E61, 0x16E41,},
1500 : {0x16E62, 0x16E42,},
1501 : {0x16E63, 0x16E43,},
1502 : {0x16E64, 0x16E44,},
1503 : {0x16E65, 0x16E45,},
1504 : {0x16E66, 0x16E46,},
1505 : {0x16E67, 0x16E47,},
1506 : {0x16E68, 0x16E48,},
1507 : {0x16E69, 0x16E49,},
1508 : {0x16E6A, 0x16E4A,},
1509 : {0x16E6B, 0x16E4B,},
1510 : {0x16E6C, 0x16E4C,},
1511 : {0x16E6D, 0x16E4D,},
1512 : {0x16E6E, 0x16E4E,},
1513 : {0x16E6F, 0x16E4F,},
1514 : {0x16E70, 0x16E50,},
1515 : {0x16E71, 0x16E51,},
1516 : {0x16E72, 0x16E52,},
1517 : {0x16E73, 0x16E53,},
1518 : {0x16E74, 0x16E54,},
1519 : {0x16E75, 0x16E55,},
1520 : {0x16E76, 0x16E56,},
1521 : {0x16E77, 0x16E57,},
1522 : {0x16E78, 0x16E58,},
1523 : {0x16E79, 0x16E59,},
1524 : {0x16E7A, 0x16E5A,},
1525 : {0x16E7B, 0x16E5B,},
1526 : {0x16E7C, 0x16E5C,},
1527 : {0x16E7D, 0x16E5D,},
1528 : {0x16E7E, 0x16E5E,},
1529 : {0x16E7F, 0x16E5F,},
1530 : {0x1E922, 0x1E900,},
1531 : {0x1E923, 0x1E901,},
1532 : {0x1E924, 0x1E902,},
1533 : {0x1E925, 0x1E903,},
1534 : {0x1E926, 0x1E904,},
1535 : {0x1E927, 0x1E905,},
1536 : {0x1E928, 0x1E906,},
1537 : {0x1E929, 0x1E907,},
1538 : {0x1E92A, 0x1E908,},
1539 : {0x1E92B, 0x1E909,},
1540 : {0x1E92C, 0x1E90A,},
1541 : {0x1E92D, 0x1E90B,},
1542 : {0x1E92E, 0x1E90C,},
1543 : {0x1E92F, 0x1E90D,},
1544 : {0x1E930, 0x1E90E,},
1545 : {0x1E931, 0x1E90F,},
1546 : {0x1E932, 0x1E910,},
1547 : {0x1E933, 0x1E911,},
1548 : {0x1E934, 0x1E912,},
1549 : {0x1E935, 0x1E913,},
1550 : {0x1E936, 0x1E914,},
1551 : {0x1E937, 0x1E915,},
1552 : {0x1E938, 0x1E916,},
1553 : {0x1E939, 0x1E917,},
1554 : {0x1E93A, 0x1E918,},
1555 : {0x1E93B, 0x1E919,},
1556 : {0x1E93C, 0x1E91A,},
1557 : {0x1E93D, 0x1E91B,},
1558 : {0x1E93E, 0x1E91C,},
1559 : {0x1E93F, 0x1E91D,},
1560 : {0x1E940, 0x1E91E,},
1561 : {0x1E941, 0x1E91F,},
1562 : {0x1E942, 0x1E920,},
1563 : {0x1E943, 0x1E921,},
1564 : }, UTF8_toLower[] = { /* code points with non-null lowercase conversion */
1565 : {0x0041, 0x0061,},
1566 : {0x0042, 0x0062,},
1567 : {0x0043, 0x0063,},
1568 : {0x0044, 0x0064,},
1569 : {0x0045, 0x0065,},
1570 : {0x0046, 0x0066,},
1571 : {0x0047, 0x0067,},
1572 : {0x0048, 0x0068,},
1573 : {0x0049, 0x0069,},
1574 : {0x004A, 0x006A,},
1575 : {0x004B, 0x006B,},
1576 : {0x004C, 0x006C,},
1577 : {0x004D, 0x006D,},
1578 : {0x004E, 0x006E,},
1579 : {0x004F, 0x006F,},
1580 : {0x0050, 0x0070,},
1581 : {0x0051, 0x0071,},
1582 : {0x0052, 0x0072,},
1583 : {0x0053, 0x0073,},
1584 : {0x0054, 0x0074,},
1585 : {0x0055, 0x0075,},
1586 : {0x0056, 0x0076,},
1587 : {0x0057, 0x0077,},
1588 : {0x0058, 0x0078,},
1589 : {0x0059, 0x0079,},
1590 : {0x005A, 0x007A,},
1591 : {0x00C0, 0x00E0,},
1592 : {0x00C1, 0x00E1,},
1593 : {0x00C2, 0x00E2,},
1594 : {0x00C3, 0x00E3,},
1595 : {0x00C4, 0x00E4,},
1596 : {0x00C5, 0x00E5,},
1597 : {0x00C6, 0x00E6,},
1598 : {0x00C7, 0x00E7,},
1599 : {0x00C8, 0x00E8,},
1600 : {0x00C9, 0x00E9,},
1601 : {0x00CA, 0x00EA,},
1602 : {0x00CB, 0x00EB,},
1603 : {0x00CC, 0x00EC,},
1604 : {0x00CD, 0x00ED,},
1605 : {0x00CE, 0x00EE,},
1606 : {0x00CF, 0x00EF,},
1607 : {0x00D0, 0x00F0,},
1608 : {0x00D1, 0x00F1,},
1609 : {0x00D2, 0x00F2,},
1610 : {0x00D3, 0x00F3,},
1611 : {0x00D4, 0x00F4,},
1612 : {0x00D5, 0x00F5,},
1613 : {0x00D6, 0x00F6,},
1614 : {0x00D8, 0x00F8,},
1615 : {0x00D9, 0x00F9,},
1616 : {0x00DA, 0x00FA,},
1617 : {0x00DB, 0x00FB,},
1618 : {0x00DC, 0x00FC,},
1619 : {0x00DD, 0x00FD,},
1620 : {0x00DE, 0x00FE,},
1621 : {0x0100, 0x0101,},
1622 : {0x0102, 0x0103,},
1623 : {0x0104, 0x0105,},
1624 : {0x0106, 0x0107,},
1625 : {0x0108, 0x0109,},
1626 : {0x010A, 0x010B,},
1627 : {0x010C, 0x010D,},
1628 : {0x010E, 0x010F,},
1629 : {0x0110, 0x0111,},
1630 : {0x0112, 0x0113,},
1631 : {0x0114, 0x0115,},
1632 : {0x0116, 0x0117,},
1633 : {0x0118, 0x0119,},
1634 : {0x011A, 0x011B,},
1635 : {0x011C, 0x011D,},
1636 : {0x011E, 0x011F,},
1637 : {0x0120, 0x0121,},
1638 : {0x0122, 0x0123,},
1639 : {0x0124, 0x0125,},
1640 : {0x0126, 0x0127,},
1641 : {0x0128, 0x0129,},
1642 : {0x012A, 0x012B,},
1643 : {0x012C, 0x012D,},
1644 : {0x012E, 0x012F,},
1645 : {0x0130, 0x0069,},
1646 : {0x0132, 0x0133,},
1647 : {0x0134, 0x0135,},
1648 : {0x0136, 0x0137,},
1649 : {0x0139, 0x013A,},
1650 : {0x013B, 0x013C,},
1651 : {0x013D, 0x013E,},
1652 : {0x013F, 0x0140,},
1653 : {0x0141, 0x0142,},
1654 : {0x0143, 0x0144,},
1655 : {0x0145, 0x0146,},
1656 : {0x0147, 0x0148,},
1657 : {0x014A, 0x014B,},
1658 : {0x014C, 0x014D,},
1659 : {0x014E, 0x014F,},
1660 : {0x0150, 0x0151,},
1661 : {0x0152, 0x0153,},
1662 : {0x0154, 0x0155,},
1663 : {0x0156, 0x0157,},
1664 : {0x0158, 0x0159,},
1665 : {0x015A, 0x015B,},
1666 : {0x015C, 0x015D,},
1667 : {0x015E, 0x015F,},
1668 : {0x0160, 0x0161,},
1669 : {0x0162, 0x0163,},
1670 : {0x0164, 0x0165,},
1671 : {0x0166, 0x0167,},
1672 : {0x0168, 0x0169,},
1673 : {0x016A, 0x016B,},
1674 : {0x016C, 0x016D,},
1675 : {0x016E, 0x016F,},
1676 : {0x0170, 0x0171,},
1677 : {0x0172, 0x0173,},
1678 : {0x0174, 0x0175,},
1679 : {0x0176, 0x0177,},
1680 : {0x0178, 0x00FF,},
1681 : {0x0179, 0x017A,},
1682 : {0x017B, 0x017C,},
1683 : {0x017D, 0x017E,},
1684 : {0x0181, 0x0253,},
1685 : {0x0182, 0x0183,},
1686 : {0x0184, 0x0185,},
1687 : {0x0186, 0x0254,},
1688 : {0x0187, 0x0188,},
1689 : {0x0189, 0x0256,},
1690 : {0x018A, 0x0257,},
1691 : {0x018B, 0x018C,},
1692 : {0x018E, 0x01DD,},
1693 : {0x018F, 0x0259,},
1694 : {0x0190, 0x025B,},
1695 : {0x0191, 0x0192,},
1696 : {0x0193, 0x0260,},
1697 : {0x0194, 0x0263,},
1698 : {0x0196, 0x0269,},
1699 : {0x0197, 0x0268,},
1700 : {0x0198, 0x0199,},
1701 : {0x019C, 0x026F,},
1702 : {0x019D, 0x0272,},
1703 : {0x019F, 0x0275,},
1704 : {0x01A0, 0x01A1,},
1705 : {0x01A2, 0x01A3,},
1706 : {0x01A4, 0x01A5,},
1707 : {0x01A6, 0x0280,},
1708 : {0x01A7, 0x01A8,},
1709 : {0x01A9, 0x0283,},
1710 : {0x01AC, 0x01AD,},
1711 : {0x01AE, 0x0288,},
1712 : {0x01AF, 0x01B0,},
1713 : {0x01B1, 0x028A,},
1714 : {0x01B2, 0x028B,},
1715 : {0x01B3, 0x01B4,},
1716 : {0x01B5, 0x01B6,},
1717 : {0x01B7, 0x0292,},
1718 : {0x01B8, 0x01B9,},
1719 : {0x01BC, 0x01BD,},
1720 : {0x01C4, 0x01C6,},
1721 : {0x01C5, 0x01C6,},
1722 : {0x01C7, 0x01C9,},
1723 : {0x01C8, 0x01C9,},
1724 : {0x01CA, 0x01CC,},
1725 : {0x01CB, 0x01CC,},
1726 : {0x01CD, 0x01CE,},
1727 : {0x01CF, 0x01D0,},
1728 : {0x01D1, 0x01D2,},
1729 : {0x01D3, 0x01D4,},
1730 : {0x01D5, 0x01D6,},
1731 : {0x01D7, 0x01D8,},
1732 : {0x01D9, 0x01DA,},
1733 : {0x01DB, 0x01DC,},
1734 : {0x01DE, 0x01DF,},
1735 : {0x01E0, 0x01E1,},
1736 : {0x01E2, 0x01E3,},
1737 : {0x01E4, 0x01E5,},
1738 : {0x01E6, 0x01E7,},
1739 : {0x01E8, 0x01E9,},
1740 : {0x01EA, 0x01EB,},
1741 : {0x01EC, 0x01ED,},
1742 : {0x01EE, 0x01EF,},
1743 : {0x01F1, 0x01F3,},
1744 : {0x01F2, 0x01F3,},
1745 : {0x01F4, 0x01F5,},
1746 : {0x01F6, 0x0195,},
1747 : {0x01F7, 0x01BF,},
1748 : {0x01F8, 0x01F9,},
1749 : {0x01FA, 0x01FB,},
1750 : {0x01FC, 0x01FD,},
1751 : {0x01FE, 0x01FF,},
1752 : {0x0200, 0x0201,},
1753 : {0x0202, 0x0203,},
1754 : {0x0204, 0x0205,},
1755 : {0x0206, 0x0207,},
1756 : {0x0208, 0x0209,},
1757 : {0x020A, 0x020B,},
1758 : {0x020C, 0x020D,},
1759 : {0x020E, 0x020F,},
1760 : {0x0210, 0x0211,},
1761 : {0x0212, 0x0213,},
1762 : {0x0214, 0x0215,},
1763 : {0x0216, 0x0217,},
1764 : {0x0218, 0x0219,},
1765 : {0x021A, 0x021B,},
1766 : {0x021C, 0x021D,},
1767 : {0x021E, 0x021F,},
1768 : {0x0220, 0x019E,},
1769 : {0x0222, 0x0223,},
1770 : {0x0224, 0x0225,},
1771 : {0x0226, 0x0227,},
1772 : {0x0228, 0x0229,},
1773 : {0x022A, 0x022B,},
1774 : {0x022C, 0x022D,},
1775 : {0x022E, 0x022F,},
1776 : {0x0230, 0x0231,},
1777 : {0x0232, 0x0233,},
1778 : {0x023A, 0x2C65,},
1779 : {0x023B, 0x023C,},
1780 : {0x023D, 0x019A,},
1781 : {0x023E, 0x2C66,},
1782 : {0x0241, 0x0242,},
1783 : {0x0243, 0x0180,},
1784 : {0x0244, 0x0289,},
1785 : {0x0245, 0x028C,},
1786 : {0x0246, 0x0247,},
1787 : {0x0248, 0x0249,},
1788 : {0x024A, 0x024B,},
1789 : {0x024C, 0x024D,},
1790 : {0x024E, 0x024F,},
1791 : {0x0370, 0x0371,},
1792 : {0x0372, 0x0373,},
1793 : {0x0376, 0x0377,},
1794 : {0x037F, 0x03F3,},
1795 : {0x0386, 0x03AC,},
1796 : {0x0388, 0x03AD,},
1797 : {0x0389, 0x03AE,},
1798 : {0x038A, 0x03AF,},
1799 : {0x038C, 0x03CC,},
1800 : {0x038E, 0x03CD,},
1801 : {0x038F, 0x03CE,},
1802 : {0x0391, 0x03B1,},
1803 : {0x0392, 0x03B2,},
1804 : {0x0393, 0x03B3,},
1805 : {0x0394, 0x03B4,},
1806 : {0x0395, 0x03B5,},
1807 : {0x0396, 0x03B6,},
1808 : {0x0397, 0x03B7,},
1809 : {0x0398, 0x03B8,},
1810 : {0x0399, 0x03B9,},
1811 : {0x039A, 0x03BA,},
1812 : {0x039B, 0x03BB,},
1813 : {0x039C, 0x03BC,},
1814 : {0x039D, 0x03BD,},
1815 : {0x039E, 0x03BE,},
1816 : {0x039F, 0x03BF,},
1817 : {0x03A0, 0x03C0,},
1818 : {0x03A1, 0x03C1,},
1819 : {0x03A3, 0x03C3,},
1820 : {0x03A4, 0x03C4,},
1821 : {0x03A5, 0x03C5,},
1822 : {0x03A6, 0x03C6,},
1823 : {0x03A7, 0x03C7,},
1824 : {0x03A8, 0x03C8,},
1825 : {0x03A9, 0x03C9,},
1826 : {0x03AA, 0x03CA,},
1827 : {0x03AB, 0x03CB,},
1828 : {0x03CF, 0x03D7,},
1829 : {0x03D8, 0x03D9,},
1830 : {0x03DA, 0x03DB,},
1831 : {0x03DC, 0x03DD,},
1832 : {0x03DE, 0x03DF,},
1833 : {0x03E0, 0x03E1,},
1834 : {0x03E2, 0x03E3,},
1835 : {0x03E4, 0x03E5,},
1836 : {0x03E6, 0x03E7,},
1837 : {0x03E8, 0x03E9,},
1838 : {0x03EA, 0x03EB,},
1839 : {0x03EC, 0x03ED,},
1840 : {0x03EE, 0x03EF,},
1841 : {0x03F4, 0x03B8,},
1842 : {0x03F7, 0x03F8,},
1843 : {0x03F9, 0x03F2,},
1844 : {0x03FA, 0x03FB,},
1845 : {0x03FD, 0x037B,},
1846 : {0x03FE, 0x037C,},
1847 : {0x03FF, 0x037D,},
1848 : {0x0400, 0x0450,},
1849 : {0x0401, 0x0451,},
1850 : {0x0402, 0x0452,},
1851 : {0x0403, 0x0453,},
1852 : {0x0404, 0x0454,},
1853 : {0x0405, 0x0455,},
1854 : {0x0406, 0x0456,},
1855 : {0x0407, 0x0457,},
1856 : {0x0408, 0x0458,},
1857 : {0x0409, 0x0459,},
1858 : {0x040A, 0x045A,},
1859 : {0x040B, 0x045B,},
1860 : {0x040C, 0x045C,},
1861 : {0x040D, 0x045D,},
1862 : {0x040E, 0x045E,},
1863 : {0x040F, 0x045F,},
1864 : {0x0410, 0x0430,},
1865 : {0x0411, 0x0431,},
1866 : {0x0412, 0x0432,},
1867 : {0x0413, 0x0433,},
1868 : {0x0414, 0x0434,},
1869 : {0x0415, 0x0435,},
1870 : {0x0416, 0x0436,},
1871 : {0x0417, 0x0437,},
1872 : {0x0418, 0x0438,},
1873 : {0x0419, 0x0439,},
1874 : {0x041A, 0x043A,},
1875 : {0x041B, 0x043B,},
1876 : {0x041C, 0x043C,},
1877 : {0x041D, 0x043D,},
1878 : {0x041E, 0x043E,},
1879 : {0x041F, 0x043F,},
1880 : {0x0420, 0x0440,},
1881 : {0x0421, 0x0441,},
1882 : {0x0422, 0x0442,},
1883 : {0x0423, 0x0443,},
1884 : {0x0424, 0x0444,},
1885 : {0x0425, 0x0445,},
1886 : {0x0426, 0x0446,},
1887 : {0x0427, 0x0447,},
1888 : {0x0428, 0x0448,},
1889 : {0x0429, 0x0449,},
1890 : {0x042A, 0x044A,},
1891 : {0x042B, 0x044B,},
1892 : {0x042C, 0x044C,},
1893 : {0x042D, 0x044D,},
1894 : {0x042E, 0x044E,},
1895 : {0x042F, 0x044F,},
1896 : {0x0460, 0x0461,},
1897 : {0x0462, 0x0463,},
1898 : {0x0464, 0x0465,},
1899 : {0x0466, 0x0467,},
1900 : {0x0468, 0x0469,},
1901 : {0x046A, 0x046B,},
1902 : {0x046C, 0x046D,},
1903 : {0x046E, 0x046F,},
1904 : {0x0470, 0x0471,},
1905 : {0x0472, 0x0473,},
1906 : {0x0474, 0x0475,},
1907 : {0x0476, 0x0477,},
1908 : {0x0478, 0x0479,},
1909 : {0x047A, 0x047B,},
1910 : {0x047C, 0x047D,},
1911 : {0x047E, 0x047F,},
1912 : {0x0480, 0x0481,},
1913 : {0x048A, 0x048B,},
1914 : {0x048C, 0x048D,},
1915 : {0x048E, 0x048F,},
1916 : {0x0490, 0x0491,},
1917 : {0x0492, 0x0493,},
1918 : {0x0494, 0x0495,},
1919 : {0x0496, 0x0497,},
1920 : {0x0498, 0x0499,},
1921 : {0x049A, 0x049B,},
1922 : {0x049C, 0x049D,},
1923 : {0x049E, 0x049F,},
1924 : {0x04A0, 0x04A1,},
1925 : {0x04A2, 0x04A3,},
1926 : {0x04A4, 0x04A5,},
1927 : {0x04A6, 0x04A7,},
1928 : {0x04A8, 0x04A9,},
1929 : {0x04AA, 0x04AB,},
1930 : {0x04AC, 0x04AD,},
1931 : {0x04AE, 0x04AF,},
1932 : {0x04B0, 0x04B1,},
1933 : {0x04B2, 0x04B3,},
1934 : {0x04B4, 0x04B5,},
1935 : {0x04B6, 0x04B7,},
1936 : {0x04B8, 0x04B9,},
1937 : {0x04BA, 0x04BB,},
1938 : {0x04BC, 0x04BD,},
1939 : {0x04BE, 0x04BF,},
1940 : {0x04C0, 0x04CF,},
1941 : {0x04C1, 0x04C2,},
1942 : {0x04C3, 0x04C4,},
1943 : {0x04C5, 0x04C6,},
1944 : {0x04C7, 0x04C8,},
1945 : {0x04C9, 0x04CA,},
1946 : {0x04CB, 0x04CC,},
1947 : {0x04CD, 0x04CE,},
1948 : {0x04D0, 0x04D1,},
1949 : {0x04D2, 0x04D3,},
1950 : {0x04D4, 0x04D5,},
1951 : {0x04D6, 0x04D7,},
1952 : {0x04D8, 0x04D9,},
1953 : {0x04DA, 0x04DB,},
1954 : {0x04DC, 0x04DD,},
1955 : {0x04DE, 0x04DF,},
1956 : {0x04E0, 0x04E1,},
1957 : {0x04E2, 0x04E3,},
1958 : {0x04E4, 0x04E5,},
1959 : {0x04E6, 0x04E7,},
1960 : {0x04E8, 0x04E9,},
1961 : {0x04EA, 0x04EB,},
1962 : {0x04EC, 0x04ED,},
1963 : {0x04EE, 0x04EF,},
1964 : {0x04F0, 0x04F1,},
1965 : {0x04F2, 0x04F3,},
1966 : {0x04F4, 0x04F5,},
1967 : {0x04F6, 0x04F7,},
1968 : {0x04F8, 0x04F9,},
1969 : {0x04FA, 0x04FB,},
1970 : {0x04FC, 0x04FD,},
1971 : {0x04FE, 0x04FF,},
1972 : {0x0500, 0x0501,},
1973 : {0x0502, 0x0503,},
1974 : {0x0504, 0x0505,},
1975 : {0x0506, 0x0507,},
1976 : {0x0508, 0x0509,},
1977 : {0x050A, 0x050B,},
1978 : {0x050C, 0x050D,},
1979 : {0x050E, 0x050F,},
1980 : {0x0510, 0x0511,},
1981 : {0x0512, 0x0513,},
1982 : {0x0514, 0x0515,},
1983 : {0x0516, 0x0517,},
1984 : {0x0518, 0x0519,},
1985 : {0x051A, 0x051B,},
1986 : {0x051C, 0x051D,},
1987 : {0x051E, 0x051F,},
1988 : {0x0520, 0x0521,},
1989 : {0x0522, 0x0523,},
1990 : {0x0524, 0x0525,},
1991 : {0x0526, 0x0527,},
1992 : {0x0528, 0x0529,},
1993 : {0x052A, 0x052B,},
1994 : {0x052C, 0x052D,},
1995 : {0x052E, 0x052F,},
1996 : {0x0531, 0x0561,},
1997 : {0x0532, 0x0562,},
1998 : {0x0533, 0x0563,},
1999 : {0x0534, 0x0564,},
2000 : {0x0535, 0x0565,},
2001 : {0x0536, 0x0566,},
2002 : {0x0537, 0x0567,},
2003 : {0x0538, 0x0568,},
2004 : {0x0539, 0x0569,},
2005 : {0x053A, 0x056A,},
2006 : {0x053B, 0x056B,},
2007 : {0x053C, 0x056C,},
2008 : {0x053D, 0x056D,},
2009 : {0x053E, 0x056E,},
2010 : {0x053F, 0x056F,},
2011 : {0x0540, 0x0570,},
2012 : {0x0541, 0x0571,},
2013 : {0x0542, 0x0572,},
2014 : {0x0543, 0x0573,},
2015 : {0x0544, 0x0574,},
2016 : {0x0545, 0x0575,},
2017 : {0x0546, 0x0576,},
2018 : {0x0547, 0x0577,},
2019 : {0x0548, 0x0578,},
2020 : {0x0549, 0x0579,},
2021 : {0x054A, 0x057A,},
2022 : {0x054B, 0x057B,},
2023 : {0x054C, 0x057C,},
2024 : {0x054D, 0x057D,},
2025 : {0x054E, 0x057E,},
2026 : {0x054F, 0x057F,},
2027 : {0x0550, 0x0580,},
2028 : {0x0551, 0x0581,},
2029 : {0x0552, 0x0582,},
2030 : {0x0553, 0x0583,},
2031 : {0x0554, 0x0584,},
2032 : {0x0555, 0x0585,},
2033 : {0x0556, 0x0586,},
2034 : {0x10A0, 0x2D00,},
2035 : {0x10A1, 0x2D01,},
2036 : {0x10A2, 0x2D02,},
2037 : {0x10A3, 0x2D03,},
2038 : {0x10A4, 0x2D04,},
2039 : {0x10A5, 0x2D05,},
2040 : {0x10A6, 0x2D06,},
2041 : {0x10A7, 0x2D07,},
2042 : {0x10A8, 0x2D08,},
2043 : {0x10A9, 0x2D09,},
2044 : {0x10AA, 0x2D0A,},
2045 : {0x10AB, 0x2D0B,},
2046 : {0x10AC, 0x2D0C,},
2047 : {0x10AD, 0x2D0D,},
2048 : {0x10AE, 0x2D0E,},
2049 : {0x10AF, 0x2D0F,},
2050 : {0x10B0, 0x2D10,},
2051 : {0x10B1, 0x2D11,},
2052 : {0x10B2, 0x2D12,},
2053 : {0x10B3, 0x2D13,},
2054 : {0x10B4, 0x2D14,},
2055 : {0x10B5, 0x2D15,},
2056 : {0x10B6, 0x2D16,},
2057 : {0x10B7, 0x2D17,},
2058 : {0x10B8, 0x2D18,},
2059 : {0x10B9, 0x2D19,},
2060 : {0x10BA, 0x2D1A,},
2061 : {0x10BB, 0x2D1B,},
2062 : {0x10BC, 0x2D1C,},
2063 : {0x10BD, 0x2D1D,},
2064 : {0x10BE, 0x2D1E,},
2065 : {0x10BF, 0x2D1F,},
2066 : {0x10C0, 0x2D20,},
2067 : {0x10C1, 0x2D21,},
2068 : {0x10C2, 0x2D22,},
2069 : {0x10C3, 0x2D23,},
2070 : {0x10C4, 0x2D24,},
2071 : {0x10C5, 0x2D25,},
2072 : {0x10C7, 0x2D27,},
2073 : {0x10CD, 0x2D2D,},
2074 : {0x13A0, 0xAB70,},
2075 : {0x13A1, 0xAB71,},
2076 : {0x13A2, 0xAB72,},
2077 : {0x13A3, 0xAB73,},
2078 : {0x13A4, 0xAB74,},
2079 : {0x13A5, 0xAB75,},
2080 : {0x13A6, 0xAB76,},
2081 : {0x13A7, 0xAB77,},
2082 : {0x13A8, 0xAB78,},
2083 : {0x13A9, 0xAB79,},
2084 : {0x13AA, 0xAB7A,},
2085 : {0x13AB, 0xAB7B,},
2086 : {0x13AC, 0xAB7C,},
2087 : {0x13AD, 0xAB7D,},
2088 : {0x13AE, 0xAB7E,},
2089 : {0x13AF, 0xAB7F,},
2090 : {0x13B0, 0xAB80,},
2091 : {0x13B1, 0xAB81,},
2092 : {0x13B2, 0xAB82,},
2093 : {0x13B3, 0xAB83,},
2094 : {0x13B4, 0xAB84,},
2095 : {0x13B5, 0xAB85,},
2096 : {0x13B6, 0xAB86,},
2097 : {0x13B7, 0xAB87,},
2098 : {0x13B8, 0xAB88,},
2099 : {0x13B9, 0xAB89,},
2100 : {0x13BA, 0xAB8A,},
2101 : {0x13BB, 0xAB8B,},
2102 : {0x13BC, 0xAB8C,},
2103 : {0x13BD, 0xAB8D,},
2104 : {0x13BE, 0xAB8E,},
2105 : {0x13BF, 0xAB8F,},
2106 : {0x13C0, 0xAB90,},
2107 : {0x13C1, 0xAB91,},
2108 : {0x13C2, 0xAB92,},
2109 : {0x13C3, 0xAB93,},
2110 : {0x13C4, 0xAB94,},
2111 : {0x13C5, 0xAB95,},
2112 : {0x13C6, 0xAB96,},
2113 : {0x13C7, 0xAB97,},
2114 : {0x13C8, 0xAB98,},
2115 : {0x13C9, 0xAB99,},
2116 : {0x13CA, 0xAB9A,},
2117 : {0x13CB, 0xAB9B,},
2118 : {0x13CC, 0xAB9C,},
2119 : {0x13CD, 0xAB9D,},
2120 : {0x13CE, 0xAB9E,},
2121 : {0x13CF, 0xAB9F,},
2122 : {0x13D0, 0xABA0,},
2123 : {0x13D1, 0xABA1,},
2124 : {0x13D2, 0xABA2,},
2125 : {0x13D3, 0xABA3,},
2126 : {0x13D4, 0xABA4,},
2127 : {0x13D5, 0xABA5,},
2128 : {0x13D6, 0xABA6,},
2129 : {0x13D7, 0xABA7,},
2130 : {0x13D8, 0xABA8,},
2131 : {0x13D9, 0xABA9,},
2132 : {0x13DA, 0xABAA,},
2133 : {0x13DB, 0xABAB,},
2134 : {0x13DC, 0xABAC,},
2135 : {0x13DD, 0xABAD,},
2136 : {0x13DE, 0xABAE,},
2137 : {0x13DF, 0xABAF,},
2138 : {0x13E0, 0xABB0,},
2139 : {0x13E1, 0xABB1,},
2140 : {0x13E2, 0xABB2,},
2141 : {0x13E3, 0xABB3,},
2142 : {0x13E4, 0xABB4,},
2143 : {0x13E5, 0xABB5,},
2144 : {0x13E6, 0xABB6,},
2145 : {0x13E7, 0xABB7,},
2146 : {0x13E8, 0xABB8,},
2147 : {0x13E9, 0xABB9,},
2148 : {0x13EA, 0xABBA,},
2149 : {0x13EB, 0xABBB,},
2150 : {0x13EC, 0xABBC,},
2151 : {0x13ED, 0xABBD,},
2152 : {0x13EE, 0xABBE,},
2153 : {0x13EF, 0xABBF,},
2154 : {0x13F0, 0x13F8,},
2155 : {0x13F1, 0x13F9,},
2156 : {0x13F2, 0x13FA,},
2157 : {0x13F3, 0x13FB,},
2158 : {0x13F4, 0x13FC,},
2159 : {0x13F5, 0x13FD,},
2160 : {0x1C90, 0x10D0,},
2161 : {0x1C91, 0x10D1,},
2162 : {0x1C92, 0x10D2,},
2163 : {0x1C93, 0x10D3,},
2164 : {0x1C94, 0x10D4,},
2165 : {0x1C95, 0x10D5,},
2166 : {0x1C96, 0x10D6,},
2167 : {0x1C97, 0x10D7,},
2168 : {0x1C98, 0x10D8,},
2169 : {0x1C99, 0x10D9,},
2170 : {0x1C9A, 0x10DA,},
2171 : {0x1C9B, 0x10DB,},
2172 : {0x1C9C, 0x10DC,},
2173 : {0x1C9D, 0x10DD,},
2174 : {0x1C9E, 0x10DE,},
2175 : {0x1C9F, 0x10DF,},
2176 : {0x1CA0, 0x10E0,},
2177 : {0x1CA1, 0x10E1,},
2178 : {0x1CA2, 0x10E2,},
2179 : {0x1CA3, 0x10E3,},
2180 : {0x1CA4, 0x10E4,},
2181 : {0x1CA5, 0x10E5,},
2182 : {0x1CA6, 0x10E6,},
2183 : {0x1CA7, 0x10E7,},
2184 : {0x1CA8, 0x10E8,},
2185 : {0x1CA9, 0x10E9,},
2186 : {0x1CAA, 0x10EA,},
2187 : {0x1CAB, 0x10EB,},
2188 : {0x1CAC, 0x10EC,},
2189 : {0x1CAD, 0x10ED,},
2190 : {0x1CAE, 0x10EE,},
2191 : {0x1CAF, 0x10EF,},
2192 : {0x1CB0, 0x10F0,},
2193 : {0x1CB1, 0x10F1,},
2194 : {0x1CB2, 0x10F2,},
2195 : {0x1CB3, 0x10F3,},
2196 : {0x1CB4, 0x10F4,},
2197 : {0x1CB5, 0x10F5,},
2198 : {0x1CB6, 0x10F6,},
2199 : {0x1CB7, 0x10F7,},
2200 : {0x1CB8, 0x10F8,},
2201 : {0x1CB9, 0x10F9,},
2202 : {0x1CBA, 0x10FA,},
2203 : {0x1CBD, 0x10FD,},
2204 : {0x1CBE, 0x10FE,},
2205 : {0x1CBF, 0x10FF,},
2206 : {0x1E00, 0x1E01,},
2207 : {0x1E02, 0x1E03,},
2208 : {0x1E04, 0x1E05,},
2209 : {0x1E06, 0x1E07,},
2210 : {0x1E08, 0x1E09,},
2211 : {0x1E0A, 0x1E0B,},
2212 : {0x1E0C, 0x1E0D,},
2213 : {0x1E0E, 0x1E0F,},
2214 : {0x1E10, 0x1E11,},
2215 : {0x1E12, 0x1E13,},
2216 : {0x1E14, 0x1E15,},
2217 : {0x1E16, 0x1E17,},
2218 : {0x1E18, 0x1E19,},
2219 : {0x1E1A, 0x1E1B,},
2220 : {0x1E1C, 0x1E1D,},
2221 : {0x1E1E, 0x1E1F,},
2222 : {0x1E20, 0x1E21,},
2223 : {0x1E22, 0x1E23,},
2224 : {0x1E24, 0x1E25,},
2225 : {0x1E26, 0x1E27,},
2226 : {0x1E28, 0x1E29,},
2227 : {0x1E2A, 0x1E2B,},
2228 : {0x1E2C, 0x1E2D,},
2229 : {0x1E2E, 0x1E2F,},
2230 : {0x1E30, 0x1E31,},
2231 : {0x1E32, 0x1E33,},
2232 : {0x1E34, 0x1E35,},
2233 : {0x1E36, 0x1E37,},
2234 : {0x1E38, 0x1E39,},
2235 : {0x1E3A, 0x1E3B,},
2236 : {0x1E3C, 0x1E3D,},
2237 : {0x1E3E, 0x1E3F,},
2238 : {0x1E40, 0x1E41,},
2239 : {0x1E42, 0x1E43,},
2240 : {0x1E44, 0x1E45,},
2241 : {0x1E46, 0x1E47,},
2242 : {0x1E48, 0x1E49,},
2243 : {0x1E4A, 0x1E4B,},
2244 : {0x1E4C, 0x1E4D,},
2245 : {0x1E4E, 0x1E4F,},
2246 : {0x1E50, 0x1E51,},
2247 : {0x1E52, 0x1E53,},
2248 : {0x1E54, 0x1E55,},
2249 : {0x1E56, 0x1E57,},
2250 : {0x1E58, 0x1E59,},
2251 : {0x1E5A, 0x1E5B,},
2252 : {0x1E5C, 0x1E5D,},
2253 : {0x1E5E, 0x1E5F,},
2254 : {0x1E60, 0x1E61,},
2255 : {0x1E62, 0x1E63,},
2256 : {0x1E64, 0x1E65,},
2257 : {0x1E66, 0x1E67,},
2258 : {0x1E68, 0x1E69,},
2259 : {0x1E6A, 0x1E6B,},
2260 : {0x1E6C, 0x1E6D,},
2261 : {0x1E6E, 0x1E6F,},
2262 : {0x1E70, 0x1E71,},
2263 : {0x1E72, 0x1E73,},
2264 : {0x1E74, 0x1E75,},
2265 : {0x1E76, 0x1E77,},
2266 : {0x1E78, 0x1E79,},
2267 : {0x1E7A, 0x1E7B,},
2268 : {0x1E7C, 0x1E7D,},
2269 : {0x1E7E, 0x1E7F,},
2270 : {0x1E80, 0x1E81,},
2271 : {0x1E82, 0x1E83,},
2272 : {0x1E84, 0x1E85,},
2273 : {0x1E86, 0x1E87,},
2274 : {0x1E88, 0x1E89,},
2275 : {0x1E8A, 0x1E8B,},
2276 : {0x1E8C, 0x1E8D,},
2277 : {0x1E8E, 0x1E8F,},
2278 : {0x1E90, 0x1E91,},
2279 : {0x1E92, 0x1E93,},
2280 : {0x1E94, 0x1E95,},
2281 : {0x1E9E, 0x00DF,},
2282 : {0x1EA0, 0x1EA1,},
2283 : {0x1EA2, 0x1EA3,},
2284 : {0x1EA4, 0x1EA5,},
2285 : {0x1EA6, 0x1EA7,},
2286 : {0x1EA8, 0x1EA9,},
2287 : {0x1EAA, 0x1EAB,},
2288 : {0x1EAC, 0x1EAD,},
2289 : {0x1EAE, 0x1EAF,},
2290 : {0x1EB0, 0x1EB1,},
2291 : {0x1EB2, 0x1EB3,},
2292 : {0x1EB4, 0x1EB5,},
2293 : {0x1EB6, 0x1EB7,},
2294 : {0x1EB8, 0x1EB9,},
2295 : {0x1EBA, 0x1EBB,},
2296 : {0x1EBC, 0x1EBD,},
2297 : {0x1EBE, 0x1EBF,},
2298 : {0x1EC0, 0x1EC1,},
2299 : {0x1EC2, 0x1EC3,},
2300 : {0x1EC4, 0x1EC5,},
2301 : {0x1EC6, 0x1EC7,},
2302 : {0x1EC8, 0x1EC9,},
2303 : {0x1ECA, 0x1ECB,},
2304 : {0x1ECC, 0x1ECD,},
2305 : {0x1ECE, 0x1ECF,},
2306 : {0x1ED0, 0x1ED1,},
2307 : {0x1ED2, 0x1ED3,},
2308 : {0x1ED4, 0x1ED5,},
2309 : {0x1ED6, 0x1ED7,},
2310 : {0x1ED8, 0x1ED9,},
2311 : {0x1EDA, 0x1EDB,},
2312 : {0x1EDC, 0x1EDD,},
2313 : {0x1EDE, 0x1EDF,},
2314 : {0x1EE0, 0x1EE1,},
2315 : {0x1EE2, 0x1EE3,},
2316 : {0x1EE4, 0x1EE5,},
2317 : {0x1EE6, 0x1EE7,},
2318 : {0x1EE8, 0x1EE9,},
2319 : {0x1EEA, 0x1EEB,},
2320 : {0x1EEC, 0x1EED,},
2321 : {0x1EEE, 0x1EEF,},
2322 : {0x1EF0, 0x1EF1,},
2323 : {0x1EF2, 0x1EF3,},
2324 : {0x1EF4, 0x1EF5,},
2325 : {0x1EF6, 0x1EF7,},
2326 : {0x1EF8, 0x1EF9,},
2327 : {0x1EFA, 0x1EFB,},
2328 : {0x1EFC, 0x1EFD,},
2329 : {0x1EFE, 0x1EFF,},
2330 : {0x1F08, 0x1F00,},
2331 : {0x1F09, 0x1F01,},
2332 : {0x1F0A, 0x1F02,},
2333 : {0x1F0B, 0x1F03,},
2334 : {0x1F0C, 0x1F04,},
2335 : {0x1F0D, 0x1F05,},
2336 : {0x1F0E, 0x1F06,},
2337 : {0x1F0F, 0x1F07,},
2338 : {0x1F18, 0x1F10,},
2339 : {0x1F19, 0x1F11,},
2340 : {0x1F1A, 0x1F12,},
2341 : {0x1F1B, 0x1F13,},
2342 : {0x1F1C, 0x1F14,},
2343 : {0x1F1D, 0x1F15,},
2344 : {0x1F28, 0x1F20,},
2345 : {0x1F29, 0x1F21,},
2346 : {0x1F2A, 0x1F22,},
2347 : {0x1F2B, 0x1F23,},
2348 : {0x1F2C, 0x1F24,},
2349 : {0x1F2D, 0x1F25,},
2350 : {0x1F2E, 0x1F26,},
2351 : {0x1F2F, 0x1F27,},
2352 : {0x1F38, 0x1F30,},
2353 : {0x1F39, 0x1F31,},
2354 : {0x1F3A, 0x1F32,},
2355 : {0x1F3B, 0x1F33,},
2356 : {0x1F3C, 0x1F34,},
2357 : {0x1F3D, 0x1F35,},
2358 : {0x1F3E, 0x1F36,},
2359 : {0x1F3F, 0x1F37,},
2360 : {0x1F48, 0x1F40,},
2361 : {0x1F49, 0x1F41,},
2362 : {0x1F4A, 0x1F42,},
2363 : {0x1F4B, 0x1F43,},
2364 : {0x1F4C, 0x1F44,},
2365 : {0x1F4D, 0x1F45,},
2366 : {0x1F59, 0x1F51,},
2367 : {0x1F5B, 0x1F53,},
2368 : {0x1F5D, 0x1F55,},
2369 : {0x1F5F, 0x1F57,},
2370 : {0x1F68, 0x1F60,},
2371 : {0x1F69, 0x1F61,},
2372 : {0x1F6A, 0x1F62,},
2373 : {0x1F6B, 0x1F63,},
2374 : {0x1F6C, 0x1F64,},
2375 : {0x1F6D, 0x1F65,},
2376 : {0x1F6E, 0x1F66,},
2377 : {0x1F6F, 0x1F67,},
2378 : {0x1F88, 0x1F80,},
2379 : {0x1F89, 0x1F81,},
2380 : {0x1F8A, 0x1F82,},
2381 : {0x1F8B, 0x1F83,},
2382 : {0x1F8C, 0x1F84,},
2383 : {0x1F8D, 0x1F85,},
2384 : {0x1F8E, 0x1F86,},
2385 : {0x1F8F, 0x1F87,},
2386 : {0x1F98, 0x1F90,},
2387 : {0x1F99, 0x1F91,},
2388 : {0x1F9A, 0x1F92,},
2389 : {0x1F9B, 0x1F93,},
2390 : {0x1F9C, 0x1F94,},
2391 : {0x1F9D, 0x1F95,},
2392 : {0x1F9E, 0x1F96,},
2393 : {0x1F9F, 0x1F97,},
2394 : {0x1FA8, 0x1FA0,},
2395 : {0x1FA9, 0x1FA1,},
2396 : {0x1FAA, 0x1FA2,},
2397 : {0x1FAB, 0x1FA3,},
2398 : {0x1FAC, 0x1FA4,},
2399 : {0x1FAD, 0x1FA5,},
2400 : {0x1FAE, 0x1FA6,},
2401 : {0x1FAF, 0x1FA7,},
2402 : {0x1FB8, 0x1FB0,},
2403 : {0x1FB9, 0x1FB1,},
2404 : {0x1FBA, 0x1F70,},
2405 : {0x1FBB, 0x1F71,},
2406 : {0x1FBC, 0x1FB3,},
2407 : {0x1FC8, 0x1F72,},
2408 : {0x1FC9, 0x1F73,},
2409 : {0x1FCA, 0x1F74,},
2410 : {0x1FCB, 0x1F75,},
2411 : {0x1FCC, 0x1FC3,},
2412 : {0x1FD8, 0x1FD0,},
2413 : {0x1FD9, 0x1FD1,},
2414 : {0x1FDA, 0x1F76,},
2415 : {0x1FDB, 0x1F77,},
2416 : {0x1FE8, 0x1FE0,},
2417 : {0x1FE9, 0x1FE1,},
2418 : {0x1FEA, 0x1F7A,},
2419 : {0x1FEB, 0x1F7B,},
2420 : {0x1FEC, 0x1FE5,},
2421 : {0x1FF8, 0x1F78,},
2422 : {0x1FF9, 0x1F79,},
2423 : {0x1FFA, 0x1F7C,},
2424 : {0x1FFB, 0x1F7D,},
2425 : {0x1FFC, 0x1FF3,},
2426 : {0x2126, 0x03C9,},
2427 : {0x212A, 0x006B,},
2428 : {0x212B, 0x00E5,},
2429 : {0x2132, 0x214E,},
2430 : {0x2160, 0x2170,},
2431 : {0x2161, 0x2171,},
2432 : {0x2162, 0x2172,},
2433 : {0x2163, 0x2173,},
2434 : {0x2164, 0x2174,},
2435 : {0x2165, 0x2175,},
2436 : {0x2166, 0x2176,},
2437 : {0x2167, 0x2177,},
2438 : {0x2168, 0x2178,},
2439 : {0x2169, 0x2179,},
2440 : {0x216A, 0x217A,},
2441 : {0x216B, 0x217B,},
2442 : {0x216C, 0x217C,},
2443 : {0x216D, 0x217D,},
2444 : {0x216E, 0x217E,},
2445 : {0x216F, 0x217F,},
2446 : {0x2183, 0x2184,},
2447 : {0x24B6, 0x24D0,},
2448 : {0x24B7, 0x24D1,},
2449 : {0x24B8, 0x24D2,},
2450 : {0x24B9, 0x24D3,},
2451 : {0x24BA, 0x24D4,},
2452 : {0x24BB, 0x24D5,},
2453 : {0x24BC, 0x24D6,},
2454 : {0x24BD, 0x24D7,},
2455 : {0x24BE, 0x24D8,},
2456 : {0x24BF, 0x24D9,},
2457 : {0x24C0, 0x24DA,},
2458 : {0x24C1, 0x24DB,},
2459 : {0x24C2, 0x24DC,},
2460 : {0x24C3, 0x24DD,},
2461 : {0x24C4, 0x24DE,},
2462 : {0x24C5, 0x24DF,},
2463 : {0x24C6, 0x24E0,},
2464 : {0x24C7, 0x24E1,},
2465 : {0x24C8, 0x24E2,},
2466 : {0x24C9, 0x24E3,},
2467 : {0x24CA, 0x24E4,},
2468 : {0x24CB, 0x24E5,},
2469 : {0x24CC, 0x24E6,},
2470 : {0x24CD, 0x24E7,},
2471 : {0x24CE, 0x24E8,},
2472 : {0x24CF, 0x24E9,},
2473 : {0x2C00, 0x2C30,},
2474 : {0x2C01, 0x2C31,},
2475 : {0x2C02, 0x2C32,},
2476 : {0x2C03, 0x2C33,},
2477 : {0x2C04, 0x2C34,},
2478 : {0x2C05, 0x2C35,},
2479 : {0x2C06, 0x2C36,},
2480 : {0x2C07, 0x2C37,},
2481 : {0x2C08, 0x2C38,},
2482 : {0x2C09, 0x2C39,},
2483 : {0x2C0A, 0x2C3A,},
2484 : {0x2C0B, 0x2C3B,},
2485 : {0x2C0C, 0x2C3C,},
2486 : {0x2C0D, 0x2C3D,},
2487 : {0x2C0E, 0x2C3E,},
2488 : {0x2C0F, 0x2C3F,},
2489 : {0x2C10, 0x2C40,},
2490 : {0x2C11, 0x2C41,},
2491 : {0x2C12, 0x2C42,},
2492 : {0x2C13, 0x2C43,},
2493 : {0x2C14, 0x2C44,},
2494 : {0x2C15, 0x2C45,},
2495 : {0x2C16, 0x2C46,},
2496 : {0x2C17, 0x2C47,},
2497 : {0x2C18, 0x2C48,},
2498 : {0x2C19, 0x2C49,},
2499 : {0x2C1A, 0x2C4A,},
2500 : {0x2C1B, 0x2C4B,},
2501 : {0x2C1C, 0x2C4C,},
2502 : {0x2C1D, 0x2C4D,},
2503 : {0x2C1E, 0x2C4E,},
2504 : {0x2C1F, 0x2C4F,},
2505 : {0x2C20, 0x2C50,},
2506 : {0x2C21, 0x2C51,},
2507 : {0x2C22, 0x2C52,},
2508 : {0x2C23, 0x2C53,},
2509 : {0x2C24, 0x2C54,},
2510 : {0x2C25, 0x2C55,},
2511 : {0x2C26, 0x2C56,},
2512 : {0x2C27, 0x2C57,},
2513 : {0x2C28, 0x2C58,},
2514 : {0x2C29, 0x2C59,},
2515 : {0x2C2A, 0x2C5A,},
2516 : {0x2C2B, 0x2C5B,},
2517 : {0x2C2C, 0x2C5C,},
2518 : {0x2C2D, 0x2C5D,},
2519 : {0x2C2E, 0x2C5E,},
2520 : {0x2C2F, 0x2C5F,},
2521 : {0x2C60, 0x2C61,},
2522 : {0x2C62, 0x026B,},
2523 : {0x2C63, 0x1D7D,},
2524 : {0x2C64, 0x027D,},
2525 : {0x2C67, 0x2C68,},
2526 : {0x2C69, 0x2C6A,},
2527 : {0x2C6B, 0x2C6C,},
2528 : {0x2C6D, 0x0251,},
2529 : {0x2C6E, 0x0271,},
2530 : {0x2C6F, 0x0250,},
2531 : {0x2C70, 0x0252,},
2532 : {0x2C72, 0x2C73,},
2533 : {0x2C75, 0x2C76,},
2534 : {0x2C7E, 0x023F,},
2535 : {0x2C7F, 0x0240,},
2536 : {0x2C80, 0x2C81,},
2537 : {0x2C82, 0x2C83,},
2538 : {0x2C84, 0x2C85,},
2539 : {0x2C86, 0x2C87,},
2540 : {0x2C88, 0x2C89,},
2541 : {0x2C8A, 0x2C8B,},
2542 : {0x2C8C, 0x2C8D,},
2543 : {0x2C8E, 0x2C8F,},
2544 : {0x2C90, 0x2C91,},
2545 : {0x2C92, 0x2C93,},
2546 : {0x2C94, 0x2C95,},
2547 : {0x2C96, 0x2C97,},
2548 : {0x2C98, 0x2C99,},
2549 : {0x2C9A, 0x2C9B,},
2550 : {0x2C9C, 0x2C9D,},
2551 : {0x2C9E, 0x2C9F,},
2552 : {0x2CA0, 0x2CA1,},
2553 : {0x2CA2, 0x2CA3,},
2554 : {0x2CA4, 0x2CA5,},
2555 : {0x2CA6, 0x2CA7,},
2556 : {0x2CA8, 0x2CA9,},
2557 : {0x2CAA, 0x2CAB,},
2558 : {0x2CAC, 0x2CAD,},
2559 : {0x2CAE, 0x2CAF,},
2560 : {0x2CB0, 0x2CB1,},
2561 : {0x2CB2, 0x2CB3,},
2562 : {0x2CB4, 0x2CB5,},
2563 : {0x2CB6, 0x2CB7,},
2564 : {0x2CB8, 0x2CB9,},
2565 : {0x2CBA, 0x2CBB,},
2566 : {0x2CBC, 0x2CBD,},
2567 : {0x2CBE, 0x2CBF,},
2568 : {0x2CC0, 0x2CC1,},
2569 : {0x2CC2, 0x2CC3,},
2570 : {0x2CC4, 0x2CC5,},
2571 : {0x2CC6, 0x2CC7,},
2572 : {0x2CC8, 0x2CC9,},
2573 : {0x2CCA, 0x2CCB,},
2574 : {0x2CCC, 0x2CCD,},
2575 : {0x2CCE, 0x2CCF,},
2576 : {0x2CD0, 0x2CD1,},
2577 : {0x2CD2, 0x2CD3,},
2578 : {0x2CD4, 0x2CD5,},
2579 : {0x2CD6, 0x2CD7,},
2580 : {0x2CD8, 0x2CD9,},
2581 : {0x2CDA, 0x2CDB,},
2582 : {0x2CDC, 0x2CDD,},
2583 : {0x2CDE, 0x2CDF,},
2584 : {0x2CE0, 0x2CE1,},
2585 : {0x2CE2, 0x2CE3,},
2586 : {0x2CEB, 0x2CEC,},
2587 : {0x2CED, 0x2CEE,},
2588 : {0x2CF2, 0x2CF3,},
2589 : {0xA640, 0xA641,},
2590 : {0xA642, 0xA643,},
2591 : {0xA644, 0xA645,},
2592 : {0xA646, 0xA647,},
2593 : {0xA648, 0xA649,},
2594 : {0xA64A, 0xA64B,},
2595 : {0xA64C, 0xA64D,},
2596 : {0xA64E, 0xA64F,},
2597 : {0xA650, 0xA651,},
2598 : {0xA652, 0xA653,},
2599 : {0xA654, 0xA655,},
2600 : {0xA656, 0xA657,},
2601 : {0xA658, 0xA659,},
2602 : {0xA65A, 0xA65B,},
2603 : {0xA65C, 0xA65D,},
2604 : {0xA65E, 0xA65F,},
2605 : {0xA660, 0xA661,},
2606 : {0xA662, 0xA663,},
2607 : {0xA664, 0xA665,},
2608 : {0xA666, 0xA667,},
2609 : {0xA668, 0xA669,},
2610 : {0xA66A, 0xA66B,},
2611 : {0xA66C, 0xA66D,},
2612 : {0xA680, 0xA681,},
2613 : {0xA682, 0xA683,},
2614 : {0xA684, 0xA685,},
2615 : {0xA686, 0xA687,},
2616 : {0xA688, 0xA689,},
2617 : {0xA68A, 0xA68B,},
2618 : {0xA68C, 0xA68D,},
2619 : {0xA68E, 0xA68F,},
2620 : {0xA690, 0xA691,},
2621 : {0xA692, 0xA693,},
2622 : {0xA694, 0xA695,},
2623 : {0xA696, 0xA697,},
2624 : {0xA698, 0xA699,},
2625 : {0xA69A, 0xA69B,},
2626 : {0xA722, 0xA723,},
2627 : {0xA724, 0xA725,},
2628 : {0xA726, 0xA727,},
2629 : {0xA728, 0xA729,},
2630 : {0xA72A, 0xA72B,},
2631 : {0xA72C, 0xA72D,},
2632 : {0xA72E, 0xA72F,},
2633 : {0xA732, 0xA733,},
2634 : {0xA734, 0xA735,},
2635 : {0xA736, 0xA737,},
2636 : {0xA738, 0xA739,},
2637 : {0xA73A, 0xA73B,},
2638 : {0xA73C, 0xA73D,},
2639 : {0xA73E, 0xA73F,},
2640 : {0xA740, 0xA741,},
2641 : {0xA742, 0xA743,},
2642 : {0xA744, 0xA745,},
2643 : {0xA746, 0xA747,},
2644 : {0xA748, 0xA749,},
2645 : {0xA74A, 0xA74B,},
2646 : {0xA74C, 0xA74D,},
2647 : {0xA74E, 0xA74F,},
2648 : {0xA750, 0xA751,},
2649 : {0xA752, 0xA753,},
2650 : {0xA754, 0xA755,},
2651 : {0xA756, 0xA757,},
2652 : {0xA758, 0xA759,},
2653 : {0xA75A, 0xA75B,},
2654 : {0xA75C, 0xA75D,},
2655 : {0xA75E, 0xA75F,},
2656 : {0xA760, 0xA761,},
2657 : {0xA762, 0xA763,},
2658 : {0xA764, 0xA765,},
2659 : {0xA766, 0xA767,},
2660 : {0xA768, 0xA769,},
2661 : {0xA76A, 0xA76B,},
2662 : {0xA76C, 0xA76D,},
2663 : {0xA76E, 0xA76F,},
2664 : {0xA779, 0xA77A,},
2665 : {0xA77B, 0xA77C,},
2666 : {0xA77D, 0x1D79,},
2667 : {0xA77E, 0xA77F,},
2668 : {0xA780, 0xA781,},
2669 : {0xA782, 0xA783,},
2670 : {0xA784, 0xA785,},
2671 : {0xA786, 0xA787,},
2672 : {0xA78B, 0xA78C,},
2673 : {0xA78D, 0x0265,},
2674 : {0xA790, 0xA791,},
2675 : {0xA792, 0xA793,},
2676 : {0xA796, 0xA797,},
2677 : {0xA798, 0xA799,},
2678 : {0xA79A, 0xA79B,},
2679 : {0xA79C, 0xA79D,},
2680 : {0xA79E, 0xA79F,},
2681 : {0xA7A0, 0xA7A1,},
2682 : {0xA7A2, 0xA7A3,},
2683 : {0xA7A4, 0xA7A5,},
2684 : {0xA7A6, 0xA7A7,},
2685 : {0xA7A8, 0xA7A9,},
2686 : {0xA7AA, 0x0266,},
2687 : {0xA7AB, 0x025C,},
2688 : {0xA7AC, 0x0261,},
2689 : {0xA7AD, 0x026C,},
2690 : {0xA7AE, 0x026A,},
2691 : {0xA7B0, 0x029E,},
2692 : {0xA7B1, 0x0287,},
2693 : {0xA7B2, 0x029D,},
2694 : {0xA7B3, 0xAB53,},
2695 : {0xA7B4, 0xA7B5,},
2696 : {0xA7B6, 0xA7B7,},
2697 : {0xA7B8, 0xA7B9,},
2698 : {0xA7BA, 0xA7BB,},
2699 : {0xA7BC, 0xA7BD,},
2700 : {0xA7BE, 0xA7BF,},
2701 : {0xA7C0, 0xA7C1,},
2702 : {0xA7C2, 0xA7C3,},
2703 : {0xA7C4, 0xA794,},
2704 : {0xA7C5, 0x0282,},
2705 : {0xA7C6, 0x1D8E,},
2706 : {0xA7C7, 0xA7C8,},
2707 : {0xA7C9, 0xA7CA,},
2708 : {0xA7D0, 0xA7D1,},
2709 : {0xA7D6, 0xA7D7,},
2710 : {0xA7D8, 0xA7D9,},
2711 : {0xA7F5, 0xA7F6,},
2712 : {0xFF21, 0xFF41,},
2713 : {0xFF22, 0xFF42,},
2714 : {0xFF23, 0xFF43,},
2715 : {0xFF24, 0xFF44,},
2716 : {0xFF25, 0xFF45,},
2717 : {0xFF26, 0xFF46,},
2718 : {0xFF27, 0xFF47,},
2719 : {0xFF28, 0xFF48,},
2720 : {0xFF29, 0xFF49,},
2721 : {0xFF2A, 0xFF4A,},
2722 : {0xFF2B, 0xFF4B,},
2723 : {0xFF2C, 0xFF4C,},
2724 : {0xFF2D, 0xFF4D,},
2725 : {0xFF2E, 0xFF4E,},
2726 : {0xFF2F, 0xFF4F,},
2727 : {0xFF30, 0xFF50,},
2728 : {0xFF31, 0xFF51,},
2729 : {0xFF32, 0xFF52,},
2730 : {0xFF33, 0xFF53,},
2731 : {0xFF34, 0xFF54,},
2732 : {0xFF35, 0xFF55,},
2733 : {0xFF36, 0xFF56,},
2734 : {0xFF37, 0xFF57,},
2735 : {0xFF38, 0xFF58,},
2736 : {0xFF39, 0xFF59,},
2737 : {0xFF3A, 0xFF5A,},
2738 : {0x10400, 0x10428,},
2739 : {0x10401, 0x10429,},
2740 : {0x10402, 0x1042A,},
2741 : {0x10403, 0x1042B,},
2742 : {0x10404, 0x1042C,},
2743 : {0x10405, 0x1042D,},
2744 : {0x10406, 0x1042E,},
2745 : {0x10407, 0x1042F,},
2746 : {0x10408, 0x10430,},
2747 : {0x10409, 0x10431,},
2748 : {0x1040A, 0x10432,},
2749 : {0x1040B, 0x10433,},
2750 : {0x1040C, 0x10434,},
2751 : {0x1040D, 0x10435,},
2752 : {0x1040E, 0x10436,},
2753 : {0x1040F, 0x10437,},
2754 : {0x10410, 0x10438,},
2755 : {0x10411, 0x10439,},
2756 : {0x10412, 0x1043A,},
2757 : {0x10413, 0x1043B,},
2758 : {0x10414, 0x1043C,},
2759 : {0x10415, 0x1043D,},
2760 : {0x10416, 0x1043E,},
2761 : {0x10417, 0x1043F,},
2762 : {0x10418, 0x10440,},
2763 : {0x10419, 0x10441,},
2764 : {0x1041A, 0x10442,},
2765 : {0x1041B, 0x10443,},
2766 : {0x1041C, 0x10444,},
2767 : {0x1041D, 0x10445,},
2768 : {0x1041E, 0x10446,},
2769 : {0x1041F, 0x10447,},
2770 : {0x10420, 0x10448,},
2771 : {0x10421, 0x10449,},
2772 : {0x10422, 0x1044A,},
2773 : {0x10423, 0x1044B,},
2774 : {0x10424, 0x1044C,},
2775 : {0x10425, 0x1044D,},
2776 : {0x10426, 0x1044E,},
2777 : {0x10427, 0x1044F,},
2778 : {0x104B0, 0x104D8,},
2779 : {0x104B1, 0x104D9,},
2780 : {0x104B2, 0x104DA,},
2781 : {0x104B3, 0x104DB,},
2782 : {0x104B4, 0x104DC,},
2783 : {0x104B5, 0x104DD,},
2784 : {0x104B6, 0x104DE,},
2785 : {0x104B7, 0x104DF,},
2786 : {0x104B8, 0x104E0,},
2787 : {0x104B9, 0x104E1,},
2788 : {0x104BA, 0x104E2,},
2789 : {0x104BB, 0x104E3,},
2790 : {0x104BC, 0x104E4,},
2791 : {0x104BD, 0x104E5,},
2792 : {0x104BE, 0x104E6,},
2793 : {0x104BF, 0x104E7,},
2794 : {0x104C0, 0x104E8,},
2795 : {0x104C1, 0x104E9,},
2796 : {0x104C2, 0x104EA,},
2797 : {0x104C3, 0x104EB,},
2798 : {0x104C4, 0x104EC,},
2799 : {0x104C5, 0x104ED,},
2800 : {0x104C6, 0x104EE,},
2801 : {0x104C7, 0x104EF,},
2802 : {0x104C8, 0x104F0,},
2803 : {0x104C9, 0x104F1,},
2804 : {0x104CA, 0x104F2,},
2805 : {0x104CB, 0x104F3,},
2806 : {0x104CC, 0x104F4,},
2807 : {0x104CD, 0x104F5,},
2808 : {0x104CE, 0x104F6,},
2809 : {0x104CF, 0x104F7,},
2810 : {0x104D0, 0x104F8,},
2811 : {0x104D1, 0x104F9,},
2812 : {0x104D2, 0x104FA,},
2813 : {0x104D3, 0x104FB,},
2814 : {0x10570, 0x10597,},
2815 : {0x10571, 0x10598,},
2816 : {0x10572, 0x10599,},
2817 : {0x10573, 0x1059A,},
2818 : {0x10574, 0x1059B,},
2819 : {0x10575, 0x1059C,},
2820 : {0x10576, 0x1059D,},
2821 : {0x10577, 0x1059E,},
2822 : {0x10578, 0x1059F,},
2823 : {0x10579, 0x105A0,},
2824 : {0x1057A, 0x105A1,},
2825 : {0x1057C, 0x105A3,},
2826 : {0x1057D, 0x105A4,},
2827 : {0x1057E, 0x105A5,},
2828 : {0x1057F, 0x105A6,},
2829 : {0x10580, 0x105A7,},
2830 : {0x10581, 0x105A8,},
2831 : {0x10582, 0x105A9,},
2832 : {0x10583, 0x105AA,},
2833 : {0x10584, 0x105AB,},
2834 : {0x10585, 0x105AC,},
2835 : {0x10586, 0x105AD,},
2836 : {0x10587, 0x105AE,},
2837 : {0x10588, 0x105AF,},
2838 : {0x10589, 0x105B0,},
2839 : {0x1058A, 0x105B1,},
2840 : {0x1058C, 0x105B3,},
2841 : {0x1058D, 0x105B4,},
2842 : {0x1058E, 0x105B5,},
2843 : {0x1058F, 0x105B6,},
2844 : {0x10590, 0x105B7,},
2845 : {0x10591, 0x105B8,},
2846 : {0x10592, 0x105B9,},
2847 : {0x10594, 0x105BB,},
2848 : {0x10595, 0x105BC,},
2849 : {0x10C80, 0x10CC0,},
2850 : {0x10C81, 0x10CC1,},
2851 : {0x10C82, 0x10CC2,},
2852 : {0x10C83, 0x10CC3,},
2853 : {0x10C84, 0x10CC4,},
2854 : {0x10C85, 0x10CC5,},
2855 : {0x10C86, 0x10CC6,},
2856 : {0x10C87, 0x10CC7,},
2857 : {0x10C88, 0x10CC8,},
2858 : {0x10C89, 0x10CC9,},
2859 : {0x10C8A, 0x10CCA,},
2860 : {0x10C8B, 0x10CCB,},
2861 : {0x10C8C, 0x10CCC,},
2862 : {0x10C8D, 0x10CCD,},
2863 : {0x10C8E, 0x10CCE,},
2864 : {0x10C8F, 0x10CCF,},
2865 : {0x10C90, 0x10CD0,},
2866 : {0x10C91, 0x10CD1,},
2867 : {0x10C92, 0x10CD2,},
2868 : {0x10C93, 0x10CD3,},
2869 : {0x10C94, 0x10CD4,},
2870 : {0x10C95, 0x10CD5,},
2871 : {0x10C96, 0x10CD6,},
2872 : {0x10C97, 0x10CD7,},
2873 : {0x10C98, 0x10CD8,},
2874 : {0x10C99, 0x10CD9,},
2875 : {0x10C9A, 0x10CDA,},
2876 : {0x10C9B, 0x10CDB,},
2877 : {0x10C9C, 0x10CDC,},
2878 : {0x10C9D, 0x10CDD,},
2879 : {0x10C9E, 0x10CDE,},
2880 : {0x10C9F, 0x10CDF,},
2881 : {0x10CA0, 0x10CE0,},
2882 : {0x10CA1, 0x10CE1,},
2883 : {0x10CA2, 0x10CE2,},
2884 : {0x10CA3, 0x10CE3,},
2885 : {0x10CA4, 0x10CE4,},
2886 : {0x10CA5, 0x10CE5,},
2887 : {0x10CA6, 0x10CE6,},
2888 : {0x10CA7, 0x10CE7,},
2889 : {0x10CA8, 0x10CE8,},
2890 : {0x10CA9, 0x10CE9,},
2891 : {0x10CAA, 0x10CEA,},
2892 : {0x10CAB, 0x10CEB,},
2893 : {0x10CAC, 0x10CEC,},
2894 : {0x10CAD, 0x10CED,},
2895 : {0x10CAE, 0x10CEE,},
2896 : {0x10CAF, 0x10CEF,},
2897 : {0x10CB0, 0x10CF0,},
2898 : {0x10CB1, 0x10CF1,},
2899 : {0x10CB2, 0x10CF2,},
2900 : {0x118A0, 0x118C0,},
2901 : {0x118A1, 0x118C1,},
2902 : {0x118A2, 0x118C2,},
2903 : {0x118A3, 0x118C3,},
2904 : {0x118A4, 0x118C4,},
2905 : {0x118A5, 0x118C5,},
2906 : {0x118A6, 0x118C6,},
2907 : {0x118A7, 0x118C7,},
2908 : {0x118A8, 0x118C8,},
2909 : {0x118A9, 0x118C9,},
2910 : {0x118AA, 0x118CA,},
2911 : {0x118AB, 0x118CB,},
2912 : {0x118AC, 0x118CC,},
2913 : {0x118AD, 0x118CD,},
2914 : {0x118AE, 0x118CE,},
2915 : {0x118AF, 0x118CF,},
2916 : {0x118B0, 0x118D0,},
2917 : {0x118B1, 0x118D1,},
2918 : {0x118B2, 0x118D2,},
2919 : {0x118B3, 0x118D3,},
2920 : {0x118B4, 0x118D4,},
2921 : {0x118B5, 0x118D5,},
2922 : {0x118B6, 0x118D6,},
2923 : {0x118B7, 0x118D7,},
2924 : {0x118B8, 0x118D8,},
2925 : {0x118B9, 0x118D9,},
2926 : {0x118BA, 0x118DA,},
2927 : {0x118BB, 0x118DB,},
2928 : {0x118BC, 0x118DC,},
2929 : {0x118BD, 0x118DD,},
2930 : {0x118BE, 0x118DE,},
2931 : {0x118BF, 0x118DF,},
2932 : {0x16E40, 0x16E60,},
2933 : {0x16E41, 0x16E61,},
2934 : {0x16E42, 0x16E62,},
2935 : {0x16E43, 0x16E63,},
2936 : {0x16E44, 0x16E64,},
2937 : {0x16E45, 0x16E65,},
2938 : {0x16E46, 0x16E66,},
2939 : {0x16E47, 0x16E67,},
2940 : {0x16E48, 0x16E68,},
2941 : {0x16E49, 0x16E69,},
2942 : {0x16E4A, 0x16E6A,},
2943 : {0x16E4B, 0x16E6B,},
2944 : {0x16E4C, 0x16E6C,},
2945 : {0x16E4D, 0x16E6D,},
2946 : {0x16E4E, 0x16E6E,},
2947 : {0x16E4F, 0x16E6F,},
2948 : {0x16E50, 0x16E70,},
2949 : {0x16E51, 0x16E71,},
2950 : {0x16E52, 0x16E72,},
2951 : {0x16E53, 0x16E73,},
2952 : {0x16E54, 0x16E74,},
2953 : {0x16E55, 0x16E75,},
2954 : {0x16E56, 0x16E76,},
2955 : {0x16E57, 0x16E77,},
2956 : {0x16E58, 0x16E78,},
2957 : {0x16E59, 0x16E79,},
2958 : {0x16E5A, 0x16E7A,},
2959 : {0x16E5B, 0x16E7B,},
2960 : {0x16E5C, 0x16E7C,},
2961 : {0x16E5D, 0x16E7D,},
2962 : {0x16E5E, 0x16E7E,},
2963 : {0x16E5F, 0x16E7F,},
2964 : {0x1E900, 0x1E922,},
2965 : {0x1E901, 0x1E923,},
2966 : {0x1E902, 0x1E924,},
2967 : {0x1E903, 0x1E925,},
2968 : {0x1E904, 0x1E926,},
2969 : {0x1E905, 0x1E927,},
2970 : {0x1E906, 0x1E928,},
2971 : {0x1E907, 0x1E929,},
2972 : {0x1E908, 0x1E92A,},
2973 : {0x1E909, 0x1E92B,},
2974 : {0x1E90A, 0x1E92C,},
2975 : {0x1E90B, 0x1E92D,},
2976 : {0x1E90C, 0x1E92E,},
2977 : {0x1E90D, 0x1E92F,},
2978 : {0x1E90E, 0x1E930,},
2979 : {0x1E90F, 0x1E931,},
2980 : {0x1E910, 0x1E932,},
2981 : {0x1E911, 0x1E933,},
2982 : {0x1E912, 0x1E934,},
2983 : {0x1E913, 0x1E935,},
2984 : {0x1E914, 0x1E936,},
2985 : {0x1E915, 0x1E937,},
2986 : {0x1E916, 0x1E938,},
2987 : {0x1E917, 0x1E939,},
2988 : {0x1E918, 0x1E93A,},
2989 : {0x1E919, 0x1E93B,},
2990 : {0x1E91A, 0x1E93C,},
2991 : {0x1E91B, 0x1E93D,},
2992 : {0x1E91C, 0x1E93E,},
2993 : {0x1E91D, 0x1E93F,},
2994 : {0x1E91E, 0x1E940,},
2995 : {0x1E91F, 0x1E941,},
2996 : {0x1E920, 0x1E942,},
2997 : {0x1E921, 0x1E943,},
2998 : };
2999 :
3000 : static BAT *UTF8_toUpperFrom = NULL, *UTF8_toUpperTo = NULL,
3001 : *UTF8_toLowerFrom = NULL, *UTF8_toLowerTo = NULL;
3002 :
3003 : static str
3004 336 : STRprelude(void)
3005 : {
3006 336 : if (UTF8_toUpperFrom == NULL) {
3007 336 : size_t i;
3008 :
3009 336 : UTF8_toUpperFrom = COLnew(0, TYPE_int,
3010 : sizeof(UTF8_toUpper) / sizeof(UTF8_toUpper[0]),
3011 : SYSTRANS);
3012 336 : UTF8_toUpperTo = COLnew(0, TYPE_int,
3013 : sizeof(UTF8_toUpper) / sizeof(UTF8_toUpper[0]),
3014 : SYSTRANS);
3015 336 : UTF8_toLowerFrom = COLnew(0, TYPE_int,
3016 : sizeof(UTF8_toLower) / sizeof(UTF8_toLower[0]),
3017 : SYSTRANS);
3018 336 : UTF8_toLowerTo = COLnew(0, TYPE_int,
3019 : sizeof(UTF8_toLower) / sizeof(UTF8_toLower[0]),
3020 : SYSTRANS);
3021 336 : if (UTF8_toUpperFrom == NULL || UTF8_toUpperTo == NULL
3022 336 : || UTF8_toLowerFrom == NULL || UTF8_toLowerTo == NULL) {
3023 0 : goto bailout;
3024 : }
3025 :
3026 336 : int *fp = (int *) Tloc(UTF8_toUpperFrom, 0);
3027 336 : int *tp = (int *) Tloc(UTF8_toUpperTo, 0);
3028 487536 : for (i = 0; i < sizeof(UTF8_toUpper) / sizeof(UTF8_toUpper[0]); i++) {
3029 487200 : fp[i] = UTF8_toUpper[i].from;
3030 487200 : tp[i] = UTF8_toUpper[i].to;
3031 : }
3032 336 : BATsetcount(UTF8_toUpperFrom, i);
3033 336 : UTF8_toUpperFrom->tkey = true;
3034 336 : UTF8_toUpperFrom->tsorted = true;
3035 336 : UTF8_toUpperFrom->trevsorted = false;
3036 336 : UTF8_toUpperFrom->tnil = false;
3037 336 : UTF8_toUpperFrom->tnonil = true;
3038 336 : BATsetcount(UTF8_toUpperTo, i);
3039 336 : UTF8_toUpperTo->tkey = false;
3040 336 : UTF8_toUpperTo->tsorted = false;
3041 336 : UTF8_toUpperTo->trevsorted = false;
3042 336 : UTF8_toUpperTo->tnil = false;
3043 336 : UTF8_toUpperTo->tnonil = true;
3044 :
3045 336 : fp = (int *) Tloc(UTF8_toLowerFrom, 0);
3046 336 : tp = (int *) Tloc(UTF8_toLowerTo, 0);
3047 481824 : for (i = 0; i < sizeof(UTF8_toLower) / sizeof(UTF8_toLower[0]); i++) {
3048 481488 : fp[i] = UTF8_toLower[i].from;
3049 481488 : tp[i] = UTF8_toLower[i].to;
3050 : }
3051 336 : BATsetcount(UTF8_toLowerFrom, i);
3052 336 : UTF8_toLowerFrom->tkey = true;
3053 336 : UTF8_toLowerFrom->tsorted = true;
3054 336 : UTF8_toLowerFrom->trevsorted = false;
3055 336 : UTF8_toLowerFrom->tnil = false;
3056 336 : UTF8_toLowerFrom->tnonil = true;
3057 336 : BATsetcount(UTF8_toLowerTo, i);
3058 336 : UTF8_toLowerTo->tkey = false;
3059 336 : UTF8_toLowerTo->tsorted = false;
3060 336 : UTF8_toLowerTo->trevsorted = false;
3061 336 : UTF8_toLowerTo->tnil = false;
3062 336 : UTF8_toLowerTo->tnonil = true;
3063 :
3064 672 : if (BBPrename(UTF8_toUpperFrom, "monet_unicode_upper_from") != 0 ||
3065 672 : BBPrename(UTF8_toUpperTo, "monet_unicode_upper_to") != 0 ||
3066 672 : BBPrename(UTF8_toLowerFrom, "monet_unicode_lower_from") != 0 ||
3067 336 : BBPrename(UTF8_toLowerTo, "monet_unicode_lower_to") != 0) {
3068 0 : goto bailout;
3069 : }
3070 336 : BBP_pid(UTF8_toUpperFrom->batCacheid) = 0;
3071 336 : BBP_pid(UTF8_toUpperTo->batCacheid) = 0;
3072 336 : BBP_pid(UTF8_toLowerFrom->batCacheid) = 0;
3073 336 : BBP_pid(UTF8_toLowerTo->batCacheid) = 0;
3074 : }
3075 : return MAL_SUCCEED;
3076 :
3077 0 : bailout:
3078 0 : BBPreclaim(UTF8_toUpperFrom);
3079 0 : BBPreclaim(UTF8_toUpperTo);
3080 0 : BBPreclaim(UTF8_toLowerFrom);
3081 0 : BBPreclaim(UTF8_toLowerTo);
3082 0 : UTF8_toUpperFrom = NULL;
3083 0 : UTF8_toUpperTo = NULL;
3084 0 : UTF8_toLowerFrom = NULL;
3085 0 : UTF8_toLowerTo = NULL;
3086 0 : throw(MAL, "str.prelude", GDK_EXCEPTION);
3087 : }
3088 :
3089 : static str
3090 334 : STRepilogue(void *ret)
3091 : {
3092 334 : (void) ret;
3093 334 : BBPreclaim(UTF8_toUpperFrom);
3094 334 : BBPreclaim(UTF8_toUpperTo);
3095 334 : BBPreclaim(UTF8_toLowerFrom);
3096 334 : BBPreclaim(UTF8_toLowerTo);
3097 334 : UTF8_toUpperFrom = NULL;
3098 334 : UTF8_toUpperTo = NULL;
3099 334 : UTF8_toLowerFrom = NULL;
3100 334 : UTF8_toLowerTo = NULL;
3101 334 : return MAL_SUCCEED;
3102 : }
3103 :
#ifdef NDEBUG
/* assertions are compiled out: checking a string costs nothing */
#define UTF8_assert(s)		((void) 0)
#else
/* debug builds: assert that s is either the nil string or accepted by
 * utf8valid() */
static inline void
UTF8_assert(const char *s)
{
	assert(strNil(s) || utf8valid(s) == 0);
}
#endif
3113 :
/* return the number of codepoints, as counted by utf8nlen(), in the
 * byte range that starts at s and stops just before end; return -1
 * when end lies before s */
static inline int
UTF8_strpos(const char *s, const char *end)
{
	UTF8_assert(s);

	return s > end ? -1 : (int) utf8nlen(s, (size_t) (end - s));
}
3125 :
3126 : /* return a pointer to the byte that starts the pos'th (0-based)
3127 : * codepoint in s */
3128 : static inline str
3129 7159739 : UTF8_strtail(const char *s, int pos)
3130 : {
3131 7159739 : UTF8_assert(s);
3132 102702937 : while (*s) {
3133 102250957 : if ((*s & 0xC0) != 0x80) {
3134 102250887 : if (pos <= 0)
3135 : break;
3136 95543128 : pos--;
3137 : }
3138 95543198 : s++;
3139 : }
3140 7653398 : return (str) s;
3141 : }
3142 :
3143 : /* copy n Unicode codepoints from s to dst, return pointer to new end */
3144 : static inline str
3145 216 : UTF8_strncpy(char *restrict dst, const char *restrict s, int n)
3146 : {
3147 216 : UTF8_assert(s);
3148 1451 : while (*s && n) {
3149 1235 : if ((*s & 0xF8) == 0xF0) {
3150 : /* 4 byte UTF-8 sequence */
3151 0 : *dst++ = *s++;
3152 0 : *dst++ = *s++;
3153 0 : *dst++ = *s++;
3154 0 : *dst++ = *s++;
3155 1235 : } else if ((*s & 0xF0) == 0xE0) {
3156 : /* 3 byte UTF-8 sequence */
3157 6 : *dst++ = *s++;
3158 6 : *dst++ = *s++;
3159 6 : *dst++ = *s++;
3160 1229 : } else if ((*s & 0xE0) == 0xC0) {
3161 : /* 2 byte UTF-8 sequence */
3162 0 : *dst++ = *s++;
3163 0 : *dst++ = *s++;
3164 : } else {
3165 : /* 1 byte UTF-8 "sequence" */
3166 1229 : *dst++ = *s++;
3167 : }
3168 1235 : n--;
3169 : }
3170 216 : *dst = '\0';
3171 216 : return dst;
3172 : }
3173 :
/* return the number of Unicode codepoints in s, as computed by
 * utf8len(); the caller must not pass the nil string */
int
UTF8_strlen(const char *s)
{
	UTF8_assert(s);
	assert(!strNil(s));

	const int ncodepoints = (int) utf8len(s);

	return ncodepoints;
}
3183 :
/* return the length of s in bytes, i.e. (int) strlen(s); the caller
 * must not pass the nil string */
int
str_strlen(const char *s)
{
	UTF8_assert(s);
	assert(!strNil(s));

	const int nbytes = (int) strlen(s);

	return nbytes;
}
3193 :
3194 : /* return the display width of s */
3195 : int
3196 5723019 : UTF8_strwidth(const char *s)
3197 : {
3198 5723019 : int len = 0;
3199 5723019 : int c;
3200 5723019 : int n;
3201 :
3202 5723019 : if (strNil(s))
3203 214261 : return int_nil;
3204 : c = 0;
3205 : n = 0;
3206 141751468 : while (*s != 0) {
3207 136242710 : if ((*s & 0x80) == 0) {
3208 136229514 : assert(n == 0);
3209 136229514 : len++;
3210 136229514 : n = 0;
3211 13196 : } else if ((*s & 0xC0) == 0x80) {
3212 8679 : c = (c << 6) | (*s & 0x3F);
3213 8679 : if (--n == 0) {
3214 : /* last byte of a multi-byte character */
3215 4517 : len++;
3216 : /* this list was created by combining
3217 : * the code points marked as
3218 : * Emoji_Presentation in
3219 : * /usr/share/unicode/emoji/emoji-data.txt
3220 : * and code points marked either F or
3221 : * W in EastAsianWidth.txt; this list
3222 : * is up-to-date with Unicode 9.0 */
3223 4517 : if ((0x1100 <= c && c <= 0x115F) ||
3224 4517 : (0x231A <= c && c <= 0x231B) ||
3225 : (0x2329 <= c && c <= 0x232A) ||
3226 : (0x23E9 <= c && c <= 0x23EC) ||
3227 : c == 0x23F0 ||
3228 : c == 0x23F3 ||
3229 : (0x25FD <= c && c <= 0x25FE) ||
3230 : (0x2614 <= c && c <= 0x2615) ||
3231 : (0x2648 <= c && c <= 0x2653) ||
3232 : c == 0x267F ||
3233 : c == 0x2693 ||
3234 : c == 0x26A1 ||
3235 : (0x26AA <= c && c <= 0x26AB) ||
3236 : (0x26BD <= c && c <= 0x26BE) ||
3237 : (0x26C4 <= c && c <= 0x26C5) ||
3238 : c == 0x26CE ||
3239 : c == 0x26D4 ||
3240 : c == 0x26EA ||
3241 : (0x26F2 <= c && c <= 0x26F3) ||
3242 : c == 0x26F5 ||
3243 : c == 0x26FA ||
3244 : c == 0x26FD ||
3245 : c == 0x2705 ||
3246 : (0x270A <= c && c <= 0x270B) ||
3247 : c == 0x2728 ||
3248 : c == 0x274C ||
3249 : c == 0x274E ||
3250 : (0x2753 <= c && c <= 0x2755) ||
3251 : c == 0x2757 ||
3252 : (0x2795 <= c && c <= 0x2797) ||
3253 : c == 0x27B0 ||
3254 : c == 0x27BF ||
3255 : (0x2B1B <= c && c <= 0x2B1C) ||
3256 : c == 0x2B50 ||
3257 : c == 0x2B55 ||
3258 : (0x2E80 <= c && c <= 0x2E99) ||
3259 : (0x2E9B <= c && c <= 0x2EF3) ||
3260 : (0x2F00 <= c && c <= 0x2FD5) ||
3261 : (0x2FF0 <= c && c <= 0x2FFB) ||
3262 : (0x3000 <= c && c <= 0x303E) ||
3263 : (0x3041 <= c && c <= 0x3096) ||
3264 : (0x3099 <= c && c <= 0x30FF) ||
3265 : (0x3105 <= c && c <= 0x312D) ||
3266 : (0x3131 <= c && c <= 0x318E) ||
3267 : (0x3190 <= c && c <= 0x31BA) ||
3268 : (0x31C0 <= c && c <= 0x31E3) ||
3269 : (0x31F0 <= c && c <= 0x321E) ||
3270 : (0x3220 <= c && c <= 0x3247) ||
3271 : (0x3250 <= c && c <= 0x32FE) ||
3272 : (0x3300 <= c && c <= 0x4DBF) ||
3273 : (0x4E00 <= c && c <= 0xA48C) ||
3274 : (0xA490 <= c && c <= 0xA4C6) ||
3275 : (0xA960 <= c && c <= 0xA97C) ||
3276 : (0xAC00 <= c && c <= 0xD7A3) ||
3277 : (0xF900 <= c && c <= 0xFAFF) ||
3278 : (0xFE10 <= c && c <= 0xFE19) ||
3279 : (0xFE30 <= c && c <= 0xFE52) ||
3280 : (0xFE54 <= c && c <= 0xFE66) ||
3281 : (0xFE68 <= c && c <= 0xFE6B) ||
3282 : (0xFF01 <= c && c <= 0xFF60) ||
3283 : (0xFFE0 <= c && c <= 0xFFE6) ||
3284 : c == 0x16FE0 ||
3285 : (0x17000 <= c && c <= 0x187EC) ||
3286 : (0x18800 <= c && c <= 0x18AF2) ||
3287 : (0x1B000 <= c && c <= 0x1B001) ||
3288 : c == 0x1F004 ||
3289 : c == 0x1F0CF ||
3290 : c == 0x1F18E || (0x1F191 <= c && c <= 0x1F19A) ||
3291 : /* removed 0x1F1E6..0x1F1FF */
3292 : (0x1F200 <= c && c <= 0x1F202) ||
3293 : (0x1F210 <= c && c <= 0x1F23B) ||
3294 : (0x1F240 <= c && c <= 0x1F248) ||
3295 : (0x1F250 <= c && c <= 0x1F251) ||
3296 : (0x1F300 <= c && c <= 0x1F320) ||
3297 : (0x1F32D <= c && c <= 0x1F335) ||
3298 : (0x1F337 <= c && c <= 0x1F37C) ||
3299 : (0x1F37E <= c && c <= 0x1F393) ||
3300 : (0x1F3A0 <= c && c <= 0x1F3CA) ||
3301 : (0x1F3CF <= c && c <= 0x1F3D3) ||
3302 : (0x1F3E0 <= c && c <= 0x1F3F0) ||
3303 : c == 0x1F3F4 ||
3304 : (0x1F3F8 <= c && c <= 0x1F43E) ||
3305 : c == 0x1F440 ||
3306 : (0x1F442 <= c && c <= 0x1F4FC) ||
3307 : (0x1F4FF <= c && c <= 0x1F53D) ||
3308 : (0x1F54B <= c && c <= 0x1F54E) ||
3309 : (0x1F550 <= c && c <= 0x1F567) ||
3310 : c == 0x1F57A ||
3311 : (0x1F595 <= c && c <= 0x1F596) ||
3312 : c == 0x1F5A4 ||
3313 : (0x1F5FB <= c && c <= 0x1F64F) ||
3314 : (0x1F680 <= c && c <= 0x1F6C5) ||
3315 : c == 0x1F6CC ||
3316 : (0x1F6D0 <= c && c <= 0x1F6D2) ||
3317 : (0x1F6EB <= c && c <= 0x1F6EC) ||
3318 : (0x1F6F4 <= c && c <= 0x1F6F6) ||
3319 : (0x1F910 <= c && c <= 0x1F91E) ||
3320 : (0x1F920 <= c && c <= 0x1F927) ||
3321 : c == 0x1F930 ||
3322 : (0x1F933 <= c && c <= 0x1F93E) ||
3323 : (0x1F940 <= c && c <= 0x1F94B) ||
3324 : (0x1F950 <= c && c <= 0x1F95E) ||
3325 : (0x1F980 <= c && c <= 0x1F991) ||
3326 : c == 0x1F9C0 ||
3327 : (0x20000 <= c && c <= 0x2FFFD) ||
3328 : (0x30000 <= c && c <= 0x3FFFD))
3329 84 : len++;
3330 : }
3331 4517 : } else if ((*s & 0xE0) == 0xC0) {
3332 402 : assert(n == 0);
3333 402 : n = 1;
3334 402 : c = *s & 0x1F;
3335 4115 : } else if ((*s & 0xF0) == 0xE0) {
3336 4068 : assert(n == 0);
3337 4068 : n = 2;
3338 4068 : c = *s & 0x0F;
3339 47 : } else if ((*s & 0xF8) == 0xF0) {
3340 47 : assert(n == 0);
3341 47 : n = 3;
3342 47 : c = *s & 0x07;
3343 0 : } else if ((*s & 0xFC) == 0xF8) {
3344 0 : assert(n == 0);
3345 0 : n = 4;
3346 0 : c = *s & 0x03;
3347 : } else {
3348 0 : assert(0);
3349 : n = 0;
3350 : }
3351 136242710 : s++;
3352 : }
3353 : return len;
3354 : }
3355 :
3356 : str
3357 108615 : str_case_hash_lock(bool upper)
3358 : {
3359 108615 : BAT *b = upper ? UTF8_toUpperFrom : UTF8_toLowerFrom;
3360 :
3361 108615 : if (BAThash(b) != GDK_SUCCEED)
3362 0 : throw(MAL, "str.str_case_hash_lock", GDK_EXCEPTION);
3363 108615 : MT_rwlock_rdlock(&b->thashlock);
3364 108615 : if (b->thash)
3365 : return MAL_SUCCEED;
3366 0 : MT_rwlock_rdunlock(&b->thashlock);
3367 0 : throw(MAL, "str.str_case_hash_lock", "Lost hash");
3368 : }
3369 :
3370 : void
3371 108615 : str_case_hash_unlock(bool upper)
3372 : {
3373 90 : BAT *b = upper ? UTF8_toUpperFrom : UTF8_toLowerFrom;
3374 90 : MT_rwlock_rdunlock(&b->thashlock);
3375 90 : }
3376 :
3377 : static inline str
3378 289684 : convertCase(BAT *from, BAT *to, str *buf, size_t *buflen, const char *src,
3379 : const char *malfunc)
3380 : {
3381 289684 : size_t len = strlen(src);
3382 289684 : char *dst;
3383 289684 : const char *end = src + len;
3384 289684 : bool lower_to_upper = from == UTF8_toUpperFrom;
3385 289684 : const Hash *h = from->thash;
3386 289684 : const int *restrict fromb = (const int *restrict) from->theap->base;
3387 289684 : const int *restrict tob = (const int *restrict) to->theap->base;
3388 :
3389 : /* the from and to bats are not views */
3390 289684 : assert(from->tbaseoff == 0);
3391 289684 : assert(to->tbaseoff == 0);
3392 289684 : CHECK_STR_BUFFER_LENGTH(buf, buflen, len + 1, malfunc);
3393 289684 : dst = *buf;
3394 4183832 : while (src < end) {
3395 3894245 : int c;
3396 :
3397 3894245 : UTF8_GETCHAR(c, src);
3398 3894245 : if (c < 192) { /* the first 191 characters in unicode are trivial to convert */
3399 : /* for ASCII characters we don't need to do a hash lookup */
3400 3894176 : if (lower_to_upper) {
3401 3183181 : if ('a' <= c && c <= 'z')
3402 2492153 : c += 'A' - 'a';
3403 : } else {
3404 710995 : if ('A' <= c && c <= 'Z')
3405 670620 : c += 'a' - 'A';
3406 : }
3407 : } else {
3408 : /* use hash, even though BAT is sorted */
3409 72 : for (BUN hb = HASHget(h, hash_int(h, &c));
3410 114 : hb != BUN_NONE; hb = HASHgetlink(h, hb)) {
3411 62 : if (c == fromb[hb]) {
3412 17 : c = tob[hb];
3413 17 : break;
3414 : }
3415 : }
3416 : }
3417 3894245 : if (dst + UTF8_CHARLEN(c) > *buf + len) {
3418 : /* doesn't fit, so allocate more space;
3419 : * also allocate enough for the rest of the
3420 : * source */
3421 97 : size_t off = dst - *buf;
3422 97 : size_t nextlen = (len += 4 + (end - src)) + 1;
3423 :
3424 : /* Don't use CHECK_STR_BUFFER_LENGTH here, because it
3425 : * does GDKmalloc instead of GDKrealloc and data could be lost */
3426 97 : if (nextlen > *buflen) {
3427 97 : size_t newlen = ((nextlen + 1023) & ~1023); /* align to a multiple of 1024 bytes */
3428 97 : str newbuf = GDKrealloc(*buf, newlen);
3429 0 : if (!newbuf)
3430 0 : throw(MAL, malfunc, SQLSTATE(HY013) MAL_MALLOC_FAIL);
3431 0 : *buf = newbuf;
3432 0 : *buflen = newlen;
3433 : }
3434 0 : dst = *buf + off;
3435 : }
3436 3894148 : UTF8_PUTCHAR(c, dst);
3437 : }
3438 289587 : *dst = 0;
3439 289587 : return MAL_SUCCEED;
3440 0 : illegal:
3441 0 : throw(MAL, malfunc, SQLSTATE(42000) "Illegal Unicode code point");
3442 : }
3443 :
3444 : /*
3445 : * Here you find the wrappers around the version 4 library code
3446 : * It also contains the direct implementation of the string
3447 : * matching support routines.
3448 : */
3449 : #include "mal_exception.h"
3450 :
/*
 * The SQL LIKE function; returns a boolean.
 *
 * s is the subject string, pat the pattern and esc, when not NULL,
 * a string whose first byte is the escape character.  In the pattern
 * '_' matches exactly one UTF-8 encoded character, '%' matches any
 * (possibly empty) sequence of characters, and the escape character
 * makes the pattern byte that follows it match literally.  All other
 * bytes must match the subject byte for byte.
 *
 * Returns true when the whole of s matches the whole of pat.
 */
static bool
STRlike(const char *s, const char *pat, const char *esc)
{
	const char *t = s;
	const char *p;

	for (p = pat; *p && *t; p++) {
		if (esc && *p == *esc) {
			/* escaped pattern byte: literal comparison */
			p++;
			if (*p != *t)
				return false;
			t++;
		} else if (*p == '_') {
			/* skip one whole character: the lead byte plus any
			 * UTF-8 continuation bytes (10xxxxxx) */
			t++;
			while ((*t & 0xC0) == 0x80)
				t++;
		} else if (*p == '%') {
			/* a run of '%' is equivalent to a single one */
			do {
				p++;
			} while (*p == '%');
			if (*p == 0)
				return true;	/* tail is acceptable */
			/* try to match the rest of the pattern at every
			 * character boundary of the remaining subject */
			while (*t) {
				if (STRlike(t, p, esc))
					return true;
				t++;
				while ((*t & 0xC0) == 0x80)
					t++;
			}
			/* the rest of the pattern needs at least one more
			 * character, but the subject is exhausted */
			return false;
		} else if (*p == *t) {
			t++;
		} else {
			return false;
		}
	}
	/* any number of trailing '%' match the empty remainder */
	while (*p == '%')
		p++;
	return *t == 0 && *p == 0;
}
3489 :
3490 : static str
3491 0 : STRlikewrap3(bit *ret, const str *s, const str *pat, const str *esc)
3492 : {
3493 0 : if (strNil(*s) || strNil(*pat) || strNil(*esc))
3494 0 : *ret = bit_nil;
3495 : else
3496 0 : *ret = (bit) STRlike(*s, *pat, *esc);
3497 0 : return MAL_SUCCEED;
3498 : }
3499 :
3500 : static str
3501 0 : STRlikewrap(bit *ret, const str *s, const str *pat)
3502 : {
3503 0 : if (strNil(*s) || strNil(*pat))
3504 0 : *ret = bit_nil;
3505 : else
3506 0 : *ret = (bit) STRlike(*s, *pat, NULL);
3507 0 : return MAL_SUCCEED;
3508 : }
3509 :
3510 : static str
3511 0 : STRtostr(str *res, const str *src)
3512 : {
3513 0 : if (*src == 0)
3514 0 : *res = GDKstrdup(str_nil);
3515 : else
3516 0 : *res = GDKstrdup(*src);
3517 0 : if (*res == NULL)
3518 0 : throw(MAL, "str.str", SQLSTATE(HY013) MAL_MALLOC_FAIL);
3519 : return MAL_SUCCEED;
3520 : }
3521 :
3522 : static str
3523 91 : STRLength(int *res, const str *arg1)
3524 : {
3525 91 : const char *s = *arg1;
3526 :
3527 182 : *res = strNil(s) ? int_nil : UTF8_strlen(s);
3528 91 : return MAL_SUCCEED;
3529 : }
3530 :
3531 : static str
3532 3 : STRBytes(int *res, const str *arg1)
3533 : {
3534 3 : const char *s = *arg1;
3535 :
3536 6 : *res = strNil(s) ? int_nil : str_strlen(s);
3537 3 : return MAL_SUCCEED;
3538 : }
3539 :
3540 : str
3541 4262 : str_tail(str *buf, size_t *buflen, const char *s, int off)
3542 : {
3543 4262 : if (off < 0) {
3544 1 : off += UTF8_strlen(s);
3545 1 : if (off < 0)
3546 : off = 0;
3547 : }
3548 4262 : char *tail = UTF8_strtail(s, off);
3549 4262 : size_t nextlen = strlen(tail) + 1;
3550 4262 : CHECK_STR_BUFFER_LENGTH(buf, buflen, nextlen, "str.tail");
3551 4262 : strcpy(*buf, tail);
3552 4262 : return MAL_SUCCEED;
3553 : }
3554 :
3555 : static str
3556 1 : STRTail(str *res, const str *arg1, const int *offset)
3557 : {
3558 1 : str buf = NULL, msg = MAL_SUCCEED;
3559 1 : const char *s = *arg1;
3560 1 : int off = *offset;
3561 :
3562 2 : if (strNil(s) || is_int_nil(off)) {
3563 0 : *res = GDKstrdup(str_nil);
3564 : } else {
3565 1 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
3566 :
3567 1 : *res = NULL;
3568 1 : if (!(buf = GDKmalloc(buflen)))
3569 0 : throw(MAL, "str.tail", SQLSTATE(HY013) MAL_MALLOC_FAIL);
3570 1 : if ((msg = str_tail(&buf, &buflen, s, off)) != MAL_SUCCEED) {
3571 0 : GDKfree(buf);
3572 0 : return msg;
3573 : }
3574 1 : *res = GDKstrdup(buf);
3575 : }
3576 :
3577 1 : GDKfree(buf);
3578 1 : if (!*res)
3579 0 : msg = createException(MAL, "str.tail", SQLSTATE(HY013) MAL_MALLOC_FAIL);
3580 : return msg;
3581 : }
3582 :
3583 : /* copy the substring s[off:off+l] into *buf, replacing *buf with a
3584 : * freshly allocated buffer if the substring doesn't fit; off is 0
3585 : * based, and both off and l count in Unicode codepoints (i.e. not
3586 : * bytes); if off < 0, off counts from the end of the string */
3587 : str
3588 3901532 : str_Sub_String(str *buf, size_t *buflen, const char *s, int off, int l)
3589 : {
3590 3901532 : size_t len;
3591 :
3592 3901532 : if (off < 0) {
3593 4 : off += UTF8_strlen(s);
3594 4 : if (off < 0) {
3595 3 : l += off;
3596 3 : off = 0;
3597 : }
3598 : }
3599 : /* here, off >= 0 */
3600 3901532 : if (l < 0) {
3601 1228 : strcpy(*buf, "");
3602 1228 : return MAL_SUCCEED;
3603 : }
3604 3900304 : s = UTF8_strtail(s, off);
3605 3918215 : len = (size_t) (UTF8_strtail(s, l) - s + 1);
3606 3916163 : CHECK_STR_BUFFER_LENGTH(buf, buflen, len, "str.substring");
3607 3916163 : strcpy_len(*buf, s, len);
3608 3916163 : return MAL_SUCCEED;
3609 : }
3610 :
3611 : static str
3612 4 : STRSubString(str *res, const str *arg1, const int *offset, const int *length)
3613 : {
3614 4 : str buf = NULL, msg = MAL_SUCCEED;
3615 4 : const char *s = *arg1;
3616 4 : int off = *offset, len = *length;
3617 :
3618 7 : if (strNil(s) || is_int_nil(off) || is_int_nil(len)) {
3619 1 : *res = GDKstrdup(str_nil);
3620 : } else {
3621 3 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
3622 :
3623 3 : *res = NULL;
3624 3 : if (!(buf = GDKmalloc(buflen)))
3625 0 : throw(MAL, "str.substring", SQLSTATE(HY013) MAL_MALLOC_FAIL);
3626 3 : if ((msg = str_Sub_String(&buf, &buflen, s, off, len)) != MAL_SUCCEED) {
3627 0 : GDKfree(buf);
3628 0 : return msg;
3629 : }
3630 3 : *res = GDKstrdup(buf);
3631 : }
3632 :
3633 4 : GDKfree(buf);
3634 4 : if (!*res)
3635 0 : msg = createException(MAL, "str.substring",
3636 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
3637 : return msg;
3638 : }
3639 :
/* Write the code point c as a NUL-terminated string into *buf.
 * The buffer is first checked to hold at least 5 bytes.  Returns
 * MAL_SUCCEED, or an "Illegal Unicode code point" exception when
 * control reaches the illegal label. */
str
str_from_wchr(str *buf, size_t *buflen, int c)
{
	CHECK_STR_BUFFER_LENGTH(buf, buflen, 5, "str.unicode");
	str s = *buf;
	/* NOTE(review): UTF8_PUTCHAR is defined elsewhere; it presumably
	 * advances s past the bytes written and jumps to "illegal" for an
	 * invalid code point -- confirm against the macro */
	UTF8_PUTCHAR(c, s);
	*s = 0;
	return MAL_SUCCEED;
  illegal:
	throw(MAL, "str.unicode", SQLSTATE(42000) "Illegal Unicode code point");
}
3651 :
3652 : static str
3653 2 : STRFromWChr(str *res, const int *c)
3654 : {
3655 2 : str buf = NULL, msg = MAL_SUCCEED;
3656 2 : int cc = *c;
3657 :
3658 2 : if (is_int_nil(cc)) {
3659 0 : *res = GDKstrdup(str_nil);
3660 : } else {
3661 2 : size_t buflen = MAX(strlen(str_nil) + 1, 8);
3662 :
3663 2 : *res = NULL;
3664 2 : if (!(buf = GDKmalloc(buflen)))
3665 0 : throw(MAL, "str.unicode", SQLSTATE(HY013) MAL_MALLOC_FAIL);
3666 2 : if ((msg = str_from_wchr(&buf, &buflen, cc)) != MAL_SUCCEED) {
3667 0 : GDKfree(buf);
3668 0 : return msg;
3669 : }
3670 2 : *res = GDKstrdup(buf);
3671 : }
3672 :
3673 2 : GDKfree(buf);
3674 2 : if (!*res)
3675 0 : msg = createException(MAL, "str.unicode",
3676 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
3677 : return msg;
3678 : }
3679 :
/* return the Unicode code point of s at position at in *res;
 * *res is set to int_nil when s or at is nil, when at is negative, or
 * when position at is at or beyond the end of the string */
str
str_wchr_at(int *res, const char *s, int at)
{
	/* 64bit: should have lng arg */
	if (strNil(s) || is_int_nil(at) || at < 0) {
		*res = int_nil;
		return MAL_SUCCEED;
	}
	/* move to the at'th position of the string */
	s = UTF8_strtail(s, at);
	if (s == NULL || *s == 0) {
		/* nothing left to decode */
		*res = int_nil;
		return MAL_SUCCEED;
	}
	/* NOTE(review): UTF8_GETCHAR is defined elsewhere; it presumably
	 * jumps to "illegal" on a malformed sequence -- confirm */
	UTF8_GETCHAR(*res, s);
	return MAL_SUCCEED;
  illegal:
	throw(MAL, "str.unicodeAt", SQLSTATE(42000) "Illegal Unicode code point");
}
3699 :
3700 : static str
3701 0 : STRWChrAt(int *res, const str *arg1, const int *at)
3702 : {
3703 0 : return str_wchr_at(res, *arg1, *at);
3704 : }
3705 :
3706 : str
3707 88451 : str_lower(str *buf, size_t *buflen, const char *s)
3708 : {
3709 88451 : return convertCase(UTF8_toLowerFrom, UTF8_toLowerTo, buf, buflen, s,
3710 : "str.lower");
3711 : }
3712 :
3713 : static inline str
3714 4612 : STRlower(str *res, const str *arg1)
3715 : {
3716 4612 : str buf = NULL, msg = MAL_SUCCEED;
3717 4612 : const char *s = *arg1;
3718 :
3719 4612 : if (strNil(s)) {
3720 312 : *res = GDKstrdup(str_nil);
3721 : } else {
3722 4300 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
3723 :
3724 4300 : *res = NULL;
3725 4300 : if (!(buf = GDKmalloc(buflen)))
3726 0 : throw(MAL, "str.lower", SQLSTATE(HY013) MAL_MALLOC_FAIL);
3727 4300 : if ((msg = str_case_hash_lock(false))) {
3728 0 : GDKfree(buf);
3729 0 : return msg;
3730 : }
3731 4300 : msg = str_lower(&buf, &buflen, s);
3732 4300 : str_case_hash_unlock(false);
3733 4300 : if (msg != MAL_SUCCEED) {
3734 0 : GDKfree(buf);
3735 0 : return msg;
3736 : }
3737 4300 : *res = GDKstrdup(buf);
3738 : }
3739 :
3740 4612 : GDKfree(buf);
3741 4612 : if (!*res)
3742 0 : msg = createException(MAL, "str.lower",
3743 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
3744 : return msg;
3745 : }
3746 :
3747 : str
3748 201232 : str_upper(str *buf, size_t *buflen, const char *s)
3749 : {
3750 201232 : return convertCase(UTF8_toUpperFrom, UTF8_toUpperTo, buf, buflen, s,
3751 : "str.upper");
3752 : }
3753 :
3754 : static str
3755 104493 : STRupper(str *res, const str *arg1)
3756 : {
3757 104493 : str buf = NULL, msg = MAL_SUCCEED;
3758 104493 : const char *s = *arg1;
3759 :
3760 104493 : if (strNil(s)) {
3761 268 : *res = GDKstrdup(str_nil);
3762 : } else {
3763 104225 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
3764 :
3765 104225 : *res = NULL;
3766 104225 : if (!(buf = GDKmalloc(buflen)))
3767 0 : throw(MAL, "str.upper", SQLSTATE(HY013) MAL_MALLOC_FAIL);
3768 104225 : if ((msg = str_case_hash_lock(true))) {
3769 0 : GDKfree(buf);
3770 0 : return msg;
3771 : }
3772 104225 : msg = str_upper(&buf, &buflen, s);
3773 104225 : str_case_hash_unlock(true);
3774 104225 : if (msg != MAL_SUCCEED) {
3775 0 : GDKfree(buf);
3776 0 : return msg;
3777 : }
3778 104225 : *res = GDKstrdup(buf);
3779 : }
3780 :
3781 104493 : GDKfree(buf);
3782 104493 : if (!*res)
3783 0 : msg = createException(MAL, "str.upper",
3784 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
3785 : return msg;
3786 : }
3787 :
/* Compare the first plen bytes of s with prefix.  Note the strncmp
 * convention: the result is 0 when s starts with prefix and non-zero
 * otherwise. */
int
str_is_prefix(const char *s, const char *prefix, int plen)
{
	int cmp = strncmp(s, prefix, plen);

	return cmp;
}
3794 :
/* Variant of str_is_prefix that compares with utf8ncasecmp; the result
 * of that call is returned unchanged */
int
str_is_iprefix(const char *s, const char *prefix, int plen)
{
	int cmp = utf8ncasecmp(s, prefix, plen);

	return cmp;
}
3800 :
/* Check whether s ends with suffix, where sul is the length of suffix.
 * Returns 0 when it does, -1 when s is shorter than sul, and the
 * (non-zero) strcmp result otherwise. */
int
str_is_suffix(const char *s, const char *suffix, int sul)
{
	const int slen = str_strlen(s);

	if (slen >= sul)
		return strcmp(s + slen - sul, suffix);
	return -1;
}
3811 :
/* case insensitive endswith check: returns 0 when the comparison of
 * the end of s with suffix (done by utf8casecmp) succeeds, non-zero
 * otherwise; sul is not used */
int
str_is_isuffix(const char *s, const char *suffix, int sul)
{
	const char *e = s + strlen(s);
	const char *sf;

	(void) sul;
	/* note that the uppercase and lowercase forms of a character aren't
	 * necessarily the same length in their UTF-8 encodings */
	/* for every byte in suffix that is not of the form 10xxxxxx (i.e.
	 * that is not a UTF-8 continuation byte), move e backwards in s
	 * over one such lead byte and the continuation bytes after it */
	for (sf = suffix; *sf && e > s; sf++) {
		if ((*sf & 0xC0) != 0x80) {
			while ((*--e & 0xC0) == 0x80)
				;
		}
	}
	/* step over the continuation bytes at which the loop stopped */
	while ((*sf & 0xC0) == 0x80)
		sf++;
	/* *sf != 0: the start of s was reached before suffix was used up */
	return *sf != 0 || utf8casecmp(e, suffix) != 0;
}
3832 :
/* Returns 0 when n occurs somewhere in h and 1 when it does not;
 * nlen is not used */
int
str_contains(const char *h, const char *n, int nlen)
{
	const char *hit = strstr(h, n);

	(void) nlen;
	if (hit == NULL)
		return 1;
	return 0;
}
3839 :
/* Variant of str_contains that searches with utf8casestr: returns 0
 * when that search finds n in h and 1 when it does not; nlen is not
 * used */
int
str_icontains(const char *h, const char *n, int nlen)
{
	(void) nlen;
	if (utf8casestr(h, n) == NULL)
		return 1;
	return 0;
}
3846 :
/* Fetch the arguments of a MAL pattern with two string arguments and
 * an optional fourth flag argument:
 *   R     receives the pointer to the result slot (argument 0),
 *   S1/S2 receive the two string arguments (arguments 1 and 2),
 *   ICASE receives the value of argument 3 when the instruction has
 *         four arguments, and false otherwise.
 * The flag is stored in the ICASE parameter itself; the macro used to
 * ignore the parameter and assign to a variable literally named
 * "icase" in the caller's scope. */
#define STR_MAPARGS(STK, PCI, R, S1, S2, ICASE)							\
	do{																	\
		(R) = getArgReference(STK, PCI, 0);								\
		(S1) = *getArgReference_str(STK, PCI, 1);						\
		(S2) = *getArgReference_str(STK, PCI, 2);						\
		(ICASE) = (PCI)->argc == 4 && *getArgReference_bit(STK, PCI, 3); \
	} while(0)
3854 :
3855 : static str
3856 16 : STRstartswith(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
3857 : {
3858 16 : (void) cntxt;
3859 16 : (void) mb;
3860 :
3861 16 : str s1, s2;
3862 16 : bit *r, icase;
3863 :
3864 16 : STR_MAPARGS(stk, pci, r, s1, s2, icase);
3865 :
3866 31 : if (strNil(s1) || strNil(s2)) {
3867 2 : *r = bit_nil;
3868 : } else {
3869 14 : int s2_len = str_strlen(s2);
3870 28 : *r = icase ?
3871 5 : str_is_iprefix(s1, s2, s2_len) == 0 :
3872 9 : str_is_prefix(s1, s2, s2_len) == 0;
3873 : }
3874 16 : return MAL_SUCCEED;
3875 : }
3876 :
3877 : static str
3878 13 : STRendswith(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
3879 : {
3880 13 : (void) cntxt;
3881 13 : (void) mb;
3882 :
3883 13 : str s1, s2;
3884 13 : bit *r, icase;
3885 :
3886 13 : STR_MAPARGS(stk, pci, r, s1, s2, icase);
3887 :
3888 25 : if (strNil(s1) || strNil(s2)) {
3889 2 : *r = bit_nil;
3890 : } else {
3891 11 : int s2_len = str_strlen(s2);
3892 22 : *r = icase ?
3893 4 : str_is_isuffix(s1, s2, s2_len) == 0 :
3894 7 : str_is_suffix(s1, s2, s2_len) == 0;
3895 : }
3896 13 : return MAL_SUCCEED;
3897 : }
3898 :
3899 : /* returns whether haystack contains needle */
3900 : static str
3901 15 : STRcontains(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
3902 : {
3903 15 : (void) cntxt;
3904 15 : (void) mb;
3905 :
3906 15 : str s1, s2;
3907 15 : bit *r, icase;
3908 :
3909 15 : STR_MAPARGS(stk, pci, r, s1, s2, icase);
3910 :
3911 29 : if (strNil(s1) || strNil(s2)) {
3912 2 : *r = bit_nil;
3913 : } else {
3914 13 : int s2_len = str_strlen(s2);
3915 26 : *r = icase ?
3916 5 : str_icontains(s1, s2, s2_len) == 0 :
3917 8 : str_contains(s1, s2, s2_len) == 0;
3918 : }
3919 15 : return MAL_SUCCEED;
3920 : }
3921 :
/* Locate the first occurrence of s2 in s with strstr and return its
 * position as computed by UTF8_strpos, or -1 when s2 does not occur;
 * slen is not used */
int
str_search(const char *s, const char *s2, int slen)
{
	const char *hit = strstr(s, s2);

	(void) slen;
	/* 64bit: should return lng */
	if (hit == NULL)
		return -1;
	return UTF8_strpos(s, hit);
}
3932 :
/* Variant of str_search that locates s2 with utf8casestr; returns the
 * position as computed by UTF8_strpos, or -1 when nothing is found;
 * slen is not used */
int
str_isearch(const char *s, const char *s2, int slen)
{
	const char *hit = utf8casestr(s, s2);

	(void) slen;
	/* 64bit: should return lng */
	if (hit == NULL)
		return -1;
	return UTF8_strpos(s, hit);
}
3943 :
3944 : /* find first occurrence of needle in haystack */
3945 : static str
3946 0 : STRstr_search(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
3947 : {
3948 0 : (void) cntxt;
3949 0 : (void) mb;
3950 0 : bit *res = getArgReference(stk, pci, 0);
3951 0 : const str *haystack = getArgReference(stk, pci, 1),
3952 0 : *needle = getArgReference(stk, pci, 2);
3953 0 : bit icase = pci->argc == 4 && *getArgReference_bit(stk, pci, 3);
3954 0 : str s = *haystack, h = *needle, msg = MAL_SUCCEED;
3955 0 : if (strNil(s) || strNil(h)) {
3956 0 : *res = bit_nil;
3957 : } else {
3958 0 : int needle_len = str_strlen(h);
3959 :
3960 0 : *res = icase ?
3961 0 : str_isearch(s, h, needle_len) :
3962 0 : str_search(s, h, needle_len);
3963 : }
3964 0 : return msg;
3965 : }
3966 :
/* Find the last occurrence of s2 (slen bytes long) in s.  Returns its
 * position as computed by UTF8_strpos, or -1 when s2 does not occur
 * or is longer than s. */
int
str_reverse_str_search(const char *s, const char *s2, int slen)
{
	/* 64bit: should return lng */
	int len = str_strlen(s);

	if (len < slen)
		return -1;
	/* try every start position, from the last possible one down to s */
	for (const char *p = s + len - slen;; p--) {
		if (strncmp(p, s2, slen) == 0)
			return UTF8_strpos(s, p);
		/* stop here instead of decrementing p past the start of s,
		 * which would be undefined pointer arithmetic */
		if (p == s)
			break;
	}
	return -1;
}
3985 :
/* Variant of str_reverse_str_search that compares with utf8ncasecmp.
 * Returns the position (as computed by UTF8_strpos) of the last start
 * position at which the comparison succeeds, or -1 when there is none
 * or when slen exceeds the length of s. */
int
str_reverse_str_isearch(const char *s, const char *s2, int slen)
{
	/* 64bit: should return lng */
	int len = str_strlen(s);

	if (len < slen)
		return -1;
	/* try every start position, from the last possible one down to s */
	for (const char *p = s + len - slen;; p--) {
		if (utf8ncasecmp(p, s2, slen) == 0)
			return UTF8_strpos(s, p);
		/* stop here instead of decrementing p past the start of s,
		 * which would be undefined pointer arithmetic */
		if (p == s)
			break;
	}
	return -1;
}
4004 :
4005 : /* find last occurrence of arg2 in arg1 */
4006 : static str
4007 0 : STRrevstr_search(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
4008 : {
4009 0 : (void) cntxt;
4010 0 : (void) mb;
4011 0 : bit *res = getArgReference(stk, pci, 0);
4012 0 : const str *haystack = getArgReference(stk, pci, 1);
4013 0 : const str *needle = getArgReference(stk, pci, 2);
4014 0 : bit icase = pci->argc == 4 && *getArgReference_bit(stk, pci, 3);
4015 0 : str s = *haystack, h = *needle, msg = MAL_SUCCEED;
4016 0 : if (strNil(s) || strNil(h)) {
4017 0 : *res = bit_nil;
4018 : } else {
4019 0 : int needle_len = str_strlen(h);
4020 :
4021 0 : *res = icase ?
4022 0 : str_reverse_str_isearch(s, h, needle_len) :
4023 0 : str_reverse_str_search(s, h, needle_len);
4024 : }
4025 0 : return msg;
4026 : }
4027 :
4028 : str
4029 37 : str_splitpart(str *buf, size_t *buflen, const char *s, const char *s2, int f)
4030 : {
4031 37 : size_t len;
4032 37 : char *p = NULL;
4033 :
4034 37 : if (f <= 0)
4035 4 : throw(MAL, "str.splitpart",
4036 : SQLSTATE(42000) "field position must be greater than zero");
4037 :
4038 33 : len = strlen(s2);
4039 33 : if (len) {
4040 42 : while ((p = strstr(s, s2)) != NULL && f > 1) {
4041 13 : s = p + len;
4042 13 : f--;
4043 : }
4044 : }
4045 :
4046 33 : if (f != 1) {
4047 12 : strcpy(*buf, "");
4048 12 : return MAL_SUCCEED;
4049 : }
4050 :
4051 21 : if (p == NULL) {
4052 10 : len = strlen(s);
4053 : } else {
4054 11 : len = (size_t) (p - s);
4055 : }
4056 :
4057 21 : len++;
4058 21 : CHECK_STR_BUFFER_LENGTH(buf, buflen, len, "str.splitpart");
4059 21 : strcpy_len(*buf, s, len);
4060 21 : return MAL_SUCCEED;
4061 : }
4062 :
4063 : static str
4064 23 : STRsplitpart(str *res, str *haystack, str *needle, int *field)
4065 : {
4066 23 : str buf = NULL, msg = MAL_SUCCEED;
4067 23 : const char *s = *haystack, *s2 = *needle;
4068 23 : int f = *field;
4069 :
4070 69 : if (strNil(s) || strNil(s2) || is_int_nil(f)) {
4071 0 : *res = GDKstrdup(str_nil);
4072 : } else {
4073 23 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
4074 :
4075 23 : *res = NULL;
4076 23 : if (!(buf = GDKmalloc(buflen)))
4077 4 : throw(MAL, "str.splitpart", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4078 23 : if ((msg = str_splitpart(&buf, &buflen, s, s2, f)) != MAL_SUCCEED) {
4079 4 : GDKfree(buf);
4080 4 : return msg;
4081 : }
4082 19 : *res = GDKstrdup(buf);
4083 : }
4084 :
4085 19 : GDKfree(buf);
4086 19 : if (!*res)
4087 0 : msg = createException(MAL, "str.splitpart",
4088 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4089 : return msg;
4090 : }
4091 :
/* Returns the number of bytes to remove from the left of s (len bytes
 * long) in order to strip all leading codepoints that occur in the
 * list rm of nrm codepoints */
static size_t
lstrip(const char *s, size_t len, const int *rm, size_t nrm)
{
	size_t skip = 0;

	while (len > 0) {
		int c;
		size_t n, i;

		UTF8_NEXTCHAR(c, n, s);
		assert(n > 0 && n <= len);
		/* look c up in the list of codepoints to strip */
		for (i = 0; i < nrm && rm[i] != c; i++)
			;
		if (i == nrm)
			break;				/* not in the list: stripping ends here */
		s += n;
		skip += n;
		len -= n;
	}
	return skip;
}
4115 :
/* Returns the length in bytes that s (len bytes long) has after
 * stripping from the right all trailing codepoints that occur in the
 * list rm of nrm codepoints */
static size_t
rstrip(const char *s, size_t len, const int *rm, size_t nrm)
{
	while (len > 0) {
		int c;
		size_t n, i;

		UTF8_LASTCHAR(c, n, s, len);
		assert(n > 0 && n <= len);
		/* look c up in the list of codepoints to strip */
		for (i = 0; i < nrm && rm[i] != c; i++)
			;
		if (i == nrm)
			break;				/* not in the list: stripping ends here */
		len -= n;
	}
	return len;
}
4138 :
/* The code points that the strip/trim functions without an explicit
 * character list remove: the ASCII white space characters followed by
 * the Unicode space separators.  NSPACES below is the number of
 * entries. */
const int whitespace[] = {
	' ',						/* space */
	'\t',						/* tab (character tabulation) */
	'\n',						/* line feed */
	'\r',						/* carriage return */
	'\f',						/* form feed */
	'\v',						/* vertical tab (line tabulation) */
/* below the code points that have the Unicode Zs (space separator) property */
	0x00A0,						/* no-break space */
	0x1680,						/* ogham space mark */
	0x2000,						/* en quad */
	0x2001,						/* em quad */
	0x2002,						/* en space */
	0x2003,						/* em space */
	0x2004,						/* three-per-em space */
	0x2005,						/* four-per-em space */
	0x2006,						/* six-per-em space */
	0x2007,						/* figure space */
	0x2008,						/* punctuation space */
	0x2009,						/* thin space */
	0x200A,						/* hair space */
	0x202F,						/* narrow no-break space */
	0x205F,						/* medium mathematical space */
	0x3000,						/* ideographic space */
};

/* number of entries in the whitespace table */
#define NSPACES		(sizeof(whitespace) / sizeof(whitespace[0]))
4166 :
4167 : str
4168 279 : str_strip(str *buf, size_t *buflen, const char *s)
4169 : {
4170 279 : size_t len = strlen(s);
4171 279 : size_t n = lstrip(s, len, whitespace, NSPACES);
4172 279 : s += n;
4173 279 : len -= n;
4174 279 : n = rstrip(s, len, whitespace, NSPACES);
4175 :
4176 279 : n++;
4177 279 : CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.strip");
4178 279 : strcpy_len(*buf, s, n);
4179 279 : return MAL_SUCCEED;
4180 : }
4181 :
4182 : /* remove all whitespace from either side of arg1 */
4183 : static str
4184 8 : STRStrip(str *res, const str *arg1)
4185 : {
4186 8 : str buf = NULL, msg = MAL_SUCCEED;
4187 8 : const char *s = *arg1;
4188 :
4189 8 : if (strNil(s)) {
4190 0 : *res = GDKstrdup(str_nil);
4191 : } else {
4192 8 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
4193 :
4194 8 : *res = NULL;
4195 8 : if (!(buf = GDKmalloc(buflen)))
4196 0 : throw(MAL, "str.strip", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4197 8 : if ((msg = str_strip(&buf, &buflen, s)) != MAL_SUCCEED) {
4198 0 : GDKfree(buf);
4199 0 : return msg;
4200 : }
4201 8 : *res = GDKstrdup(buf);
4202 : }
4203 :
4204 8 : GDKfree(buf);
4205 8 : if (!*res)
4206 0 : msg = createException(MAL, "str.strip",
4207 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4208 : return msg;
4209 : }
4210 :
4211 : str
4212 18 : str_ltrim(str *buf, size_t *buflen, const char *s)
4213 : {
4214 18 : size_t len = strlen(s);
4215 18 : size_t n = lstrip(s, len, whitespace, NSPACES);
4216 18 : size_t nallocate = len - n + 1;
4217 :
4218 18 : CHECK_STR_BUFFER_LENGTH(buf, buflen, nallocate, "str.ltrim");
4219 18 : strcpy_len(*buf, s + n, nallocate);
4220 18 : return MAL_SUCCEED;
4221 : }
4222 :
4223 : /* remove all whitespace from the start (left) of arg1 */
4224 : static str
4225 10 : STRLtrim(str *res, const str *arg1)
4226 : {
4227 10 : str buf = NULL, msg = MAL_SUCCEED;
4228 10 : const char *s = *arg1;
4229 :
4230 10 : if (strNil(s)) {
4231 0 : *res = GDKstrdup(str_nil);
4232 : } else {
4233 10 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
4234 :
4235 10 : *res = NULL;
4236 10 : if (!(buf = GDKmalloc(buflen)))
4237 0 : throw(MAL, "str.ltrim", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4238 10 : if ((msg = str_ltrim(&buf, &buflen, s)) != MAL_SUCCEED) {
4239 0 : GDKfree(buf);
4240 0 : return msg;
4241 : }
4242 10 : *res = GDKstrdup(buf);
4243 : }
4244 :
4245 10 : GDKfree(buf);
4246 10 : if (!*res)
4247 0 : msg = createException(MAL, "str.ltrim",
4248 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4249 : return msg;
4250 : }
4251 :
4252 : str
4253 96 : str_rtrim(str *buf, size_t *buflen, const char *s)
4254 : {
4255 96 : size_t len = strlen(s);
4256 96 : size_t n = rstrip(s, len, whitespace, NSPACES);
4257 :
4258 96 : n++;
4259 96 : CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.rtrim");
4260 96 : strcpy_len(*buf, s, n);
4261 96 : return MAL_SUCCEED;
4262 : }
4263 :
4264 : /* remove all whitespace from the end (right) of arg1 */
4265 : static str
4266 6 : STRRtrim(str *res, const str *arg1)
4267 : {
4268 6 : str buf = NULL, msg = MAL_SUCCEED;
4269 6 : const char *s = *arg1;
4270 :
4271 6 : if (strNil(s)) {
4272 0 : *res = GDKstrdup(str_nil);
4273 : } else {
4274 6 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
4275 :
4276 6 : *res = NULL;
4277 6 : if (!(buf = GDKmalloc(buflen)))
4278 0 : throw(MAL, "str.rtrim", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4279 6 : if ((msg = str_rtrim(&buf, &buflen, s)) != MAL_SUCCEED) {
4280 0 : GDKfree(buf);
4281 0 : return msg;
4282 : }
4283 6 : *res = GDKstrdup(buf);
4284 : }
4285 :
4286 6 : GDKfree(buf);
4287 6 : if (!*res)
4288 0 : msg = createException(MAL, "str.rtrim",
4289 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4290 : return msg;
4291 : }
4292 :
/* return a list of codepoints in s
 *
 * The string s (len_s bytes long) is decoded and its code points are
 * stored as an array of int in the memory *buf points to; the number
 * of code points stored is returned in *n.  The buffer is first
 * checked to hold len_s ints.  malfunc is the function name used in
 * error messages.  Returns MAL_SUCCEED, or an "Illegal Unicode code
 * point" exception when control reaches the illegal label. */
static str
trimchars(str *buf, size_t *buflen, size_t *n, const char *s, size_t len_s,
		  const char *malfunc)
{
	/* one int per byte of s: there cannot be more code points than bytes */
	size_t len = 0, nlen = len_s * sizeof(int);
	int c, *cbuf;

	assert(s);
	CHECK_STR_BUFFER_LENGTH(buf, buflen, nlen, malfunc);
	/* the character buffer is used as an array of int from here on */
	cbuf = *(int **) buf;

	while (*s) {
		/* NOTE(review): UTF8_GETCHAR is defined elsewhere; it presumably
		 * advances s and jumps to "illegal" on a malformed sequence --
		 * confirm against the macro */
		UTF8_GETCHAR(c, s);
		assert(!is_int_nil(c));
		cbuf[len++] = c;
	}
	*n = len;
	return MAL_SUCCEED;
  illegal:
	throw(MAL, malfunc, SQLSTATE(42000) "Illegal Unicode code point");
}
4315 :
4316 : str
4317 22 : str_strip2(str *buf, size_t *buflen, const char *s, const char *s2)
4318 : {
4319 22 : str msg = MAL_SUCCEED;
4320 22 : size_t len, n, n2, n3;
4321 :
4322 22 : if ((n2 = strlen(s2)) == 0) {
4323 1 : len = strlen(s) + 1;
4324 1 : CHECK_STR_BUFFER_LENGTH(buf, buflen, len, "str.strip2");
4325 1 : strcpy(*buf, s);
4326 1 : return MAL_SUCCEED;
4327 : } else {
4328 21 : if ((msg = trimchars(buf, buflen, &n3, s2, n2, "str.strip2")) != MAL_SUCCEED)
4329 : return msg;
4330 21 : len = strlen(s);
4331 21 : n = lstrip(s, len, *(int **) buf, n3);
4332 21 : s += n;
4333 21 : len -= n;
4334 21 : n = rstrip(s, len, *(int **) buf, n3);
4335 :
4336 21 : n++;
4337 21 : CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.strip2");
4338 21 : strcpy_len(*buf, s, n);
4339 21 : return MAL_SUCCEED;
4340 : }
4341 : }
4342 :
4343 : /* remove the longest string containing only characters from arg2 from
4344 : * either side of arg1 */
4345 : static str
4346 19 : STRStrip2(str *res, const str *arg1, const str *arg2)
4347 : {
4348 19 : str buf = NULL, msg = MAL_SUCCEED;
4349 19 : const char *s = *arg1, *s2 = *arg2;
4350 :
4351 36 : if (strNil(s) || strNil(s2)) {
4352 3 : *res = GDKstrdup(str_nil);
4353 : } else {
4354 16 : size_t buflen = INITIAL_STR_BUFFER_LENGTH * sizeof(int);
4355 :
4356 16 : *res = NULL;
4357 16 : if (!(buf = GDKmalloc(buflen)))
4358 0 : throw(MAL, "str.strip2", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4359 16 : if ((msg = str_strip2(&buf, &buflen, s, s2)) != MAL_SUCCEED) {
4360 0 : GDKfree(buf);
4361 0 : return msg;
4362 : }
4363 16 : *res = GDKstrdup(buf);
4364 : }
4365 :
4366 19 : GDKfree(buf);
4367 19 : if (!*res)
4368 0 : msg = createException(MAL, "str.strip2",
4369 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4370 : return msg;
4371 : }
4372 :
4373 : str
4374 14 : str_ltrim2(str *buf, size_t *buflen, const char *s, const char *s2)
4375 : {
4376 14 : str msg = MAL_SUCCEED;
4377 14 : size_t len, n, n2, n3, nallocate;
4378 :
4379 14 : if ((n2 = strlen(s2)) == 0) {
4380 1 : len = strlen(s) + 1;
4381 1 : CHECK_STR_BUFFER_LENGTH(buf, buflen, len, "str.ltrim2");
4382 1 : strcpy(*buf, s);
4383 1 : return MAL_SUCCEED;
4384 : } else {
4385 13 : if ((msg = trimchars(buf, buflen, &n3, s2, n2, "str.ltrim2")) != MAL_SUCCEED)
4386 : return msg;
4387 13 : len = strlen(s);
4388 13 : n = lstrip(s, len, *(int **) buf, n3);
4389 13 : nallocate = len - n + 1;
4390 :
4391 13 : CHECK_STR_BUFFER_LENGTH(buf, buflen, nallocate, "str.ltrim2");
4392 13 : strcpy_len(*buf, s + n, nallocate);
4393 13 : return MAL_SUCCEED;
4394 : }
4395 : }
4396 :
4397 : /* remove the longest string containing only characters from arg2 from
4398 : * the start (left) of arg1 */
4399 : static str
4400 8 : STRLtrim2(str *res, const str *arg1, const str *arg2)
4401 : {
4402 8 : str buf = NULL, msg = MAL_SUCCEED;
4403 8 : const char *s = *arg1, *s2 = *arg2;
4404 :
4405 16 : if (strNil(s) || strNil(s2)) {
4406 0 : *res = GDKstrdup(str_nil);
4407 : } else {
4408 8 : size_t buflen = INITIAL_STR_BUFFER_LENGTH * sizeof(int);
4409 :
4410 8 : *res = NULL;
4411 8 : if (!(buf = GDKmalloc(buflen)))
4412 0 : throw(MAL, "str.ltrim2", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4413 8 : if ((msg = str_ltrim2(&buf, &buflen, s, s2)) != MAL_SUCCEED) {
4414 0 : GDKfree(buf);
4415 0 : return msg;
4416 : }
4417 8 : *res = GDKstrdup(buf);
4418 : }
4419 :
4420 8 : GDKfree(buf);
4421 8 : if (!*res)
4422 0 : msg = createException(MAL, "str.ltrim2",
4423 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4424 : return msg;
4425 : }
4426 :
4427 : str
4428 13 : str_rtrim2(str *buf, size_t *buflen, const char *s, const char *s2)
4429 : {
4430 13 : str msg = MAL_SUCCEED;
4431 13 : size_t len, n, n2, n3;
4432 :
4433 13 : if ((n2 = strlen(s2)) == 0) {
4434 2 : len = strlen(s) + 1;
4435 2 : CHECK_STR_BUFFER_LENGTH(buf, buflen, len, "str.rtrim2");
4436 2 : strcpy(*buf, s);
4437 2 : return MAL_SUCCEED;
4438 : } else {
4439 11 : if ((msg = trimchars(buf, buflen, &n3, s2, n2, "str.ltrim2")) != MAL_SUCCEED)
4440 : return msg;
4441 11 : len = strlen(s);
4442 11 : n = rstrip(s, len, *(int **) buf, n3);
4443 11 : n++;
4444 :
4445 11 : CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.rtrim2");
4446 11 : strcpy_len(*buf, s, n);
4447 11 : return MAL_SUCCEED;
4448 : }
4449 : }
4450 :
4451 : /* remove the longest string containing only characters from arg2 from
4452 : * the end (right) of arg1 */
4453 : static str
4454 7 : STRRtrim2(str *res, const str *arg1, const str *arg2)
4455 : {
4456 7 : str buf = NULL, msg = MAL_SUCCEED;
4457 7 : const char *s = *arg1, *s2 = *arg2;
4458 :
4459 14 : if (strNil(s) || strNil(s2)) {
4460 0 : *res = GDKstrdup(str_nil);
4461 : } else {
4462 7 : size_t buflen = INITIAL_STR_BUFFER_LENGTH * sizeof(int);
4463 :
4464 7 : *res = NULL;
4465 7 : if (!(buf = GDKmalloc(buflen)))
4466 0 : throw(MAL, "str.rtrim2", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4467 7 : if ((msg = str_rtrim2(&buf, &buflen, s, s2)) != MAL_SUCCEED) {
4468 0 : GDKfree(buf);
4469 0 : return msg;
4470 : }
4471 7 : *res = GDKstrdup(buf);
4472 : }
4473 :
4474 7 : GDKfree(buf);
4475 7 : if (!*res)
4476 0 : msg = createException(MAL, "str.rtrim2",
4477 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4478 : return msg;
4479 : }
4480 :
4481 : static str
4482 60 : pad(str *buf, size_t *buflen, const char *s, const char *pad, int len, int left,
4483 : const char *malfunc)
4484 : {
4485 60 : size_t slen, padlen, repeats, residual, i, nlen;
4486 60 : char *res;
4487 :
4488 60 : if (len < 0)
4489 : len = 0;
4490 :
4491 60 : slen = (size_t) UTF8_strlen(s);
4492 60 : if (slen > (size_t) len) {
4493 : /* truncate */
4494 20 : pad = UTF8_strtail(s, len);
4495 20 : slen = pad - s + 1;
4496 :
4497 20 : CHECK_STR_BUFFER_LENGTH(buf, buflen, slen, malfunc);
4498 20 : strcpy_len(*buf, s, slen);
4499 20 : return MAL_SUCCEED;
4500 : }
4501 :
4502 40 : padlen = (size_t) UTF8_strlen(pad);
4503 40 : if (slen == (size_t) len || padlen == 0) {
4504 : /* nothing to do (no padding if there is no pad string) */
4505 0 : slen = strlen(s) + 1;
4506 0 : CHECK_STR_BUFFER_LENGTH(buf, buflen, slen, malfunc);
4507 0 : strcpy(*buf, s);
4508 0 : return MAL_SUCCEED;
4509 : }
4510 :
4511 40 : repeats = ((size_t) len - slen) / padlen;
4512 40 : residual = ((size_t) len - slen) % padlen;
4513 40 : if (residual > 0)
4514 20 : residual = (size_t) (UTF8_strtail(pad, (int) residual) - pad);
4515 40 : padlen = strlen(pad);
4516 40 : slen = strlen(s);
4517 :
4518 40 : nlen = slen + repeats * padlen + residual + 1;
4519 40 : CHECK_STR_BUFFER_LENGTH(buf, buflen, nlen, malfunc);
4520 40 : res = *buf;
4521 40 : if (left) {
4522 87 : for (i = 0; i < repeats; i++)
4523 67 : memcpy(res + i * padlen, pad, padlen);
4524 20 : if (residual > 0)
4525 10 : memcpy(res + repeats * padlen, pad, residual);
4526 20 : if (slen > 0)
4527 20 : memcpy(res + repeats * padlen + residual, s, slen);
4528 : } else {
4529 20 : if (slen > 0)
4530 20 : memcpy(res, s, slen);
4531 87 : for (i = 0; i < repeats; i++)
4532 67 : memcpy(res + slen + i * padlen, pad, padlen);
4533 20 : if (residual > 0)
4534 10 : memcpy(res + slen + repeats * padlen, pad, residual);
4535 : }
4536 40 : res[repeats * padlen + residual + slen] = 0;
4537 40 : return MAL_SUCCEED;
4538 : }
4539 :
4540 : str
4541 8 : str_lpad(str *buf, size_t *buflen, const char *s, int len)
4542 : {
4543 4 : return pad(buf, buflen, s, " ", len, 1, "str.lpad");
4544 : }
4545 :
4546 : /* Fill up 'arg1' to length 'len' by prepending whitespaces.
4547 : * If 'arg1' is already longer than 'len', then it's truncated on the right
4548 : * (NB: this is the PostgreSQL definition).
4549 : *
4550 : * Example: lpad('hi', 5)
4551 : * Result: ' hi'
4552 : */
4553 : static str
4554 4 : STRLpad(str *res, const str *arg1, const int *len)
4555 : {
4556 4 : str buf = NULL, msg = MAL_SUCCEED;
4557 4 : const char *s = *arg1;
4558 4 : int l = *len;
4559 :
4560 8 : if (strNil(s) || is_int_nil(l)) {
4561 0 : *res = GDKstrdup(str_nil);
4562 : } else {
4563 4 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
4564 :
4565 4 : *res = NULL;
4566 4 : if (!(buf = GDKmalloc(buflen)))
4567 0 : throw(MAL, "str.lpad", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4568 4 : if ((msg = str_lpad(&buf, &buflen, s, l)) != MAL_SUCCEED) {
4569 0 : GDKfree(buf);
4570 0 : return msg;
4571 : }
4572 4 : *res = GDKstrdup(buf);
4573 : }
4574 :
4575 4 : GDKfree(buf);
4576 4 : if (!*res)
4577 0 : msg = createException(MAL, "str.lpad", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4578 : return msg;
4579 : }
4580 :
4581 : str
4582 8 : str_rpad(str *buf, size_t *buflen, const char *s, int len)
4583 : {
4584 4 : return pad(buf, buflen, s, " ", len, 0, "str.lpad");
4585 : }
4586 :
4587 : /* Fill up 'arg1' to length 'len' by appending whitespaces.
4588 : * If 'arg1' is already longer than 'len', then it's truncated (on the right)
4589 : * (NB: this is the PostgreSQL definition).
4590 : *
4591 : * Example: rpad('hi', 5)
4592 : * Result: 'hi '
4593 : */
4594 : static str
4595 4 : STRRpad(str *res, const str *arg1, const int *len)
4596 : {
4597 4 : str buf = NULL, msg = MAL_SUCCEED;
4598 4 : const char *s = *arg1;
4599 4 : int l = *len;
4600 :
4601 8 : if (strNil(s) || is_int_nil(l)) {
4602 0 : *res = GDKstrdup(str_nil);
4603 : } else {
4604 4 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
4605 :
4606 4 : *res = NULL;
4607 4 : if (!(buf = GDKmalloc(buflen)))
4608 0 : throw(MAL, "str.rpad", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4609 4 : if ((msg = str_rpad(&buf, &buflen, s, l)) != MAL_SUCCEED) {
4610 0 : GDKfree(buf);
4611 0 : return msg;
4612 : }
4613 4 : *res = GDKstrdup(buf);
4614 : }
4615 :
4616 4 : GDKfree(buf);
4617 4 : if (!*res)
4618 0 : msg = createException(MAL, "str.rpad", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4619 : return msg;
4620 : }
4621 :
4622 : str
4623 22 : str_lpad3(str *buf, size_t *buflen, const char *s, int len, const char *s2)
4624 : {
4625 16 : return pad(buf, buflen, s, s2, len, 1, "str.lpad2");
4626 : }
4627 :
4628 : /* Fill up 'arg1' to length 'len' by prepending characters from 'arg2'
4629 : * If 'arg1' is already longer than 'len', then it's truncated on the right
4630 : * (NB: this is the PostgreSQL definition).
4631 : *
4632 : * Example: lpad('hi', 5, 'xy')
4633 : * Result: xyxhi
4634 : */
4635 : static str
4636 6 : STRLpad3(str *res, const str *arg1, const int *len, const str *arg2)
4637 : {
4638 6 : str buf = NULL, msg = MAL_SUCCEED;
4639 6 : const char *s = *arg1, *s2 = *arg2;
4640 6 : int l = *len;
4641 :
4642 18 : if (strNil(s) || strNil(s2) || is_int_nil(l)) {
4643 0 : *res = GDKstrdup(str_nil);
4644 : } else {
4645 6 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
4646 :
4647 6 : *res = NULL;
4648 6 : if (!(buf = GDKmalloc(buflen)))
4649 0 : throw(MAL, "str.lpad2", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4650 6 : if ((msg = str_lpad3(&buf, &buflen, s, l, s2)) != MAL_SUCCEED) {
4651 0 : GDKfree(buf);
4652 0 : return msg;
4653 : }
4654 6 : *res = GDKstrdup(buf);
4655 : }
4656 :
4657 6 : GDKfree(buf);
4658 6 : if (!*res)
4659 0 : msg = createException(MAL, "str.lpad2",
4660 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4661 : return msg;
4662 : }
4663 :
4664 : str
4665 22 : str_rpad3(str *buf, size_t *buflen, const char *s, int len, const char *s2)
4666 : {
4667 16 : return pad(buf, buflen, s, s2, len, 0, "str.rpad2");
4668 : }
4669 :
4670 : /* Fill up 'arg1' to length 'len' by appending characters from 'arg2'
4671 : * If 'arg1' is already longer than 'len', then it's truncated (on the right)
4672 : * (NB: this is the PostgreSQL definition).
4673 : *
4674 : * Example: rpad('hi', 5, 'xy')
4675 : * Result: hixyx
4676 : */
4677 : static str
4678 6 : STRRpad3(str *res, const str *arg1, const int *len, const str *arg2)
4679 : {
4680 6 : str buf = NULL, msg = MAL_SUCCEED;
4681 6 : const char *s = *arg1, *s2 = *arg2;
4682 6 : int l = *len;
4683 :
4684 18 : if (strNil(s) || strNil(s2) || is_int_nil(l)) {
4685 0 : *res = GDKstrdup(str_nil);
4686 : } else {
4687 6 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
4688 :
4689 6 : *res = NULL;
4690 6 : if (!(buf = GDKmalloc(buflen)))
4691 0 : throw(MAL, "str.rpad2", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4692 6 : if ((msg = str_rpad3(&buf, &buflen, s, l, s2)) != MAL_SUCCEED) {
4693 0 : GDKfree(buf);
4694 0 : return msg;
4695 : }
4696 6 : *res = GDKstrdup(buf);
4697 : }
4698 :
4699 6 : GDKfree(buf);
4700 6 : if (!*res)
4701 0 : msg = createException(MAL, "str.rpad2",
4702 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4703 : return msg;
4704 : }
4705 :
4706 : str
4707 100535 : str_substitute(str *buf, size_t *buflen, const char *s, const char *src,
4708 : const char *dst, bit repeat)
4709 : {
4710 100535 : size_t lsrc = strlen(src), ldst = strlen(dst), n, l = strlen(s);
4711 100535 : char *b, *fnd;
4712 100535 : const char *pfnd;
4713 :
4714 100535 : if (!lsrc || !l) { /* s/src is an empty string, there's nothing to substitute */
4715 7 : l++;
4716 7 : CHECK_STR_BUFFER_LENGTH(buf, buflen, l, "str.substitute");
4717 7 : strcpy(*buf, s);
4718 7 : return MAL_SUCCEED;
4719 : }
4720 :
4721 100528 : n = l + ldst;
4722 100528 : if (repeat && ldst > lsrc)
4723 77340 : n = (ldst * l) / lsrc; /* max length */
4724 :
4725 100528 : n++;
4726 100528 : CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.substitute");
4727 100528 : b = *buf;
4728 100528 : pfnd = s;
4729 105695 : do {
4730 105695 : fnd = strstr(pfnd, src);
4731 105695 : if (fnd == NULL)
4732 : break;
4733 5168 : n = fnd - pfnd;
4734 5168 : if (n > 0) {
4735 4401 : strcpy_len(b, pfnd, n + 1);
4736 4401 : b += n;
4737 : }
4738 5168 : if (ldst > 0) {
4739 406 : strcpy_len(b, dst, ldst + 1);
4740 405 : b += ldst;
4741 : }
4742 5167 : if (*fnd == 0)
4743 : break;
4744 5167 : pfnd = fnd + lsrc;
4745 5167 : } while (repeat);
4746 100527 : strcpy(b, pfnd);
4747 100527 : return MAL_SUCCEED;
4748 : }
4749 :
4750 : static str
4751 197 : STRSubstitute(str *res, const str *arg1, const str *arg2, const str *arg3,
4752 : const bit *g)
4753 : {
4754 197 : str buf = NULL, msg = MAL_SUCCEED;
4755 197 : const char *s = *arg1, *s2 = *arg2, *s3 = *arg3;
4756 :
4757 590 : if (strNil(s) || strNil(s2) || strNil(s3)) {
4758 2 : *res = GDKstrdup(str_nil);
4759 : } else {
4760 195 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
4761 :
4762 195 : *res = NULL;
4763 195 : if (!(buf = GDKmalloc(buflen)))
4764 0 : throw(MAL, "str.substitute", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4765 195 : if ((msg = str_substitute(&buf, &buflen, s, s2, s3, *g)) != MAL_SUCCEED) {
4766 0 : GDKfree(buf);
4767 0 : return msg;
4768 : }
4769 195 : *res = GDKstrdup(buf);
4770 : }
4771 :
4772 197 : GDKfree(buf);
4773 197 : if (!*res)
4774 0 : msg = createException(MAL, "str.substitute",
4775 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4776 : return msg;
4777 : }
4778 :
4779 : static str
4780 9 : STRascii(int *ret, const str *s)
4781 : {
4782 9 : return str_wchr_at(ret, *s, 0);
4783 : }
4784 :
4785 : str
4786 4256 : str_substring_tail(str *buf, size_t *buflen, const char *s, int start)
4787 : {
4788 4256 : if (start < 1)
4789 : start = 1;
4790 4256 : start--;
4791 4248 : return str_tail(buf, buflen, s, start);
4792 : }
4793 :
4794 : static str
4795 8 : STRsubstringTail(str *res, const str *arg1, const int *start)
4796 : {
4797 8 : str buf = NULL, msg = MAL_SUCCEED;
4798 8 : const char *s = *arg1;
4799 8 : int st = *start;
4800 :
4801 16 : if (strNil(s) || is_int_nil(st)) {
4802 0 : *res = GDKstrdup(str_nil);
4803 : } else {
4804 8 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
4805 :
4806 8 : *res = NULL;
4807 8 : if (!(buf = GDKmalloc(buflen)))
4808 0 : throw(MAL, "str.substringTail", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4809 8 : if ((msg = str_substring_tail(&buf, &buflen, s, st)) != MAL_SUCCEED) {
4810 0 : GDKfree(buf);
4811 0 : return msg;
4812 : }
4813 8 : *res = GDKstrdup(buf);
4814 : }
4815 :
4816 8 : GDKfree(buf);
4817 8 : if (!*res)
4818 0 : msg = createException(MAL, "str.substringTail",
4819 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4820 : return msg;
4821 : }
4822 :
4823 : str
4824 3802797 : str_sub_string(str *buf, size_t *buflen, const char *s, int start, int l)
4825 : {
4826 3802797 : if (start < 1)
4827 : start = 1;
4828 3802797 : start--;
4829 3802777 : return str_Sub_String(buf, buflen, s, start, l);
4830 : }
4831 :
4832 : static str
4833 23 : STRsubstring(str *res, const str *arg1, const int *start, const int *ll)
4834 : {
4835 23 : str buf = NULL, msg = MAL_SUCCEED;
4836 23 : const char *s = *arg1;
4837 23 : int st = *start, l = *ll;
4838 :
4839 46 : if (strNil(s) || is_int_nil(st) || is_int_nil(l)) {
4840 3 : *res = GDKstrdup(str_nil);
4841 : } else {
4842 20 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
4843 :
4844 20 : *res = NULL;
4845 20 : if (!(buf = GDKmalloc(buflen)))
4846 0 : throw(MAL, "str.substring", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4847 20 : if ((msg = str_sub_string(&buf, &buflen, s, st, l)) != MAL_SUCCEED) {
4848 0 : GDKfree(buf);
4849 0 : return msg;
4850 : }
4851 20 : *res = GDKstrdup(buf);
4852 : }
4853 :
4854 23 : GDKfree(buf);
4855 23 : if (!*res)
4856 0 : msg = createException(MAL, "str.substring",
4857 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4858 : return msg;
4859 : }
4860 :
4861 : static str
4862 20 : STRprefix(str *res, const str *arg1, const int *ll)
4863 : {
4864 20 : str buf = NULL, msg = MAL_SUCCEED;
4865 20 : const char *s = *arg1;
4866 20 : int l = *ll;
4867 :
4868 40 : if (strNil(s) || is_int_nil(l)) {
4869 0 : *res = GDKstrdup(str_nil);
4870 : } else {
4871 20 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
4872 :
4873 20 : *res = NULL;
4874 20 : if (!(buf = GDKmalloc(buflen)))
4875 0 : throw(MAL, "str.prefix", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4876 20 : if ((msg = str_Sub_String(&buf, &buflen, s, 0, l)) != MAL_SUCCEED) {
4877 0 : GDKfree(buf);
4878 0 : return msg;
4879 : }
4880 20 : *res = GDKstrdup(buf);
4881 : }
4882 :
4883 20 : GDKfree(buf);
4884 20 : if (!*res)
4885 0 : msg = createException(MAL, "str.prefix",
4886 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4887 : return msg;
4888 : }
4889 :
4890 : str
4891 9 : str_suffix(str *buf, size_t *buflen, const char *s, int l)
4892 : {
4893 9 : int start = (int) (strlen(s) - l);
4894 9 : return str_Sub_String(buf, buflen, s, start, l);
4895 : }
4896 :
4897 : static str
4898 5 : STRsuffix(str *res, const str *arg1, const int *ll)
4899 : {
4900 5 : str buf = NULL, msg = MAL_SUCCEED;
4901 5 : const char *s = *arg1;
4902 5 : int l = *ll;
4903 :
4904 10 : if (strNil(s) || is_int_nil(l)) {
4905 0 : *res = GDKstrdup(str_nil);
4906 : } else {
4907 5 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
4908 :
4909 5 : *res = NULL;
4910 5 : if (!(buf = GDKmalloc(buflen)))
4911 0 : throw(MAL, "str.suffix", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4912 5 : if ((msg = str_suffix(&buf, &buflen, s, l)) != MAL_SUCCEED) {
4913 0 : GDKfree(buf);
4914 0 : return msg;
4915 : }
4916 5 : *res = GDKstrdup(buf);
4917 : }
4918 :
4919 5 : GDKfree(buf);
4920 5 : if (!*res)
4921 0 : msg = createException(MAL, "str.suffix",
4922 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4923 : return msg;
4924 : }
4925 :
/* Search 'needle' in 'haystack', starting at the 1-based position 'start'
 * (values <= 0 are treated as 1).  The search itself is done by
 * str_search() on the tail returned by UTF8_strtail().  Returns the
 * str_search() result shifted by the start position, or 0 when
 * str_search() reports a negative value (no match). */
int
str_locate2(const char *needle, const char *haystack, int start)
{
	int first = (start > 0) ? start : 1;
	char *from = UTF8_strtail(haystack, first - 1);
	int pos = str_search(from, needle, str_strlen(needle));

	if (pos < 0)
		return 0;
	return pos + first;
}
4937 :
4938 : static str
4939 27274 : STRlocate3(int *ret, const str *needle, const str *haystack, const int *start)
4940 : {
4941 27274 : const char *s = *needle, *s2 = *haystack;
4942 27274 : int st = *start;
4943 :
4944 54614 : *ret = (strNil(s) || strNil(s2) || is_int_nil(st)) ?
4945 27274 : int_nil :
4946 66 : str_locate2(s, s2, st);
4947 27274 : return MAL_SUCCEED;
4948 : }
4949 :
4950 : static str
4951 16 : STRlocate(int *ret, const str *needle, const str *haystack)
4952 : {
4953 16 : const char *s = *needle, *s2 = *haystack;
4954 :
4955 45 : *ret = (strNil(s) || strNil(s2)) ? int_nil : str_locate2(s, s2, 1);
4956 16 : return MAL_SUCCEED;
4957 : }
4958 :
4959 : str
4960 223 : str_insert(str *buf, size_t *buflen, const char *s, int strt, int l,
4961 : const char *s2)
4962 : {
4963 223 : str v;
4964 223 : int l1 = UTF8_strlen(s);
4965 223 : size_t nextlen;
4966 :
4967 223 : if (l < 0)
4968 0 : throw(MAL, "str.insert",
4969 : SQLSTATE(42000)
4970 : "The number of characters for insert function must be non negative");
4971 223 : if (strt < 0) {
4972 0 : if (-strt <= l1)
4973 0 : strt = l1 + strt;
4974 : else
4975 : strt = 0;
4976 : }
4977 223 : if (strt > l1)
4978 : strt = l1;
4979 :
4980 223 : nextlen = strlen(s) + strlen(s2) + 1;
4981 223 : CHECK_STR_BUFFER_LENGTH(buf, buflen, nextlen, "str.insert");
4982 223 : v = *buf;
4983 223 : if (strt > 0)
4984 216 : v = UTF8_strncpy(v, s, strt);
4985 223 : strcpy(v, s2);
4986 223 : if (strt + l < l1)
4987 10 : strcat(v, UTF8_strtail((char *) s, strt + l));
4988 : return MAL_SUCCEED;
4989 : }
4990 :
4991 : static str
4992 225 : STRinsert(str *res, const str *input, const int *start, const int *nchars,
4993 : const str *input2)
4994 : {
4995 225 : str buf = NULL, msg = MAL_SUCCEED;
4996 225 : const char *s = *input, *s2 = *input2;
4997 225 : int st = *start, n = *nchars;
4998 :
4999 449 : if (strNil(s) || is_int_nil(st) || is_int_nil(n) || strNil(s2)) {
5000 2 : *res = GDKstrdup(str_nil);
5001 : } else {
5002 223 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
5003 :
5004 223 : *res = NULL;
5005 223 : if (!(buf = GDKmalloc(buflen)))
5006 0 : throw(MAL, "str.insert", SQLSTATE(HY013) MAL_MALLOC_FAIL);
5007 223 : if ((msg = str_insert(&buf, &buflen, s, st, n, s2)) != MAL_SUCCEED) {
5008 0 : GDKfree(buf);
5009 0 : return msg;
5010 : }
5011 223 : *res = GDKstrdup(buf);
5012 : }
5013 :
5014 225 : GDKfree(buf);
5015 225 : if (!*res)
5016 0 : msg = createException(MAL, "str.insert",
5017 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
5018 : return msg;
5019 : }
5020 :
5021 : static str
5022 197 : STRreplace(str *ret, const str *s1, const str *s2, const str *s3)
5023 : {
5024 197 : bit flag = TRUE;
5025 197 : return STRSubstitute(ret, s1, s2, s3, &flag);
5026 : }
5027 :
5028 : str
5029 15 : str_repeat(str *buf, size_t *buflen, const char *s, int c)
5030 : {
5031 15 : size_t l = strlen(s), nextlen;
5032 :
5033 15 : if (l >= INT_MAX)
5034 0 : throw(MAL, "str.repeat", SQLSTATE(HY013) MAL_MALLOC_FAIL);
5035 15 : nextlen = (size_t) c *l + 1;
5036 :
5037 15 : CHECK_STR_BUFFER_LENGTH(buf, buflen, nextlen, "str.repeat");
5038 15 : str t = *buf;
5039 15 : *t = 0;
5040 160043 : for (int i = c; i > 0; i--, t += l)
5041 160028 : strcpy(t, s);
5042 : return MAL_SUCCEED;
5043 : }
5044 :
5045 : static str
5046 11 : STRrepeat(str *res, const str *arg1, const int *c)
5047 : {
5048 11 : str buf = NULL, msg = MAL_SUCCEED;
5049 11 : const char *s = *arg1;
5050 11 : int cc = *c;
5051 :
5052 21 : if (strNil(s) || is_int_nil(cc) || cc < 0) {
5053 1 : *res = GDKstrdup(str_nil);
5054 : } else {
5055 10 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
5056 :
5057 10 : *res = NULL;
5058 10 : if (!(buf = GDKmalloc(buflen)))
5059 0 : throw(MAL, "str.repeat", SQLSTATE(HY013) MAL_MALLOC_FAIL);
5060 10 : if ((msg = str_repeat(&buf, &buflen, s, cc)) != MAL_SUCCEED) {
5061 0 : GDKfree(buf);
5062 0 : return msg;
5063 : }
5064 10 : *res = GDKstrdup(buf);
5065 : }
5066 :
5067 11 : GDKfree(buf);
5068 11 : if (!*res)
5069 0 : msg = createException(MAL, "str.repeat",
5070 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
5071 : return msg;
5072 : }
5073 :
5074 : static str
5075 1 : STRspace(str *res, const int *ll)
5076 : {
5077 1 : str buf = NULL, msg = MAL_SUCCEED;
5078 1 : int l = *ll;
5079 :
5080 1 : if (is_int_nil(l) || l < 0) {
5081 0 : *res = GDKstrdup(str_nil);
5082 : } else {
5083 1 : const char space[] = " ", *s = space;
5084 1 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
5085 :
5086 1 : *res = NULL;
5087 1 : if (!(buf = GDKmalloc(buflen)))
5088 0 : throw(MAL, "str.space", SQLSTATE(HY013) MAL_MALLOC_FAIL);
5089 1 : if ((msg = str_repeat(&buf, &buflen, s, l)) != MAL_SUCCEED) {
5090 0 : GDKfree(buf);
5091 0 : return msg;
5092 : }
5093 1 : *res = GDKstrdup(buf);
5094 : }
5095 :
5096 1 : GDKfree(buf);
5097 1 : if (!*res)
5098 0 : msg = createException(MAL, "str.space",
5099 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
5100 : return msg;
5101 : }
5102 :
5103 : static str
5104 4 : STRasciify(str *r, const str *s)
5105 : {
5106 : #ifdef HAVE_ICONV
5107 :
5108 4 : if (strNil(*s)) {
5109 0 : if ((*r = GDKstrdup(str_nil)) == NULL)
5110 0 : throw(MAL, "str.asciify", SQLSTATE(HY013) MAL_MALLOC_FAIL);
5111 : else
5112 : return MAL_SUCCEED;
5113 : }
5114 :
5115 4 : iconv_t cd;
5116 4 : const str f = "UTF-8", t = "ASCII//TRANSLIT";
5117 4 : str in = *s, out;
5118 4 : size_t in_len = strlen(in), out_len = in_len * 4; /* oversized as a single utf8 char could change into multiple ascii char */
5119 :
5120 4 : if ((cd = iconv_open(t, f)) == (iconv_t) (-1))
5121 0 : throw(MAL, "str.asciify", "ICONV: cannot convert from (%s) to (%s).", f, t);
5122 :
5123 4 : if ((*r = out = GDKmalloc(out_len)) == NULL) {
5124 0 : iconv_close(cd);
5125 0 : throw(MAL, "str.asciify", SQLSTATE(HY013) MAL_MALLOC_FAIL);
5126 : }
5127 :
5128 4 : str o = out;
5129 :
5130 4 : if (iconv(cd, &in, &in_len, &o, &out_len) == (size_t) -1) {
5131 0 : GDKfree(out);
5132 0 : *r = NULL;
5133 0 : iconv_close(cd);
5134 0 : throw(MAL, "str.asciify", "Conversion failed, possibly due to system locale %s.", setlocale(0, NULL));
5135 : }
5136 :
5137 4 : *o = '\0';
5138 4 : iconv_close(cd);
5139 4 : return MAL_SUCCEED;
5140 :
5141 : #else
5142 : throw(MAL, "str.asciify", "ICONV library not available.");
5143 : #endif
5144 : }
5145 :
5146 : static inline void
5147 154 : BBPnreclaim(int nargs, ...)
5148 : {
5149 154 : va_list valist;
5150 154 : va_start(valist, nargs);
5151 669 : for (int i = 0; i < nargs; i++) {
5152 514 : BAT *b = va_arg(valist, BAT *);
5153 824 : BBPreclaim(b);
5154 : }
5155 155 : va_end(valist);
5156 155 : }
5157 :
5158 : /* scan select loop with or without candidates */
5159 : #define scanloop(TEST, KEEP_NULLS) \
5160 : do { \
5161 : TRC_DEBUG(ALGO, \
5162 : "scanselect(b=%s#"BUNFMT",anti=%d): " \
5163 : "scanselect %s\n", BATgetId(b), BATcount(b), \
5164 : anti, #TEST); \
5165 : if (!s || BATtdense(s)) { \
5166 : for (; p < q; p++) { \
5167 : GDK_CHECK_TIMEOUT(timeoffset, counter, \
5168 : GOTO_LABEL_TIMEOUT_HANDLER(bailout)); \
5169 : const char *restrict v = BUNtvar(bi, p - off); \
5170 : if ((TEST) || ((KEEP_NULLS) && *v == '\200')) \
5171 : vals[cnt++] = p; \
5172 : } \
5173 : } else { \
5174 : for (; p < ncands; p++) { \
5175 : GDK_CHECK_TIMEOUT(timeoffset, counter, \
5176 : GOTO_LABEL_TIMEOUT_HANDLER(bailout)); \
5177 : oid o = canditer_next(ci); \
5178 : const char *restrict v = BUNtvar(bi, o - off); \
5179 : if ((TEST) || ((KEEP_NULLS) && *v == '\200')) \
5180 : vals[cnt++] = o; \
5181 : } \
5182 : } \
5183 : } while (0)
5184 :
5185 : /* scan select loop with or without candidates */
5186 : #define scanloop_anti(TEST, KEEP_NULLS) \
5187 : do { \
5188 : TRC_DEBUG(ALGO, \
5189 : "scanselect(b=%s#"BUNFMT",anti=%d): " \
5190 : "scanselect %s\n", BATgetId(b), BATcount(b), \
5191 : anti, #TEST); \
5192 : if (!s || BATtdense(s)) { \
5193 : for (; p < q; p++) { \
5194 : GDK_CHECK_TIMEOUT(timeoffset, counter, \
5195 : GOTO_LABEL_TIMEOUT_HANDLER(bailout)); \
5196 : const char *restrict v = BUNtvar(bi, p - off); \
5197 : if ((TEST) || ((KEEP_NULLS) && *v == '\200')) \
5198 : vals[cnt++] = p; \
5199 : } \
5200 : } else { \
5201 : for (; p < ncands; p++) { \
5202 : GDK_CHECK_TIMEOUT(timeoffset, counter, \
5203 : GOTO_LABEL_TIMEOUT_HANDLER(bailout)); \
5204 : oid o = canditer_next(ci); \
5205 : const char *restrict v = BUNtvar(bi, o - off); \
5206 : if ((TEST) || ((KEEP_NULLS) && *v == '\200')) \
5207 : vals[cnt++] = o; \
5208 : } \
5209 : } \
5210 : } while (0)
5211 :
5212 : static str
5213 18 : str_select(BAT *bn, BAT *b, BAT *s, struct canditer *ci, BUN p, BUN q,
5214 : BUN *rcnt, const char *key, bool anti,
5215 : int (*str_cmp)(const char *, const char *, int),
5216 : bool keep_nulls)
5217 : {
5218 18 : if (strNil(key))
5219 : return MAL_SUCCEED;
5220 :
5221 18 : BATiter bi = bat_iterator(b);
5222 18 : BUN cnt = 0, ncands = ci->ncand;
5223 18 : oid off = b->hseqbase, *restrict vals = Tloc(bn, 0);
5224 18 : str msg = MAL_SUCCEED;
5225 18 : int klen = str_strlen(key);
5226 :
5227 18 : size_t counter = 0;
5228 18 : lng timeoffset = 0;
5229 18 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
5230 18 : if (qry_ctx != NULL)
5231 18 : timeoffset = (qry_ctx->starttime
5232 18 : && qry_ctx->querytimeout) ? (qry_ctx->starttime +
5233 18 : qry_ctx->querytimeout) : 0;
5234 :
5235 18 : if (anti) /* keep nulls ? (use false for now) */
5236 0 : scanloop_anti(!strNil(v) && str_cmp(v, key, klen) != 0, keep_nulls);
5237 : else
5238 54 : scanloop(!strNil(v) && str_cmp(v, key, klen) == 0, keep_nulls);
5239 :
5240 0 : bailout:
5241 18 : bat_iterator_end(&bi);
5242 18 : *rcnt = cnt;
5243 18 : return msg;
5244 : }
5245 :
5246 : static str
5247 18 : STRselect(bat *r_id, const bat *b_id, const bat *cb_id, const char *key,
5248 : const bit anti, int (*str_cmp)(const char *, const char *, int),
5249 : const str fname)
5250 : {
5251 18 : str msg = MAL_SUCCEED;
5252 :
5253 18 : BAT *b, *cb = NULL, *r = NULL, *old_s = NULL;;
5254 18 : BUN p = 0, q = 0, rcnt = 0;
5255 18 : struct canditer ci;
5256 18 : bool with_strimps = false,
5257 18 : with_strimps_anti = false;
5258 :
5259 18 : if (!(b = BATdescriptor(*b_id)))
5260 0 : throw(MAL, fname, RUNTIME_OBJECT_MISSING);
5261 :
5262 18 : if (cb_id && !is_bat_nil(*cb_id) && !(cb = BATdescriptor(*cb_id))) {
5263 0 : BBPreclaim(b);
5264 0 : throw(MAL, fname, RUNTIME_OBJECT_MISSING);
5265 : }
5266 :
5267 18 : assert(ATOMstorage(b->ttype) == TYPE_str);
5268 :
5269 18 : if (BAThasstrimps(b)) {
5270 0 : if (STRMPcreate(b, NULL) == GDK_SUCCEED) {
5271 0 : BAT *tmp_s = STRMPfilter(b, cb, key, anti);
5272 0 : if (tmp_s) {
5273 0 : old_s = cb;
5274 0 : cb = tmp_s;
5275 0 : if (!anti)
5276 : with_strimps = true;
5277 : else
5278 0 : with_strimps_anti = true;
5279 : }
5280 : } else {
5281 0 : GDKclrerr();
5282 : }
5283 : }
5284 :
5285 36 : MT_thread_setalgorithm(with_strimps ?
5286 18 : "string_select: strcmp function using strimps" :
5287 : (with_strimps_anti ?
5288 : "string_select: strcmp function using strimps anti"
5289 : : "string_select: strcmp function with no accelerator"));
5290 :
5291 18 : canditer_init(&ci, b, cb);
5292 18 : if (!(r = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT))) {
5293 0 : BBPnreclaim(2, b, cb);
5294 0 : throw(MAL, fname, SQLSTATE(HY013) MAL_MALLOC_FAIL);
5295 : }
5296 :
5297 18 : if (!cb || BATtdense(cb)) {
5298 0 : if (cb) {
5299 18 : assert(BATtdense(cb));
5300 18 : p = (BUN) cb->tseqbase;
5301 18 : q = p + BATcount(cb);
5302 18 : if ((oid) p < b->hseqbase)
5303 : p = b->hseqbase;
5304 18 : if ((oid) q > b->hseqbase + BATcount(b))
5305 : q = b->hseqbase + BATcount(b);
5306 : } else {
5307 0 : p = b->hseqbase;
5308 0 : q = BATcount(b) + b->hseqbase;
5309 : }
5310 : }
5311 :
5312 36 : msg = str_select(r, b, cb, &ci, p, q, &rcnt, key, anti
5313 18 : && !with_strimps_anti, str_cmp, with_strimps_anti);
5314 :
5315 18 : if (!msg) {
5316 18 : BATsetcount(r, rcnt);
5317 18 : r->tsorted = true;
5318 18 : r->trevsorted = r->batCount <= 1;
5319 18 : r->tkey = true;
5320 18 : r->tnil = false;
5321 18 : r->tnonil = true;
5322 36 : r->tseqbase = rcnt == 0 ?
5323 18 : 0 : rcnt == 1 ?
5324 8 : *(const oid *) Tloc(r, 0) : rcnt == b->batCount ? b->hseqbase : oid_nil;
5325 :
5326 18 : if (with_strimps_anti) {
5327 0 : BAT *rev;
5328 0 : if (old_s) {
5329 0 : rev = BATdiffcand(old_s, r);
5330 : #ifndef NDEBUG
5331 0 : BAT *is = BATintersectcand(old_s, r);
5332 0 : if (is) {
5333 0 : assert(is->batCount == r->batCount);
5334 0 : BBPreclaim(is);
5335 : }
5336 0 : assert(rev->batCount == old_s->batCount - r->batCount);
5337 : #endif
5338 : } else
5339 0 : rev = BATnegcands(b->batCount, r);
5340 :
5341 0 : BBPreclaim(r);
5342 0 : r = rev;
5343 0 : if (r == NULL)
5344 0 : msg = createException(MAL, fname, SQLSTATE(HY013) MAL_MALLOC_FAIL);
5345 : }
5346 : }
5347 :
5348 18 : if (r && !msg) {
5349 18 : *r_id = r->batCacheid;
5350 18 : BBPkeepref(r);
5351 : } else {
5352 0 : BBPreclaim(r);
5353 : }
5354 :
5355 18 : BBPnreclaim(3, b, cb, old_s);
5356 18 : return msg;
5357 : }
5358 :
/*
 * Unpack the arguments of a string select instruction from the MAL stack:
 *   0: result bat id, 1: input bat id, 2: candidate bat id, 3: key.
 * With five arguments, argument 4 is the anti flag and ICASE is false.
 * Otherwise argument 5 is the anti flag and ICASE is true.
 *
 * NOTE(review): ICASE is derived from the argument count only; the value
 * of argument 4 is never read in the six-argument form.  Confirm that
 * this is intended.
 */
#define STRSELECT_MAPARGS(STK, PCI, R_ID, B_ID, CB_ID, KEY, ICASE, ANTI) \
	do { \
		R_ID = getArgReference(STK, PCI, 0); \
		B_ID = getArgReference(STK, PCI, 1); \
		CB_ID = getArgReference(STK, PCI, 2); \
		KEY = *getArgReference_str(STK, PCI, 3); \
		if (PCI->argc == 5) { \
			ICASE = 0; \
			ANTI = *getArgReference_bit(STK, PCI, 4); \
		} else { \
			ICASE = 1; \
			ANTI = *getArgReference_bit(STK, PCI, 5); \
		} \
	} while (0)
5369 :
5370 : /**
5371 : * @r_id: result oid
5372 : * @b_id: input bat oid
5373 : * @cb_id: input bat candidates oid
5374 : * @key: input string
5375 : * @icase: ignore case
5376 : * @anti: anti join
5377 : */
5378 : static str
5379 6 : STRstartswithselect(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
5380 : {
5381 6 : (void) cntxt;
5382 6 : (void) mb;
5383 :
5384 6 : bat *r_id = NULL, *b_id = NULL, *cb_id = NULL;
5385 6 : char *key = NULL;
5386 6 : bit icase = 0, anti = 0;
5387 :
5388 6 : STRSELECT_MAPARGS(stk, pci, r_id, b_id, cb_id, key, icase, anti);
5389 6 : return STRselect(r_id, b_id, cb_id, key, anti,
5390 : icase ? str_is_iprefix : str_is_prefix, "str.startswithselect");
5391 : }
5392 :
5393 : /**
5394 : * @r_id: result oid
5395 : * @b_id: input bat oid
5396 : * @cb_id: input bat candidates oid
5397 : * @key: input string
5398 : * @icase: ignore case
5399 : * @anti: anti join
5400 : */
5401 : static str
5402 6 : STRendswithselect(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
5403 : {
5404 6 : (void) cntxt;
5405 6 : (void) mb;
5406 :
5407 6 : bat *r_id = NULL, *b_id = NULL, *cb_id = NULL;
5408 6 : char *key = NULL;
5409 6 : bit icase = 0, anti = 0;
5410 :
5411 6 : STRSELECT_MAPARGS(stk, pci, r_id, b_id, cb_id, key, icase, anti);
5412 6 : return STRselect(r_id, b_id, cb_id, key, anti,
5413 : icase ? str_is_isuffix : str_is_suffix, "str.endswithselect");
5414 : }
5415 :
5416 : /**
5417 : * @r_id: result oid
5418 : * @b_id: input bat oid
5419 : * @cb_id: input bat candidates oid
5420 : * @key: input string
5421 : * @icase: ignore case
5422 : * @anti: anti join
5423 : */
5424 : static str
5425 6 : STRcontainsselect(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
5426 : {
5427 6 : (void) cntxt;
5428 6 : (void) mb;
5429 :
5430 6 : bat *r_id = NULL, *b_id = NULL, *cb_id = NULL;
5431 6 : char *key = NULL;
5432 6 : bit icase = 0, anti = 0;
5433 :
5434 6 : STRSELECT_MAPARGS(stk, pci, r_id, b_id, cb_id, key, icase, anti);
5435 6 : return STRselect(r_id, b_id, cb_id, key, anti,
5436 : icase ? str_icontains : str_contains, "str.containsselect");
5437 : }
5438 :
/* APPEND(b, o): store oid 'o' at index b->batCount of the oid array at
 * b->theap->base and increment b->batCount.  No capacity check is done
 * here.  'b' is evaluated twice, so it must not have side effects; it is
 * parenthesized so that expressions such as *pp can be passed. */
#define APPEND(b, o)	(((oid *) (b)->theap->base)[(b)->batCount++] = (o))
/* VALUE(s, x): address of the x-th variable-sized value of side 's',
 * computed from the caller's <s>vars, <s>vals and <s>i locals (token
 * pasting, so 's' must be a plain identifier prefix such as l or r). */
#define VALUE(s, x)		(s##vars + VarHeapVal(s##vals, (x), s##i.width))
5441 :
/* Set the tail properties of BAT 'B' to: no nils (tnil false, tnonil
 * true), key, sorted, reverse sorted, and seqbase 0.  Going by the name,
 * this is meant for an empty result.  'B' is parenthesized so that any
 * pointer expression can be passed; it is evaluated several times and
 * must not have side effects. */
#define set_empty_bat_props(B) \
	do { \
		(B)->tnil = false; \
		(B)->tnonil = true; \
		(B)->tkey = true; \
		(B)->tsorted = true; \
		(B)->trevsorted = true; \
		(B)->tseqbase = 0; \
	} while (0)
5451 :
/*
 * CONTAINS_JOIN_LOOP(STR_CMP, STR_LEN): nested-loop join body that can
 * use strimps to pre-filter the left side.
 *
 * For every right candidate with a non-nil value vr, vr_len is set to
 * STR_LEN and, when with_strimps is set, STRMPfilter() is asked for a
 * reduced left candidate list (falling back to cl when it returns NULL).
 * Every left candidate with a non-nil value vl for which STR_CMP
 * evaluates to 0 is a match: lo is appended to rl and, if rr exists, ro
 * to rr.  Both result BATs are grown together when rl is full.
 *
 * While appending, the sortedness, key and seqbase properties of rl and
 * rr are downgraded as soon as the appended oids contradict them.
 *
 * The macro is not hygienic.  It expects these names in the expanding
 * scope: l, cl, r, cr, lci, rci, lo, ro, vl, vr, vr_len, lbase, rbase,
 * the locals used by VALUE() for l and r, rl, rr, lastl, matches,
 * rskipped, newcap, with_strimps, anti, timeoffset, counter, msg, fname
 * and a label `exit`.
 */
#define CONTAINS_JOIN_LOOP(STR_CMP, STR_LEN) \
	do { \
		canditer_init(&rci, r, cr); \
		for (BUN ridx = 0; ridx < rci.ncand; ridx++) { \
			BAT *filtered_sl = NULL; \
			GDK_CHECK_TIMEOUT(timeoffset, counter, GOTO_LABEL_TIMEOUT_HANDLER(exit)); \
			ro = canditer_next(&rci); \
			vr = VALUE(r, ro - rbase); \
			matches = 0; \
			if (!strNil(vr)) { \
				vr_len = STR_LEN; \
				if (with_strimps) \
					filtered_sl = STRMPfilter(l, cl, vr, anti); \
				if (filtered_sl) \
					canditer_init(&lci, l, filtered_sl); \
				else \
					canditer_init(&lci, l, cl); \
				for (BUN lidx = 0; lidx < lci.ncand; lidx++) { \
					lo = canditer_next(&lci); \
					vl = VALUE(l, lo - lbase); \
					if (strNil(vl)) \
						continue; \
					if (STR_CMP) \
						continue; \
					if (BATcount(rl) == BATcapacity(rl)) { \
						newcap = BATgrows(rl); \
						BATsetcount(rl, BATcount(rl)); \
						if (rr) \
							BATsetcount(rr, BATcount(rr)); \
						if (BATextend(rl, newcap) != GDK_SUCCEED || \
							(rr && BATextend(rr, newcap) != GDK_SUCCEED)) { \
							msg = createException(MAL, fname, SQLSTATE(HY013) MAL_MALLOC_FAIL); \
							/* filtered_sl goes out of scope at exit: release it here */ \
							BBPreclaim(filtered_sl); \
							goto exit; \
						} \
						assert(!rr || BATcapacity(rl) == BATcapacity(rr)); \
					} \
					if (BATcount(rl) > 0) { \
						if (lastl + 1 != lo) \
							rl->tseqbase = oid_nil; \
						if (matches == 0) { \
							if (rr) \
								rr->trevsorted = false; \
							if (lastl > lo) { \
								rl->tsorted = false; \
								rl->tkey = false; \
							} else if (lastl < lo) { \
								rl->trevsorted = false; \
							} else { \
								rl->tkey = false; \
							} \
						} \
					} \
					APPEND(rl, lo); \
					if (rr) \
						APPEND(rr, ro); \
					lastl = lo; \
					matches++; \
				} \
				BBPreclaim(filtered_sl); \
			} \
			if (rr) { \
				if (matches > 1) { \
					rr->tkey = false; \
					rr->tseqbase = oid_nil; \
					rl->trevsorted = false; \
				} else if (matches == 0) { \
					rskipped = BATcount(rr) > 0; \
				} else if (rskipped) { \
					rr->tseqbase = oid_nil; \
				} \
			} else if (matches > 1) { \
				rl->trevsorted = false; \
			} \
		} \
	} while (0)
5527 :
/*
 * STR_JOIN_NESTED_LOOP(STR_CMP, STR_LEN, FNAME): plain nested-loop join
 * body.
 *
 * For every right candidate with a non-nil value vr, vr_len is set to
 * STR_LEN and all left candidates are visited.  A left candidate with a
 * non-nil value vl is a match when STR_CMP evaluates to non-zero (the
 * loop skips the pair on !(STR_CMP)); lo is then appended to rl and, if
 * rr exists, ro to rr.  Both result BATs are grown together when rl is
 * full; if growing fails, msg is set using FNAME and control jumps to
 * the label `exit`.
 *
 * While appending, the sortedness, key and seqbase properties of rl and
 * rr are downgraded as soon as the appended oids contradict them.
 *
 * The macro is not hygienic.  It expects these names in the expanding
 * scope: l, cl, r, cr, lci, rci, lo, ro, vl, vr, vr_len, lbase, rbase,
 * the locals used by VALUE() for l and r, rl, rr, last_lo, matches,
 * rskipped, newcap, timeoffset, counter, msg and a label `exit`.
 */
5528 : #define STR_JOIN_NESTED_LOOP(STR_CMP, STR_LEN, FNAME) \
5529 : do { \
5530 : canditer_init(&rci, r, cr); \
5531 : for (BUN ridx = 0; ridx < rci.ncand; ridx++) { \
5532 : GDK_CHECK_TIMEOUT(timeoffset, counter, GOTO_LABEL_TIMEOUT_HANDLER(exit)); \
5533 : ro = canditer_next(&rci); \
5534 : vr = VALUE(r, ro - rbase); \
5535 : matches = 0; \
5536 : if (!strNil(vr)) { \
5537 : vr_len = STR_LEN; \
5538 : canditer_init(&lci, l, cl); \
5539 : for (BUN lidx = 0; lidx < lci.ncand; lidx++) { \
5540 : lo = canditer_next(&lci); \
5541 : vl = VALUE(l, lo - lbase); \
5542 : if (strNil(vl)) \
5543 : continue; \
5544 : if (!(STR_CMP)) \
5545 : continue; \
5546 : if (BATcount(rl) == BATcapacity(rl)) { \
5547 : newcap = BATgrows(rl); \
5548 : BATsetcount(rl, BATcount(rl)); \
5549 : if (rr) \
5550 : BATsetcount(rr, BATcount(rr)); \
5551 : if (BATextend(rl, newcap) != GDK_SUCCEED || \
5552 : (rr && BATextend(rr, newcap) != GDK_SUCCEED)) { \
5553 : msg = createException(MAL, FNAME, SQLSTATE(HY013) MAL_MALLOC_FAIL); \
5554 : goto exit; \
5555 : } \
5556 : assert(!rr || BATcapacity(rl) == BATcapacity(rr)); \
5557 : } \
5558 : if (BATcount(rl) > 0) { \
5559 : if (last_lo + 1 != lo) \
5560 : rl->tseqbase = oid_nil; \
5561 : if (matches == 0) { \
5562 : if (rr) \
5563 : rr->trevsorted = false; \
5564 : if (last_lo > lo) { \
5565 : rl->tsorted = false; \
5566 : rl->tkey = false; \
5567 : } else if (last_lo < lo) { \
5568 : rl->trevsorted = false; \
5569 : } else { \
5570 : rl->tkey = false; \
5571 : } \
5572 : } \
5573 : } \
5574 : APPEND(rl, lo); \
5575 : if (rr) \
5576 : APPEND(rr, ro); \
5577 : last_lo = lo; \
5578 : matches++; \
5579 : } \
5580 : } \
5581 : if (rr) { \
5582 : if (matches > 1) { \
5583 : rr->tkey = false; \
5584 : rr->tseqbase = oid_nil; \
5585 : rl->trevsorted = false; \
5586 : } else if (matches == 0) { \
5587 : rskipped = BATcount(rr) > 0; \
5588 : } else if (rskipped) { \
5589 : rr->tseqbase = oid_nil; \
5590 : } \
5591 : } else if (matches > 1) { \
5592 : rl->trevsorted = false; \
5593 : } \
5594 : } \
5595 : } while (0)
5596 :
/*
 * STARTSWITH_SORTED_LOOP(STR_CMP, STR_LEN, FNAME): merge-style join body
 * over the candidate iterators built from sorted_l/sorted_cl and
 * sorted_r/sorted_cr.
 *
 * Steps:
 *  1. Advance over the left candidates until the first non-nil value; lx
 *     is left at that index.
 *  2. Do the same on the right side; canditer_setidx(&rci, rx) rewinds
 *     the iterator so the main loop re-reads the first non-nil entry.
 *  3. For each remaining right value vr (vr_len = STR_LEN), restart the
 *     left iterator at lx and evaluate cmp = STR_CMP for each left value:
 *       cmp < 0: lx is advanced, so this left entry is not visited again
 *                for later right values;
 *       cmp > 0: stop scanning the left side for this right value;
 *       cmp == 0: match, append lo to rl and, if rr exists, ro to rr.
 *     Both result BATs are grown together when rl is full; if growing
 *     fails, msg is set using FNAME and control jumps to `exit`.
 *
 * While appending, the sortedness, key and seqbase properties of rl and
 * rr are downgraded as soon as the appended oids contradict them.
 *
 * NOTE(review): nil values are only skipped at the start of each side,
 * which presumably relies on nils sorting first; the inner loop uses
 * canditer_next_dense(), which presumably requires lci to be dense.
 * Confirm both against the caller.
 *
 * The macro is not hygienic.  It expects these names in the expanding
 * scope: sorted_l, sorted_cl, sorted_r, sorted_cr, lci, rci, lx, rx, n,
 * cmp, lo, ro, vl, vr, vr_len, lbase, rbase, the locals used by VALUE()
 * for l and r, rl, rr, last_lo, matches, rskipped, newcap, timeoffset,
 * counter, msg and a label `exit`.
 */
5597 : #define STARTSWITH_SORTED_LOOP(STR_CMP, STR_LEN, FNAME) \
5598 : do { \
5599 : canditer_init(&rci, sorted_r, sorted_cr); \
5600 : canditer_init(&lci, sorted_l, sorted_cl); \
5601 : for (lx = 0; lx < lci.ncand; lx++) { \
5602 : lo = canditer_next(&lci); \
5603 : vl = VALUE(l, lo - lbase); \
5604 : if (!strNil(vl)) \
5605 : break; \
5606 : } \
5607 : for (rx = 0; rx < rci.ncand; rx++) { \
5608 : ro = canditer_next(&rci); \
5609 : vr = VALUE(r, ro - rbase); \
5610 : if (!strNil(vr)) { \
5611 : canditer_setidx(&rci, rx); \
5612 : break; \
5613 : } \
5614 : } \
5615 : for (; rx < rci.ncand; rx++) { \
5616 : GDK_CHECK_TIMEOUT(timeoffset, counter, GOTO_LABEL_TIMEOUT_HANDLER(exit)); \
5617 : ro = canditer_next(&rci); \
5618 : vr = VALUE(r, ro - rbase); \
5619 : vr_len = STR_LEN; \
5620 : matches = 0; \
5621 : for (canditer_setidx(&lci, lx), n = lx; n < lci.ncand; n++) { \
5622 : lo = canditer_next_dense(&lci); \
5623 : vl = VALUE(l, lo - lbase); \
5624 : cmp = STR_CMP; \
5625 : if (cmp < 0) { \
5626 : lx++; \
5627 : continue; \
5628 : } \
5629 : else if (cmp > 0) \
5630 : break; \
5631 : if (BATcount(rl) == BATcapacity(rl)) { \
5632 : newcap = BATgrows(rl); \
5633 : BATsetcount(rl, BATcount(rl)); \
5634 : if (rr) \
5635 : BATsetcount(rr, BATcount(rr)); \
5636 : if (BATextend(rl, newcap) != GDK_SUCCEED || \
5637 : (rr && BATextend(rr, newcap) != GDK_SUCCEED)) { \
5638 : msg = createException(MAL, FNAME, SQLSTATE(HY013) MAL_MALLOC_FAIL); \
5639 : goto exit; \
5640 : } \
5641 : assert(!rr || BATcapacity(rl) == BATcapacity(rr)); \
5642 : } \
5643 : if (BATcount(rl) > 0) { \
5644 : if (last_lo + 1 != lo) \
5645 : rl->tseqbase = oid_nil; \
5646 : if (matches == 0) { \
5647 : if (rr) \
5648 : rr->trevsorted = false; \
5649 : if (last_lo > lo) { \
5650 : rl->tsorted = false; \
5651 : rl->tkey = false; \
5652 : } else if (last_lo < lo) { \
5653 : rl->trevsorted = false; \
5654 : } else { \
5655 : rl->tkey = false; \
5656 : } \
5657 : } \
5658 : } \
5659 : APPEND(rl, lo); \
5660 : if (rr) \
5661 : APPEND(rr, ro); \
5662 : last_lo = lo; \
5663 : matches++; \
5664 : } \
5665 : if (rr) { \
5666 : if (matches > 1) { \
5667 : rr->tkey = false; \
5668 : rr->tseqbase = oid_nil; \
5669 : rl->trevsorted = false; \
5670 : } else if (matches == 0) { \
5671 : rskipped = BATcount(rr) > 0; \
5672 : } else if (rskipped) { \
5673 : rr->tseqbase = oid_nil; \
5674 : } \
5675 : } else if (matches > 1) { \
5676 : rl->trevsorted = false; \
5677 : } \
5678 : } \
5679 : } while (0)
5680 :
5681 : static void
5682 533 : do_strrev(char *dst, const char *src, size_t len)
5683 : {
5684 533 : dst[len] = 0;
5685 533 : if (strNil(src)) {
5686 8 : assert(len == strlen(str_nil));
5687 8 : strcpy(dst, str_nil);
5688 8 : return;
5689 : }
5690 4321 : while (*src) {
5691 3796 : if ((*src & 0xF8) == 0xF0) {
5692 0 : assert(len >= 4);
5693 0 : dst[len - 4] = *src++;
5694 0 : assert((*src & 0xC0) == 0x80);
5695 0 : dst[len - 3] = *src++;
5696 0 : assert((*src & 0xC0) == 0x80);
5697 0 : dst[len - 2] = *src++;
5698 0 : assert((*src & 0xC0) == 0x80);
5699 0 : dst[len - 1] = *src++;
5700 0 : len -= 4;
5701 3796 : } else if ((*src & 0xF0) == 0xE0) {
5702 0 : assert(len >= 3);
5703 0 : dst[len - 3] = *src++;
5704 0 : assert((*src & 0xC0) == 0x80);
5705 0 : dst[len - 2] = *src++;
5706 0 : assert((*src & 0xC0) == 0x80);
5707 0 : dst[len - 1] = *src++;
5708 0 : len -= 3;
5709 3796 : } else if ((*src & 0xE0) == 0xC0) {
5710 0 : assert(len >= 2);
5711 0 : dst[len - 2] = *src++;
5712 0 : assert((*src & 0xC0) == 0x80);
5713 0 : dst[len - 1] = *src++;
5714 0 : len -= 2;
5715 : } else {
5716 3796 : assert(len >= 1);
5717 3796 : assert((*src & 0x80) == 0);
5718 3796 : dst[--len] = *src++;
5719 : }
5720 : }
5721 525 : assert(len == 0);
5722 : }
5723 :
5724 : static BAT *
5725 24 : batstr_strrev(BAT *b)
5726 : {
5727 24 : BAT *bn = NULL;
5728 24 : BATiter bi;
5729 24 : BUN p, q;
5730 24 : const char *src;
5731 24 : size_t len;
5732 24 : char *dst;
5733 24 : size_t dstlen;
5734 :
5735 24 : dstlen = 1024;
5736 24 : dst = GDKmalloc(dstlen);
5737 24 : if (dst == NULL)
5738 : return NULL;
5739 :
5740 24 : assert(b->ttype == TYPE_str);
5741 :
5742 24 : bn = COLnew(b->hseqbase, TYPE_str, BATcount(b), TRANSIENT);
5743 24 : if (bn == NULL) {
5744 0 : GDKfree(dst);
5745 0 : return NULL;
5746 : }
5747 :
5748 24 : bi = bat_iterator(b);
5749 559 : BATloop(b, p, q) {
5750 535 : src = (const char *) BUNtail(bi, p);
5751 533 : len = strlen(src);
5752 533 : if (len >= dstlen) {
5753 0 : char *ndst;
5754 0 : dstlen = len + 1024;
5755 0 : ndst = GDKrealloc(dst, dstlen);
5756 0 : if (ndst == NULL) {
5757 0 : bat_iterator_end(&bi);
5758 0 : BBPreclaim(bn);
5759 0 : GDKfree(dst);
5760 0 : return NULL;
5761 : }
5762 : dst = ndst;
5763 : }
5764 533 : do_strrev(dst, src, len);
5765 535 : if (BUNappend(bn, dst, false) != GDK_SUCCEED) {
5766 0 : bat_iterator_end(&bi);
5767 0 : BBPreclaim(bn);
5768 0 : GDKfree(dst);
5769 0 : return NULL;
5770 : }
5771 : }
5772 :
5773 24 : bat_iterator_end(&bi);
5774 24 : GDKfree(dst);
5775 24 : return bn;
5776 : }
5777 :
5778 : static BAT *
5779 18 : batstr_strlower(BAT *b)
5780 : {
5781 18 : BAT *bn = NULL;
5782 18 : BATiter bi;
5783 18 : BUN p, q;
5784 :
5785 18 : assert(b->ttype == TYPE_str);
5786 :
5787 18 : bn = COLnew(b->hseqbase, TYPE_str, BATcount(b), TRANSIENT);
5788 18 : if (bn == NULL)
5789 : return NULL;
5790 :
5791 18 : bi = bat_iterator(b);
5792 98 : BATloop(b, p, q) {
5793 80 : str vb = BUNtail(bi, p), vb_low = NULL;
5794 80 : if (STRlower(&vb_low, &vb)) {
5795 0 : bat_iterator_end(&bi);
5796 0 : BBPreclaim(bn);
5797 0 : return NULL;
5798 : }
5799 80 : if (BUNappend(bn, vb_low, false) != GDK_SUCCEED) {
5800 0 : GDKfree(vb_low);
5801 0 : bat_iterator_end(&bi);
5802 0 : BBPreclaim(bn);
5803 0 : return NULL;
5804 : }
5805 79 : GDKfree(vb_low);
5806 : }
5807 18 : bat_iterator_end(&bi);
5808 18 : return bn;
5809 : }
5810 :
5811 : static str
5812 14 : str_join_nested(BAT *rl, BAT *rr, BAT *l, BAT *r, BAT *cl, BAT *cr,
5813 : bit anti, int (*str_cmp)(const char *, const char *, int), str fname)
5814 : {
5815 14 : str msg = MAL_SUCCEED;
5816 :
5817 14 : lng timeoffset = 0;
5818 14 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
5819 14 : if (qry_ctx != NULL)
5820 14 : timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ?
5821 14 : (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
5822 :
5823 14 : TRC_DEBUG(ALGO,
5824 : "(%s, %s, l=%s#" BUNFMT "[%s]%s%s,"
5825 : "r=%s#" BUNFMT "[%s]%s%s,sl=%s#" BUNFMT "%s%s,"
5826 : "sr=%s#" BUNFMT "%s%s)\n",
5827 : fname, "nested loop",
5828 : BATgetId(l), BATcount(l), ATOMname(l->ttype),
5829 : l->tsorted ? "-sorted" : "",
5830 : l->trevsorted ? "-revsorted" : "",
5831 : BATgetId(r), BATcount(r), ATOMname(r->ttype),
5832 : r->tsorted ? "-sorted" : "",
5833 : r->trevsorted ? "-revsorted" : "",
5834 : cl ? BATgetId(cl) : "NULL", cl ? BATcount(cl) : 0,
5835 : cl && cl->tsorted ? "-sorted" : "",
5836 : cl && cl->trevsorted ? "-revsorted" : "",
5837 : cr ? BATgetId(cr) : "NULL", cr ? BATcount(cr) : 0,
5838 : cr && cr->tsorted ? "-sorted" : "",
5839 : cr && cr->trevsorted ? "-revsorted" : "");
5840 :
5841 42 : assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
5842 14 : assert(ATOMtype(l->ttype) == TYPE_str);
5843 :
5844 14 : BATiter li = bat_iterator(l);
5845 14 : BATiter ri = bat_iterator(r);
5846 14 : assert(ri.vh && r->ttype);
5847 :
5848 14 : struct canditer lci, rci;
5849 14 : oid lbase = l->hseqbase,
5850 14 : rbase = r->hseqbase,
5851 14 : lo, ro, last_lo = 0;
5852 14 : const char *lvals = (const char *) li.base,
5853 14 : *rvals = (const char *) ri.base,
5854 14 : *lvars = li.vh->base,
5855 14 : *rvars = ri.vh->base,
5856 : *vl, *vr;
5857 14 : BUN matches, newcap;
5858 14 : int rskipped = 0, vr_len = 0;
5859 14 : size_t counter = 0;
5860 :
5861 14 : if (anti)
5862 0 : STR_JOIN_NESTED_LOOP((str_cmp(vl, vr, vr_len) != 0), str_strlen(vr), fname);
5863 : else
5864 734 : STR_JOIN_NESTED_LOOP((str_cmp(vl, vr, vr_len) == 0), str_strlen(vr), fname);
5865 :
5866 14 : assert(!rr || BATcount(rl) == BATcount(rr));
5867 14 : BATsetcount(rl, BATcount(rl));
5868 14 : if (rr)
5869 14 : BATsetcount(rr, BATcount(rr));
5870 :
5871 14 : if (BATcount(rl) > 0) {
5872 13 : if (BATtdense(rl))
5873 2 : rl->tseqbase = ((oid *) rl->theap->base)[0];
5874 13 : if (rr && BATtdense(rr))
5875 4 : rr->tseqbase = ((oid *) rr->theap->base)[0];
5876 : } else {
5877 1 : rl->tseqbase = 0;
5878 1 : if (rr)
5879 1 : rr->tseqbase = 0;
5880 : }
5881 :
5882 14 : TRC_DEBUG(ALGO,
5883 : "(%s, l=%s,r=%s)=(%s#" BUNFMT "%s%s,%s#" BUNFMT "%s%s\n",
5884 : fname,
5885 : BATgetId(l), BATgetId(r), BATgetId(rl), BATcount(rl),
5886 : rl->tsorted ? "-sorted" : "",
5887 : rl->trevsorted ? "-revsorted" : "",
5888 : rr ? BATgetId(rr) : NULL, rr ? BATcount(rr) : 0,
5889 : rr && rr->tsorted ? "-sorted" : "",
5890 : rr && rr->trevsorted ? "-revsorted" : "");
5891 :
5892 14 : exit:
5893 14 : bat_iterator_end(&li);
5894 14 : bat_iterator_end(&ri);
5895 14 : return msg;
5896 : }
5897 :
5898 : static str
5899 32 : contains_join(BAT *rl, BAT *rr, BAT *l, BAT *r, BAT *cl, BAT *cr, bit anti,
5900 : int (*str_cmp)(const char *, const char *, int), const str fname)
5901 : {
5902 32 : str msg = MAL_SUCCEED;
5903 :
5904 32 : lng timeoffset = 0;
5905 32 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
5906 32 : if (qry_ctx != NULL)
5907 32 : timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ?
5908 32 : (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
5909 :
5910 32 : TRC_DEBUG(ALGO,
5911 : "(%s, l=%s#" BUNFMT "[%s]%s%s,"
5912 : "r=%s#" BUNFMT "[%s]%s%s,sl=%s#" BUNFMT "%s%s,"
5913 : "sr=%s#" BUNFMT "%s%s)\n",
5914 : fname,
5915 : BATgetId(l), BATcount(l), ATOMname(l->ttype),
5916 : l->tsorted ? "-sorted" : "",
5917 : l->trevsorted ? "-revsorted" : "",
5918 : BATgetId(r), BATcount(r), ATOMname(r->ttype),
5919 : r->tsorted ? "-sorted" : "",
5920 : r->trevsorted ? "-revsorted" : "",
5921 : cl ? BATgetId(cl) : "NULL", cl ? BATcount(cl) : 0,
5922 : cl && cl->tsorted ? "-sorted" : "",
5923 : cl && cl->trevsorted ? "-revsorted" : "",
5924 : cr ? BATgetId(cr) : "NULL", cr ? BATcount(cr) : 0,
5925 : cr && cr->tsorted ? "-sorted" : "",
5926 : cr && cr->trevsorted ? "-revsorted" : "");
5927 :
5928 32 : bool with_strimps = false;
5929 :
5930 32 : if (BAThasstrimps(l)) {
5931 8 : with_strimps = true;
5932 8 : if (STRMPcreate(l, NULL) != GDK_SUCCEED) {
5933 0 : GDKclrerr();
5934 0 : with_strimps = false;
5935 : }
5936 : }
5937 :
5938 96 : assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
5939 32 : assert(ATOMtype(l->ttype) == TYPE_str);
5940 :
5941 32 : BATiter li = bat_iterator(l);
5942 32 : BATiter ri = bat_iterator(r);
5943 32 : assert(ri.vh && r->ttype);
5944 :
5945 32 : struct canditer lci, rci;
5946 32 : oid lbase = l->hseqbase,
5947 32 : rbase = r->hseqbase,
5948 32 : lo, ro, lastl = 0;
5949 32 : const char *lvals = (const char *) li.base,
5950 32 : *rvals = (const char *) ri.base,
5951 32 : *lvars = li.vh->base,
5952 32 : *rvars = ri.vh->base,
5953 : *vl, *vr;
5954 32 : int rskipped = 0, vr_len = 0;
5955 32 : BUN matches, newcap;
5956 32 : size_t counter = 0;
5957 :
5958 32 : if (anti)
5959 0 : CONTAINS_JOIN_LOOP(str_cmp(vl, vr, vr_len) == 0, str_strlen(vr));
5960 : else
5961 30207 : CONTAINS_JOIN_LOOP(str_cmp(vl, vr, vr_len) != 0, str_strlen(vr));
5962 :
5963 32 : assert(!rr || BATcount(rl) == BATcount(rr));
5964 32 : BATsetcount(rl, BATcount(rl));
5965 32 : if (rr)
5966 32 : BATsetcount(rr, BATcount(rr));
5967 32 : if (BATcount(rl) > 0) {
5968 29 : if (BATtdense(rl))
5969 7 : rl->tseqbase = ((oid *) rl->theap->base)[0];
5970 29 : if (rr && BATtdense(rr))
5971 5 : rr->tseqbase = ((oid *) rr->theap->base)[0];
5972 : } else {
5973 3 : rl->tseqbase = 0;
5974 3 : if (rr)
5975 3 : rr->tseqbase = 0;
5976 : }
5977 :
5978 32 : TRC_DEBUG(ALGO,
5979 : "(%s, l=%s,r=%s)=(%s#" BUNFMT "%s%s,%s#" BUNFMT "%s%s\n",
5980 : fname,
5981 : BATgetId(l), BATgetId(r), BATgetId(rl), BATcount(rl),
5982 : rl->tsorted ? "-sorted" : "",
5983 : rl->trevsorted ? "-revsorted" : "",
5984 : rr ? BATgetId(rr) : NULL, rr ? BATcount(rr) : 0,
5985 : rr && rr->tsorted ? "-sorted" : "",
5986 : rr && rr->trevsorted ? "-revsorted" : "");
5987 32 : exit:
5988 32 : bat_iterator_end(&li);
5989 32 : bat_iterator_end(&ri);
5990 32 : return msg;
5991 : }
5992 :
5993 : static str
5994 26 : startswith_join(BAT **rl_ptr, BAT **rr_ptr, BAT *l, BAT *r, BAT *cl, BAT *cr,
5995 : bit anti, int (*str_cmp)(const char *, const char *, int), str fname)
5996 : {
5997 26 : str msg = MAL_SUCCEED;
5998 26 : gdk_return rc;
5999 :
6000 26 : lng timeoffset = 0;
6001 26 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
6002 26 : if (qry_ctx != NULL)
6003 26 : timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ?
6004 26 : (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
6005 :
6006 26 : assert(*rl_ptr && *rr_ptr);
6007 :
6008 26 : BAT *sorted_l = NULL, *sorted_r = NULL,
6009 26 : *sorted_cl = NULL, *sorted_cr = NULL,
6010 26 : *ord_sorted_l = NULL, *ord_sorted_r = NULL,
6011 26 : *proj_rl = NULL, *proj_rr = NULL,
6012 26 : *rl = *rl_ptr, *rr = *rr_ptr;
6013 :
6014 26 : TRC_DEBUG(ALGO,
6015 : "(%s, %s, l=%s#" BUNFMT "[%s]%s%s,"
6016 : "r=%s#" BUNFMT "[%s]%s%s,sl=%s#" BUNFMT "%s%s,"
6017 : "sr=%s#" BUNFMT "%s%s)\n",
6018 : fname, "sorted inputs",
6019 : BATgetId(l), BATcount(l), ATOMname(l->ttype),
6020 : l->tsorted ? "-sorted" : "",
6021 : l->trevsorted ? "-revsorted" : "",
6022 : BATgetId(r), BATcount(r), ATOMname(r->ttype),
6023 : r->tsorted ? "-sorted" : "",
6024 : r->trevsorted ? "-revsorted" : "",
6025 : cl ? BATgetId(cl) : "NULL", cl ? BATcount(cl) : 0,
6026 : cl && cl->tsorted ? "-sorted" : "",
6027 : cl && cl->trevsorted ? "-revsorted" : "",
6028 : cr ? BATgetId(cr) : "NULL", cr ? BATcount(cr) : 0,
6029 : cr && cr->tsorted ? "-sorted" : "",
6030 : cr && cr->trevsorted ? "-revsorted" : "");
6031 :
6032 26 : bool l_sorted = BATordered(l);
6033 26 : bool r_sorted = BATordered(r);
6034 :
6035 26 : if (l_sorted == FALSE) {
6036 26 : rc = BATsort(&sorted_l, &ord_sorted_l, NULL,
6037 : l, NULL, NULL, false, false, false);
6038 26 : if (rc != GDK_SUCCEED) {
6039 0 : throw(MAL, fname, "Sorting left input failed");
6040 : } else {
6041 26 : if (cl) {
6042 0 : rc = BATsort(&sorted_cl, NULL, NULL,
6043 : cl, ord_sorted_l, NULL, false, false, false);
6044 0 : if (rc != GDK_SUCCEED) {
6045 0 : BBPnreclaim(2, sorted_l, ord_sorted_l);
6046 0 : throw(MAL, fname, "Sorting left candidates input failed");
6047 : }
6048 : }
6049 : }
6050 : } else {
6051 0 : sorted_l = l;
6052 0 : sorted_cl = cl;
6053 : }
6054 :
6055 26 : if (r_sorted == FALSE) {
6056 18 : rc = BATsort(&sorted_r, &ord_sorted_r, NULL,
6057 : r, NULL, NULL, false, false, false);
6058 18 : if (rc != GDK_SUCCEED) {
6059 0 : BBPnreclaim(3, sorted_l, ord_sorted_l, sorted_cl);
6060 0 : throw(MAL, fname, "Sorting right input failed");
6061 : } else {
6062 18 : if (cr) {
6063 0 : rc = BATsort(&sorted_cr, NULL, NULL,
6064 : cr, ord_sorted_r, NULL, false, false, false);
6065 0 : if (rc != GDK_SUCCEED) {
6066 0 : BBPnreclaim(5, sorted_l, ord_sorted_l, sorted_cl, sorted_r, ord_sorted_r);
6067 0 : throw(MAL, fname, "Sorting right candidates input failed");
6068 : }
6069 : }
6070 : }
6071 : } else {
6072 8 : sorted_r = r;
6073 8 : sorted_cr = cr;
6074 : }
6075 :
6076 26 : assert(BATordered(sorted_l) && BATordered(sorted_r));
6077 :
6078 26 : BATiter li = bat_iterator(sorted_l);
6079 26 : BATiter ri = bat_iterator(sorted_r);
6080 26 : assert(ri.vh && r->ttype);
6081 :
6082 26 : struct canditer lci, rci;
6083 26 : oid lbase = sorted_l->hseqbase,
6084 26 : rbase = sorted_r->hseqbase,
6085 26 : lo, ro, last_lo = 0;
6086 26 : const char *lvals = (const char *) li.base,
6087 26 : *rvals = (const char *) ri.base,
6088 26 : *lvars = li.vh->base,
6089 26 : *rvars = ri.vh->base,
6090 : *vl, *vr;
6091 26 : BUN matches, newcap, n = 0, rx = 0, lx = 0;
6092 26 : int rskipped = 0, vr_len = 0, cmp = 0;
6093 26 : size_t counter = 0;
6094 :
6095 26 : if (anti)
6096 0 : STR_JOIN_NESTED_LOOP(str_cmp(vl, vr, vr_len) != 0, str_strlen(vr), fname);
6097 : else
6098 1325 : STARTSWITH_SORTED_LOOP(str_cmp(vl, vr, vr_len), str_strlen(vr), fname);
6099 :
6100 26 : assert(!rr || BATcount(rl) == BATcount(rr));
6101 26 : BATsetcount(rl, BATcount(rl));
6102 26 : if (rr)
6103 26 : BATsetcount(rr, BATcount(rr));
6104 :
6105 26 : if (BATcount(rl) > 0) {
6106 18 : if (BATtdense(rl))
6107 11 : rl->tseqbase = ((oid *) rl->theap->base)[0];
6108 18 : if (rr && BATtdense(rr))
6109 7 : rr->tseqbase = ((oid *) rr->theap->base)[0];
6110 : } else {
6111 8 : rl->tseqbase = 0;
6112 8 : if (rr)
6113 8 : rr->tseqbase = 0;
6114 : }
6115 :
6116 26 : if (l_sorted == FALSE) {
6117 26 : proj_rl = BATproject(rl, ord_sorted_l);
6118 26 : if (!proj_rl) {
6119 0 : msg = createException(MAL, fname, "Project left pre-sort order failed");
6120 0 : goto exit;
6121 : } else {
6122 26 : BBPreclaim(rl);
6123 26 : *rl_ptr = proj_rl;
6124 : }
6125 : }
6126 :
6127 26 : if (rr && r_sorted == FALSE) {
6128 18 : proj_rr = BATproject(rr, ord_sorted_r);
6129 18 : if (!proj_rr) {
6130 0 : BBPreclaim(proj_rl);
6131 0 : msg = createException(MAL, fname, "Project right pre-sort order failed");
6132 0 : goto exit;
6133 : } else {
6134 18 : BBPreclaim(rr);
6135 18 : *rr_ptr = proj_rr;
6136 : }
6137 : }
6138 :
6139 26 : TRC_DEBUG(ALGO,
6140 : "(%s, l=%s,r=%s)=(%s#" BUNFMT "%s%s,%s#" BUNFMT "%s%s\n",
6141 : fname,
6142 : BATgetId(l), BATgetId(r), BATgetId(rl), BATcount(rl),
6143 : rl->tsorted ? "-sorted" : "",
6144 : rl->trevsorted ? "-revsorted" : "",
6145 : rr ? BATgetId(rr) : NULL, rr ? BATcount(rr) : 0,
6146 : rr && rr->tsorted ? "-sorted" : "",
6147 : rr && rr->trevsorted ? "-revsorted" : "");
6148 :
6149 26 : exit:
6150 26 : if (l_sorted == FALSE)
6151 26 : BBPnreclaim(3, sorted_l, ord_sorted_l, sorted_cl);
6152 :
6153 26 : if (r_sorted == FALSE)
6154 18 : BBPnreclaim(3, sorted_r, ord_sorted_r, sorted_cr);
6155 :
6156 26 : bat_iterator_end(&li);
6157 26 : bat_iterator_end(&ri);
6158 26 : return msg;
6159 : }
6160 :
6161 : static str
6162 72 : STRjoin(bat *rl_id, bat *rr_id, const bat l_id, const bat r_id,
6163 : const bat cl_id, const bat cr_id, const bit anti, bool icase,
6164 : int (*str_cmp)(const char *, const char *, int), const str fname)
6165 : {
6166 72 : str msg = MAL_SUCCEED;
6167 :
6168 72 : BAT *rl = NULL, *rr = NULL, *l = NULL, *r = NULL, *cl = NULL, *cr = NULL;
6169 :
6170 72 : if (!(l = BATdescriptor(l_id)) || !(r = BATdescriptor(r_id))) {
6171 0 : BBPnreclaim(2, l, r);
6172 0 : throw(MAL, fname, RUNTIME_OBJECT_MISSING);
6173 : }
6174 :
6175 72 : if ((cl_id && !is_bat_nil(cl_id) && (cl = BATdescriptor(cl_id)) == NULL) ||
6176 72 : (cr_id && !is_bat_nil(cr_id) && (cr = BATdescriptor(cr_id)) == NULL)) {
6177 0 : BBPnreclaim(4, l, r, cl, cr);
6178 0 : throw(MAL, fname, RUNTIME_OBJECT_MISSING);
6179 : }
6180 :
6181 72 : rl = COLnew(0, TYPE_oid, BATcount(l), TRANSIENT);
6182 72 : if (rr_id)
6183 72 : rr = COLnew(0, TYPE_oid, BATcount(l), TRANSIENT);
6184 :
6185 72 : if (!rl || (rr_id && !rr)) {
6186 0 : BBPnreclaim(6, l, r, cl, cr, rl, rr);
6187 0 : throw(MAL, fname, SQLSTATE(HY013) MAL_MALLOC_FAIL);
6188 : }
6189 :
6190 72 : set_empty_bat_props(rl);
6191 72 : if (rr_id)
6192 72 : set_empty_bat_props(rr);
6193 :
6194 216 : assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
6195 72 : assert(ATOMtype(l->ttype) == TYPE_str);
6196 :
6197 72 : BAT *nl = l, *nr = r;
6198 :
6199 72 : if (strcmp(fname, "str.containsjoin") == 0) {
6200 32 : msg = contains_join(rl, rr, l, r, cl, cr, anti, str_cmp, fname);
6201 32 : if (msg) {
6202 0 : BBPnreclaim(6, rl, rr, l, r, cl, cr);
6203 0 : return msg;
6204 : }
6205 : } else {
6206 40 : struct canditer lci, rci;
6207 40 : canditer_init(&lci, l, cl);
6208 40 : canditer_init(&rci, r, cr);
6209 40 : BUN lcnt = lci.ncand, rcnt = rci.ncand;
6210 40 : BUN nl_cost = lci.ncand * rci.ncand,
6211 40 : sorted_cost =
6212 40 : (BUN) floor(0.8 * (lcnt*log2((double)lcnt)
6213 40 : + rcnt*log2((double)rcnt)));
6214 :
6215 40 : if (nl_cost < sorted_cost) {
6216 14 : msg = str_join_nested(rl, rr, nl, nr, cl, cr, anti, str_cmp, fname);
6217 : } else {
6218 26 : BAT *l_low = NULL, *r_low = NULL, *l_rev = NULL, *r_rev = NULL;
6219 26 : if (icase) {
6220 9 : l_low = batstr_strlower(nl);
6221 9 : if (l_low == NULL) {
6222 0 : BBPnreclaim(6, rl, rr, nl, nr, cl, cr);
6223 0 : throw(MAL, fname, "Failed lowering strings of left input");
6224 : }
6225 9 : r_low = batstr_strlower(nr);
6226 9 : if (r_low == NULL) {
6227 0 : BBPnreclaim(7, rl, rr, nl, nr, cl, cr, l_low);
6228 0 : throw(MAL, fname, "Failed lowering strings of right input");
6229 : }
6230 9 : BBPnreclaim(2, nl, nr);
6231 9 : nl = l_low;
6232 9 : nr = r_low;
6233 : }
6234 26 : if (strcmp(fname, "str.endswithjoin") == 0) {
6235 12 : l_rev = batstr_strrev(nl);
6236 12 : if (l_rev == NULL) {
6237 0 : BBPnreclaim(6, rl, rr, nl, nr, cl, cr);
6238 0 : throw(MAL, fname, "Failed reversing strings of left input");
6239 : }
6240 12 : r_rev = batstr_strrev(nr);
6241 12 : if (r_rev == NULL) {
6242 0 : BBPnreclaim(7, rl, rr, nl, nr, cl, cr, l_rev);
6243 0 : throw(MAL, fname, "Failed reversing strings of right input");
6244 : }
6245 12 : BBPnreclaim(2, nl, nr);
6246 12 : nl = l_rev;
6247 12 : nr = r_rev;
6248 : }
6249 26 : msg = startswith_join(&rl, &rr, nl, nr, cl, cr, anti, str_is_prefix, fname);
6250 : }
6251 : }
6252 :
6253 72 : if (!msg) {
6254 72 : *rl_id = rl->batCacheid;
6255 72 : BBPkeepref(rl);
6256 72 : if (rr_id) {
6257 72 : *rr_id = rr->batCacheid;
6258 72 : BBPkeepref(rr);
6259 : }
6260 : } else {
6261 0 : BBPnreclaim(2, rl, rr);
6262 : }
6263 :
6264 72 : BBPnreclaim(4, nl, nr, cl, cr);
6265 72 : return msg;
6266 : }
6267 :
/*
 * Map the arguments of a string join pattern call onto pointers.
 *
 * Layout handled (inputs follow the retc results):
 *   results: RL_ID [, RR_ID]       RR_ID is 0 when retc == 1
 *   inputs:  L_ID, R_ID [, IC_ID], CL_ID, CR_ID, ..., ANTI
 * IC_ID is only present when there are not exactly 7 inputs; it is
 * set to NULL otherwise.  ANTI is the last argument, so it is taken
 * at index argc - 1; for two results that is index 8 (7 inputs) or 9.
 * Taking it relative to argc keeps it in range for one result too.
 *
 * Only the macro parameters STK and PCI are used to reach the stack
 * and the instruction; the macro does not depend on how the caller
 * named its variables.
 */
#define STRJOIN_MAPARGS(STK, PCI, RL_ID, RR_ID, L_ID, R_ID, CL_ID, CR_ID, IC_ID, ANTI) \
	do { \
		RL_ID = getArgReference(STK, PCI, 0); \
		RR_ID = (PCI)->retc == 1 ? 0 : getArgReference(STK, PCI, 1); \
		int mapargs_idx_ = (PCI)->retc == 1 ? 1 : 2; \
		L_ID = getArgReference(STK, PCI, mapargs_idx_++); \
		R_ID = getArgReference(STK, PCI, mapargs_idx_++); \
		IC_ID = (PCI)->argc - (PCI)->retc == 7 ? \
			NULL : getArgReference(STK, PCI, mapargs_idx_++); \
		CL_ID = getArgReference(STK, PCI, mapargs_idx_++); \
		CR_ID = getArgReference(STK, PCI, mapargs_idx_++); \
		ANTI = getArgReference(STK, PCI, (PCI)->argc - 1); \
	} while (0)
6282 :
6283 : static inline str
6284 44 : ignorecase(const bat *ic_id, bool *icase, str fname)
6285 : {
6286 44 : BAT *c = NULL;
6287 :
6288 44 : if ((c = BATdescriptor(*ic_id)) == NULL)
6289 0 : throw(MAL, fname, SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
6290 :
6291 44 : assert(BATcount(c) == 1);
6292 :
6293 44 : BATiter bi = bat_iterator(c);
6294 44 : *icase = *(bit *) BUNtloc(bi, 0);
6295 44 : bat_iterator_end(&bi);
6296 :
6297 44 : BBPreclaim(c);
6298 44 : return MAL_SUCCEED;
6299 : }
6300 :
6301 : /**
6302 : * @rl_id: result left oid
6303 : * @rr_id: result right oid
6304 : * @l_id: left oid
6305 : * @r_id: right oid
6306 : * @cl_id: candidates left oid
6307 : * @cr_id: candidates right oid
6308 : * @ic_id: ignore case oid
6309 : * @anti: anti join oid
6310 : */
6311 : static str
6312 20 : STRstartswithjoin(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
6313 : {
6314 20 : (void)cntxt;
6315 20 : (void)mb;
6316 :
6317 20 : str msg = MAL_SUCCEED;
6318 20 : bat *rl_id = NULL, *rr_id = NULL, *l_id = NULL, *r_id = NULL,
6319 20 : *cl_id = NULL, *cr_id = NULL, *ic_id = NULL;
6320 20 : bit *anti = NULL;
6321 20 : bool icase = false;
6322 :
6323 40 : STRJOIN_MAPARGS(stk, pci, rl_id, rr_id, l_id, r_id, cl_id, cr_id, ic_id, anti);
6324 :
6325 20 : if (pci->argc - pci->retc == 8)
6326 16 : msg = ignorecase(ic_id, &icase, "str.startswithjoin");
6327 :
6328 36 : return msg ? msg : STRjoin(rl_id, rr_id, *l_id, *r_id,
6329 : cl_id ? *cl_id : 0,
6330 : cr_id ? *cr_id : 0,
6331 32 : *anti, icase, icase ? str_is_iprefix : str_is_prefix,
6332 : "str.startswithjoin");
6333 : }
6334 :
6335 : /**
6336 : * @rl_id: result left oid
6337 : * @rr_id: result right oid
6338 : * @l_id: left oid
6339 : * @r_id: right oid
6340 : * @cl_id: candidates left oid
6341 : * @cr_id: candidates right oid
6342 : * @ic_id: ignore case oid
6343 : * @anti: anti join oid
6344 : */
6345 : static str
6346 20 : STRendswithjoin(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
6347 : {
6348 20 : (void) cntxt;
6349 20 : (void) mb;
6350 :
6351 20 : str msg = MAL_SUCCEED;
6352 20 : bat *rl_id = NULL, *rr_id = NULL, *l_id = NULL, *r_id = NULL,
6353 20 : *cl_id = NULL, *cr_id = NULL, *ic_id = NULL;
6354 20 : bit *anti = NULL;
6355 20 : bool icase = false;
6356 :
6357 40 : STRJOIN_MAPARGS(stk, pci, rl_id, rr_id, l_id, r_id, cl_id, cr_id, ic_id, anti);
6358 :
6359 20 : if (pci->argc - pci->retc == 8)
6360 16 : msg = ignorecase(ic_id, &icase, "str.endswithjoin");
6361 :
6362 36 : return msg ? msg : STRjoin(rl_id, rr_id, *l_id, *r_id,
6363 : cl_id ? *cl_id : 0, cr_id ? *cr_id : 0,
6364 32 : *anti, icase, icase ? str_is_isuffix : str_is_suffix,
6365 : "str.endswithjoin");
6366 : }
6367 :
6368 : /**
6369 : * @rl_id: result left oid
6370 : * @rr_id: result right oid
6371 : * @l_id: left oid
6372 : * @r_id: right oid
6373 : * @cl_id: candidates left oid
6374 : * @cr_id: candidates right oid
6375 : * @ic_id: ignore case oid
6376 : * @anti: anti join oid
6377 : */
6378 : static str
6379 32 : STRcontainsjoin(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
6380 : {
6381 32 : (void) cntxt;
6382 32 : (void) mb;
6383 :
6384 32 : str msg = MAL_SUCCEED;
6385 32 : bat *rl_id = NULL, *rr_id = NULL, *l_id = NULL, *r_id = NULL,
6386 32 : *cl_id = NULL, *cr_id = NULL, *ic_id = NULL;
6387 32 : bit *anti = NULL;
6388 32 : bool icase = false;
6389 :
6390 64 : STRJOIN_MAPARGS(stk, pci, rl_id, rr_id, l_id, r_id, cl_id, cr_id, ic_id, anti);
6391 :
6392 32 : if (pci->argc - pci->retc == 8)
6393 12 : msg = ignorecase(ic_id, &icase, "str.containsjoin");
6394 :
6395 44 : return msg ? msg : STRjoin(rl_id, rr_id, *l_id, *r_id,
6396 : cl_id ? *cl_id : 0, cr_id ? *cr_id : 0,
6397 56 : *anti, icase, icase ? str_icontains : str_contains,
6398 : "str.containsjoin");
6399 : }
6400 :
6401 : #include "mel.h"
6402 : mel_func str_init_funcs[] = {
6403 : command("str", "str", STRtostr, false, "Noop routine.", args(1,2, arg("",str),arg("s",str))),
6404 : command("str", "string", STRTail, false, "Return the tail s[offset..n]\nof a string s[0..n].", args(1,3, arg("",str),arg("s",str),arg("offset",int))),
6405 : command("str", "string3", STRSubString, false, "Return substring s[offset..offset+count] of a string s[0..n]", args(1,4, arg("",str),arg("s",str),arg("offset",int),arg("count",int))),
6406 : command("str", "length", STRLength, false, "Return the length of a string.", args(1,2, arg("",int),arg("s",str))),
6407 : command("str", "nbytes", STRBytes, false, "Return the string length in bytes.", args(1,2, arg("",int),arg("s",str))),
6408 : command("str", "unicodeAt", STRWChrAt, false, "get a unicode character\n(as an int) from a string position.", args(1,3, arg("",int),arg("s",str),arg("index",int))),
6409 : command("str", "unicode", STRFromWChr, false, "convert a unicode to a character.", args(1,2, arg("",str),arg("wchar",int))),
6410 : pattern("str", "startswith", STRstartswith, false, "Check if string starts with substring.", args(1,3, arg("",bit),arg("s",str),arg("prefix",str))),
6411 : pattern("str", "startswith", STRstartswith, false, "Check if string starts with substring, icase flag.", args(1,4, arg("",bit),arg("s",str),arg("prefix",str),arg("icase",bit))),
6412 : pattern("str", "endswith", STRendswith, false, "Check if string ends with substring.", args(1,3, arg("",bit),arg("s",str),arg("suffix",str))),
6413 : pattern("str", "endswith", STRendswith, false, "Check if string ends with substring, icase flag.", args(1,4, arg("",bit),arg("s",str),arg("suffix",str),arg("icase",bit))),
6414 : pattern("str", "contains", STRcontains, false, "Check if string haystack contains string needle.", args(1,3, arg("",bit),arg("haystack",str),arg("needle",str))),
6415 : pattern("str", "contains", STRcontains, false, "Check if string haystack contains string needle, icase flag.", args(1,4, arg("",bit),arg("haystack",str),arg("needle",str),arg("icase",bit))),
6416 : command("str", "toLower", STRlower, false, "Convert a string to lower case.", args(1,2, arg("",str),arg("s",str))),
6417 : command("str", "toUpper", STRupper, false, "Convert a string to upper case.", args(1,2, arg("",str),arg("s",str))),
6418 : pattern("str", "search", STRstr_search, false, "Search for a substring. Returns\nposition, -1 if not found.", args(1,3, arg("",int),arg("s",str),arg("c",str))),
6419 : pattern("str", "search", STRstr_search, false, "Search for a substring, icase flag. Returns\nposition, -1 if not found.", args(1,4, arg("",int),arg("s",str),arg("c",str),arg("icase",bit))),
6420 : pattern("str", "r_search", STRrevstr_search, false, "Reverse search for a substring. Returns\nposition, -1 if not found.", args(1,3, arg("",int),arg("s",str),arg("c",str))),
6421 : pattern("str", "r_search", STRrevstr_search, false, "Reverse search for a substring, icase flag. Returns\nposition, -1 if not found.", args(1,4, arg("",int),arg("s",str),arg("c",str),arg("icase",bit))),
6422 : command("str", "splitpart", STRsplitpart, false, "Split string on delimiter. Returns\ngiven field (counting from one.)", args(1,4, arg("",str),arg("s",str),arg("needle",str),arg("field",int))),
6423 : command("str", "trim", STRStrip, false, "Strip whitespaces around a string.", args(1,2, arg("",str),arg("s",str))),
6424 : command("str", "ltrim", STRLtrim, false, "Strip whitespaces from start of a string.", args(1,2, arg("",str),arg("s",str))),
6425 : command("str", "rtrim", STRRtrim, false, "Strip whitespaces from end of a string.", args(1,2, arg("",str),arg("s",str))),
6426 : command("str", "trim2", STRStrip2, false, "Remove the longest string containing only characters from the second string around the first string.", args(1,3, arg("",str),arg("s",str),arg("s2",str))),
6427 : command("str", "ltrim2", STRLtrim2, false, "Remove the longest string containing only characters from the second string from the start of the first string.", args(1,3, arg("",str),arg("s",str),arg("s2",str))),
6428 : command("str", "rtrim2", STRRtrim2, false, "Remove the longest string containing only characters from the second string from the end of the first string.", args(1,3, arg("",str),arg("s",str),arg("s2",str))),
6429 : command("str", "lpad", STRLpad, false, "Fill up a string to the given length prepending the whitespace character.", args(1,3, arg("",str),arg("s",str),arg("len",int))),
6430 : command("str", "rpad", STRRpad, false, "Fill up a string to the given length appending the whitespace character.", args(1,3, arg("",str),arg("s",str),arg("len",int))),
6431 : command("str", "lpad3", STRLpad3, false, "Fill up the first string to the given length prepending characters of the second string.", args(1,4, arg("",str),arg("s",str),arg("len",int),arg("s2",str))),
6432 : command("str", "rpad3", STRRpad3, false, "Fill up the first string to the given length appending characters of the second string.", args(1,4, arg("",str),arg("s",str),arg("len",int),arg("s2",str))),
6433 : command("str", "substitute", STRSubstitute, false, "Substitute first occurrence of 'src' by\n'dst'. Iff repeated = true this is\nrepeated while 'src' can be found in the\nresult string. In order to prevent\nrecursion and result strings of unlimited\nsize, repeating is only done iff src is\nnot a substring of dst.", args(1,5, arg("",str),arg("s",str),arg("src",str),arg("dst",str),arg("rep",bit))),
6434 : command("str", "like", STRlikewrap, false, "SQL pattern match function", args(1,3, arg("",bit),arg("s",str),arg("pat",str))),
6435 : command("str", "like3", STRlikewrap3, false, "SQL pattern match function", args(1,4, arg("",bit),arg("s",str),arg("pat",str),arg("esc",str))),
6436 : command("str", "ascii", STRascii, false, "Return unicode of head of string", args(1,2, arg("",int),arg("s",str))),
6437 : command("str", "substring", STRsubstringTail, false, "Extract the tail of a string", args(1,3, arg("",str),arg("s",str),arg("start",int))),
6438 : command("str", "substring3", STRsubstring, false, "Extract a substring from str starting at start, for length len", args(1,4, arg("",str),arg("s",str),arg("start",int),arg("len",int))),
6439 : command("str", "prefix", STRprefix, false, "Extract the prefix of a given length", args(1,3, arg("",str),arg("s",str),arg("l",int))),
6440 : command("str", "suffix", STRsuffix, false, "Extract the suffix of a given length", args(1,3, arg("",str),arg("s",str),arg("l",int))),
6441 : command("str", "stringleft", STRprefix, false, "", args(1,3, arg("",str),arg("s",str),arg("l",int))),
6442 : command("str", "stringright", STRsuffix, false, "", args(1,3, arg("",str),arg("s",str),arg("l",int))),
6443 : command("str", "locate", STRlocate, false, "Locate the start position of a string", args(1,3, arg("",int),arg("s1",str),arg("s2",str))),
6444 : command("str", "locate3", STRlocate3, false, "Locate the start position of a string", args(1,4, arg("",int),arg("s1",str),arg("s2",str),arg("start",int))),
6445 : command("str", "insert", STRinsert, false, "Insert a string into another", args(1,5, arg("",str),arg("s",str),arg("start",int),arg("l",int),arg("s2",str))),
6446 : command("str", "replace", STRreplace, false, "Insert a string into another", args(1,4, arg("",str),arg("s",str),arg("pat",str),arg("s2",str))),
6447 : command("str", "repeat", STRrepeat, false, "", args(1,3, arg("",str),arg("s2",str),arg("c",int))),
6448 : command("str", "space", STRspace, false, "", args(1,2, arg("",str),arg("l",int))),
6449 : command("str", "epilogue", STRepilogue, false, "", args(1,1, arg("",void))),
6450 : command("str", "asciify", STRasciify, false, "Transform string from UTF8 to ASCII", args(1, 2, arg("out",str), arg("in",str))),
6451 : pattern("str", "startswithselect", STRstartswithselect, false, "Select all head values of the first input BAT for which the\ntail value starts with the given prefix.", args(1,5, batarg("",oid),batarg("b",str),batarg("s",oid),arg("prefix",str),arg("anti",bit))),
6452 : pattern("str", "startswithselect", STRstartswithselect, false, "Select all head values of the first input BAT for which the\ntail value starts with the given prefix + icase.", args(1,6, batarg("",oid),batarg("b",str),batarg("s",oid),arg("prefix",str),arg("caseignore",bit),arg("anti",bit))),
6453 : pattern("str", "endswithselect", STRendswithselect, false, "Select all head values of the first input BAT for which the\ntail value end with the given suffix.", args(1,5, batarg("",oid),batarg("b",str),batarg("s",oid),arg("suffix",str),arg("anti",bit))),
6454 : pattern("str", "endswithselect", STRendswithselect, false, "Select all head values of the first input BAT for which the\ntail value end with the given suffix + icase.", args(1,6, batarg("",oid),batarg("b",str),batarg("s",oid),arg("suffix",str),arg("caseignore",bit),arg("anti",bit))),
6455 : pattern("str", "containsselect", STRcontainsselect, false, "Select all head values of the first input BAT for which the\ntail value contains the given needle.", args(1,5, batarg("",oid),batarg("b",str),batarg("s",oid),arg("needle",str),arg("anti",bit))),
6456 : pattern("str", "containsselect", STRcontainsselect, false, "Select all head values of the first input BAT for which the\ntail value contains the given needle + icase.", args(1,6, batarg("",oid),batarg("b",str),batarg("s",oid),arg("needle",str),arg("caseignore",bit),arg("anti",bit))),
6457 : pattern("str", "startswithjoin", STRstartswithjoin, false, "Join the string bat L with the prefix bat R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows.", args(2,9, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
6458 : pattern("str", "startswithjoin", STRstartswithjoin, false, "Join the string bat L with the prefix bat R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows + icase.", args(2,10, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
6459 : pattern("str", "startswithjoin", STRstartswithjoin, false, "The same as STRstartswithjoin, but only produce one output.", args(1,8,batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))),
6460 : pattern("str", "startswithjoin", STRstartswithjoin, false, "The same as STRstartswithjoin, but only produce one output + icase.", args(1,9,batarg("",oid),batarg("l",str),batarg("r",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))),
6461 : pattern("str", "endswithjoin", STRendswithjoin, false, "Join the string bat L with the suffix bat R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows.", args(2,9, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
6462 : pattern("str", "endswithjoin", STRendswithjoin, false, "Join the string bat L with the suffix bat R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows + icase.", args(2,10, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
6463 : pattern("str", "endswithjoin", STRendswithjoin, false, "The same as STRendswithjoin, but only produce one output.", args(1,8,batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))),
6464 : pattern("str", "endswithjoin", STRendswithjoin, false, "The same as STRendswithjoin, but only produce one output + icase.", args(1,9,batarg("",oid),batarg("l",str),batarg("r",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))),
6465 : pattern("str", "containsjoin", STRcontainsjoin, false, "Join the string bat L with the bat R if L contains the string of R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows.", args(2,9, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
6466 : pattern("str", "containsjoin", STRcontainsjoin, false, "Join the string bat L with the bat R if L contains the string of R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows + icase.", args(2,10, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
6467 : pattern("str", "containsjoin", STRcontainsjoin, false, "The same as STRcontainsjoin, but only produce one output.", args(1,8,batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))),
6468 : pattern("str", "containsjoin", STRcontainsjoin, false, "The same as STRcontainsjoin, but only produce one output + icase.", args(1,9,batarg("",oid),batarg("l",str),batarg("r",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))),
6469 : { .imp=NULL }
6470 : };
6471 : #include "mal_import.h"
/*
 * MSVC-only preparation for the startup hook below: any macro named
 * `read` is undefined first, so that the `read` attribute keyword in the
 * pragma is not macro-expanded, and then the ".CRT$XCU" section is
 * declared readable.
 * NOTE(review): LIB_STARTUP_FUNC (presumably from mal_import.h) is
 * expected to place its initializer pointer in that section -- confirm
 * against the macro definition.
 */
6472 : #ifdef _MSC_VER
6473 : #undef read
6474 : #pragma section(".CRT$XCU",read)
6475 : #endif
/*
 * Library startup hook for the "str" module.  Its body makes a single
 * call to mal_module2(), passing the module name "str", the
 * str_init_funcs table (whose end is marked by the { .imp=NULL } entry)
 * and STRprelude.
 * NOTE(review): STRprelude is presumably the module's prelude callback
 * and the two NULL arguments presumably leave optional slots unused --
 * verify against the mal_module2 prototype.
 */
6476 329 : LIB_STARTUP_FUNC(init_str_mal)
6477 329 : { mal_module2("str", NULL, str_init_funcs, STRprelude, NULL); }
|