Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * N.J. Nes, M.L. Kersten
15 : * The String Module
16 : * Strings can be created in many ways. Already in the built-in
17 : * operations each atom can be cast to a string using the str(atom)
18 : * mil command. The string module gives the possibility of
19 : * constructing a string as a substring of a given string (s). There
20 : * are two such construction functions. The first is the substring
21 : * from some position (offset) until the end of the string. The second
22 : * starts again at the given offset position but only copies count
23 : * number of bytes. The functions fail when the position and count
24 : * fall out of bounds. A negative position indicates that the position
25 : * is computed from the end of the source string.
26 : *
27 : * The strings can be compared using the "=" and "!=" operators.
28 : *
29 : * The operator "+" concatenates a string and an atom. The atom will
30 : * be converted to a string using the atom to string c function. The
31 : * string and the result of the conversion are concatenated to form a
32 : * new string. This string is returned.
33 : *
34 : * The length function returns the length of the string. The length is
35 : * the number of characters in the string. The maximum string length
36 : * handled by the kernel is 32-bits long.
37 : *
38 : * chrAt() returns the character at position index in the string
39 : * s. The function will fail when the index is out of range. The range
40 : * is from 0 to length(s)-1.
41 : *
42 : * The startsWith and endsWith functions test if the string s starts
43 : * with or ends with the given prefix or suffix.
44 : *
45 : * The toLower and toUpper functions cast the string to lower or upper
46 : * case characters.
47 : *
48 : * The search(str,chr) function searches for the first occurrence of a
49 : * character from the beginning of the string. The search(chr,str)
50 : * searches for the last occurrence (or first from the end of the
51 : * string). The last search function locates the position of the first
52 : * occurrence of the string s2 in string s. All search functions
53 : * return -1 when the search fails. Otherwise the position is
54 : * returned.
55 : *
56 : * All string functions fail when an incorrect string (NULL pointer)
57 : * is given. In the current implementation, a fail is signaled by
58 : * returning nil, since this facilitates the use of the string module
59 : * in bulk operations.
60 : *
61 : * All functions in the module have now been converted to
62 : * Unicode. Internally, we use UTF-8 to store strings as Unicode in
63 : * zero-terminated byte-sequences.
64 : */
65 : #include "monetdb_config.h"
66 : #include "str.h"
67 : #include <string.h>
68 : #ifdef HAVE_ICONV
69 : #include <iconv.h>
70 : #include <locale.h>
71 : #endif
72 : #ifdef HAVE_WCWIDTH
73 : #include <wchar.h>
74 : #endif
75 : #include "mal_interpreter.h"
76 :
77 : #include "utf8.h"
78 :
79 : /*
80 : * UTF-8 Handling
81 : * UTF-8 is a way to store Unicode strings in zero-terminated byte
82 : * sequences, which you can e.g. strcmp() with old 8-bit Latin-1
83 : * strcmp() functions and which then gives the same results as doing
84 : * the strcmp() on equivalent Latin-1 and ASCII character strings
85 : * stored in simple one-byte sequences. These characteristics make
86 : * UTF-8 an attractive format for upgrading an ASCII-oriented computer
87 : * program towards one that supports Unicode. That is why we use UTF-8
88 : * in MonetDB.
89 : *
90 : * For MonetDB, UTF-8 mostly has no consequences, as strings stored in
91 : * BATs are regarded as data, and it does not matter for the database
92 : * kernel whether the zero-terminated byte sequence it is processing
93 : * has UTF-8 or Latin-1 semantics. This module is the only place where
94 : * explicit string functionality is located. We do have to adapt
95 : * the behavior of the length(), search(), substring() and similar
96 : * commands to the fact that one (Unicode) character is now
97 : * stored in a variable number of bytes (possibly > 1).
98 : *
99 : * One of the things that becomes more complex in Unicode is
100 : * uppercase/lowercase conversion. The tables below are the simple
101 : * one-to-one Unicode case mappings. We do not support the special
102 : * casing mappings (e.g. from one to two letters).
103 : *
104 : * References:
105 : * simple casing: http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
106 : * complex casing: http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt
107 : *
108 : * The Unicode case conversion implementation in MonetDB fills a
109 : * mapping BAT of int,int combinations, in which we perform
110 : * high-performance hash-lookup (all code inlined).
111 : */
112 :
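113 : /* These tables were generated from the Unicode spec (13.0.0 per the original note; NOTE(review): pairs such as U+A7C1/U+A7C0 and U+2C5F/U+2C2F were first assigned in 14.0 -- TODO confirm the version). */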
114 : static const struct UTF8_lower_upper {
115 : const unsigned int from, to;
116 : } UTF8_toUpper[] = { /* code points with non-null uppercase conversion */
117 : {0x0061, 0x0041,},
118 : {0x0062, 0x0042,},
119 : {0x0063, 0x0043,},
120 : {0x0064, 0x0044,},
121 : {0x0065, 0x0045,},
122 : {0x0066, 0x0046,},
123 : {0x0067, 0x0047,},
124 : {0x0068, 0x0048,},
125 : {0x0069, 0x0049,},
126 : {0x006A, 0x004A,},
127 : {0x006B, 0x004B,},
128 : {0x006C, 0x004C,},
129 : {0x006D, 0x004D,},
130 : {0x006E, 0x004E,},
131 : {0x006F, 0x004F,},
132 : {0x0070, 0x0050,},
133 : {0x0071, 0x0051,},
134 : {0x0072, 0x0052,},
135 : {0x0073, 0x0053,},
136 : {0x0074, 0x0054,},
137 : {0x0075, 0x0055,},
138 : {0x0076, 0x0056,},
139 : {0x0077, 0x0057,},
140 : {0x0078, 0x0058,},
141 : {0x0079, 0x0059,},
142 : {0x007A, 0x005A,},
143 : {0x00B5, 0x039C,},
144 : {0x00E0, 0x00C0,},
145 : {0x00E1, 0x00C1,},
146 : {0x00E2, 0x00C2,},
147 : {0x00E3, 0x00C3,},
148 : {0x00E4, 0x00C4,},
149 : {0x00E5, 0x00C5,},
150 : {0x00E6, 0x00C6,},
151 : {0x00E7, 0x00C7,},
152 : {0x00E8, 0x00C8,},
153 : {0x00E9, 0x00C9,},
154 : {0x00EA, 0x00CA,},
155 : {0x00EB, 0x00CB,},
156 : {0x00EC, 0x00CC,},
157 : {0x00ED, 0x00CD,},
158 : {0x00EE, 0x00CE,},
159 : {0x00EF, 0x00CF,},
160 : {0x00F0, 0x00D0,},
161 : {0x00F1, 0x00D1,},
162 : {0x00F2, 0x00D2,},
163 : {0x00F3, 0x00D3,},
164 : {0x00F4, 0x00D4,},
165 : {0x00F5, 0x00D5,},
166 : {0x00F6, 0x00D6,},
167 : {0x00F8, 0x00D8,},
168 : {0x00F9, 0x00D9,},
169 : {0x00FA, 0x00DA,},
170 : {0x00FB, 0x00DB,},
171 : {0x00FC, 0x00DC,},
172 : {0x00FD, 0x00DD,},
173 : {0x00FE, 0x00DE,},
174 : {0x00FF, 0x0178,},
175 : {0x0101, 0x0100,},
176 : {0x0103, 0x0102,},
177 : {0x0105, 0x0104,},
178 : {0x0107, 0x0106,},
179 : {0x0109, 0x0108,},
180 : {0x010B, 0x010A,},
181 : {0x010D, 0x010C,},
182 : {0x010F, 0x010E,},
183 : {0x0111, 0x0110,},
184 : {0x0113, 0x0112,},
185 : {0x0115, 0x0114,},
186 : {0x0117, 0x0116,},
187 : {0x0119, 0x0118,},
188 : {0x011B, 0x011A,},
189 : {0x011D, 0x011C,},
190 : {0x011F, 0x011E,},
191 : {0x0121, 0x0120,},
192 : {0x0123, 0x0122,},
193 : {0x0125, 0x0124,},
194 : {0x0127, 0x0126,},
195 : {0x0129, 0x0128,},
196 : {0x012B, 0x012A,},
197 : {0x012D, 0x012C,},
198 : {0x012F, 0x012E,},
199 : {0x0131, 0x0049,},
200 : {0x0133, 0x0132,},
201 : {0x0135, 0x0134,},
202 : {0x0137, 0x0136,},
203 : {0x013A, 0x0139,},
204 : {0x013C, 0x013B,},
205 : {0x013E, 0x013D,},
206 : {0x0140, 0x013F,},
207 : {0x0142, 0x0141,},
208 : {0x0144, 0x0143,},
209 : {0x0146, 0x0145,},
210 : {0x0148, 0x0147,},
211 : {0x014B, 0x014A,},
212 : {0x014D, 0x014C,},
213 : {0x014F, 0x014E,},
214 : {0x0151, 0x0150,},
215 : {0x0153, 0x0152,},
216 : {0x0155, 0x0154,},
217 : {0x0157, 0x0156,},
218 : {0x0159, 0x0158,},
219 : {0x015B, 0x015A,},
220 : {0x015D, 0x015C,},
221 : {0x015F, 0x015E,},
222 : {0x0161, 0x0160,},
223 : {0x0163, 0x0162,},
224 : {0x0165, 0x0164,},
225 : {0x0167, 0x0166,},
226 : {0x0169, 0x0168,},
227 : {0x016B, 0x016A,},
228 : {0x016D, 0x016C,},
229 : {0x016F, 0x016E,},
230 : {0x0171, 0x0170,},
231 : {0x0173, 0x0172,},
232 : {0x0175, 0x0174,},
233 : {0x0177, 0x0176,},
234 : {0x017A, 0x0179,},
235 : {0x017C, 0x017B,},
236 : {0x017E, 0x017D,},
237 : {0x017F, 0x0053,},
238 : {0x0180, 0x0243,},
239 : {0x0183, 0x0182,},
240 : {0x0185, 0x0184,},
241 : {0x0188, 0x0187,},
242 : {0x018C, 0x018B,},
243 : {0x0192, 0x0191,},
244 : {0x0195, 0x01F6,},
245 : {0x0199, 0x0198,},
246 : {0x019A, 0x023D,},
247 : {0x019E, 0x0220,},
248 : {0x01A1, 0x01A0,},
249 : {0x01A3, 0x01A2,},
250 : {0x01A5, 0x01A4,},
251 : {0x01A8, 0x01A7,},
252 : {0x01AD, 0x01AC,},
253 : {0x01B0, 0x01AF,},
254 : {0x01B4, 0x01B3,},
255 : {0x01B6, 0x01B5,},
256 : {0x01B9, 0x01B8,},
257 : {0x01BD, 0x01BC,},
258 : {0x01BF, 0x01F7,},
259 : {0x01C5, 0x01C4,},
260 : {0x01C6, 0x01C4,},
261 : {0x01C8, 0x01C7,},
262 : {0x01C9, 0x01C7,},
263 : {0x01CB, 0x01CA,},
264 : {0x01CC, 0x01CA,},
265 : {0x01CE, 0x01CD,},
266 : {0x01D0, 0x01CF,},
267 : {0x01D2, 0x01D1,},
268 : {0x01D4, 0x01D3,},
269 : {0x01D6, 0x01D5,},
270 : {0x01D8, 0x01D7,},
271 : {0x01DA, 0x01D9,},
272 : {0x01DC, 0x01DB,},
273 : {0x01DD, 0x018E,},
274 : {0x01DF, 0x01DE,},
275 : {0x01E1, 0x01E0,},
276 : {0x01E3, 0x01E2,},
277 : {0x01E5, 0x01E4,},
278 : {0x01E7, 0x01E6,},
279 : {0x01E9, 0x01E8,},
280 : {0x01EB, 0x01EA,},
281 : {0x01ED, 0x01EC,},
282 : {0x01EF, 0x01EE,},
283 : {0x01F2, 0x01F1,},
284 : {0x01F3, 0x01F1,},
285 : {0x01F5, 0x01F4,},
286 : {0x01F9, 0x01F8,},
287 : {0x01FB, 0x01FA,},
288 : {0x01FD, 0x01FC,},
289 : {0x01FF, 0x01FE,},
290 : {0x0201, 0x0200,},
291 : {0x0203, 0x0202,},
292 : {0x0205, 0x0204,},
293 : {0x0207, 0x0206,},
294 : {0x0209, 0x0208,},
295 : {0x020B, 0x020A,},
296 : {0x020D, 0x020C,},
297 : {0x020F, 0x020E,},
298 : {0x0211, 0x0210,},
299 : {0x0213, 0x0212,},
300 : {0x0215, 0x0214,},
301 : {0x0217, 0x0216,},
302 : {0x0219, 0x0218,},
303 : {0x021B, 0x021A,},
304 : {0x021D, 0x021C,},
305 : {0x021F, 0x021E,},
306 : {0x0223, 0x0222,},
307 : {0x0225, 0x0224,},
308 : {0x0227, 0x0226,},
309 : {0x0229, 0x0228,},
310 : {0x022B, 0x022A,},
311 : {0x022D, 0x022C,},
312 : {0x022F, 0x022E,},
313 : {0x0231, 0x0230,},
314 : {0x0233, 0x0232,},
315 : {0x023C, 0x023B,},
316 : {0x023F, 0x2C7E,},
317 : {0x0240, 0x2C7F,},
318 : {0x0242, 0x0241,},
319 : {0x0247, 0x0246,},
320 : {0x0249, 0x0248,},
321 : {0x024B, 0x024A,},
322 : {0x024D, 0x024C,},
323 : {0x024F, 0x024E,},
324 : {0x0250, 0x2C6F,},
325 : {0x0251, 0x2C6D,},
326 : {0x0252, 0x2C70,},
327 : {0x0253, 0x0181,},
328 : {0x0254, 0x0186,},
329 : {0x0256, 0x0189,},
330 : {0x0257, 0x018A,},
331 : {0x0259, 0x018F,},
332 : {0x025B, 0x0190,},
333 : {0x025C, 0xA7AB,},
334 : {0x0260, 0x0193,},
335 : {0x0261, 0xA7AC,},
336 : {0x0263, 0x0194,},
337 : {0x0265, 0xA78D,},
338 : {0x0266, 0xA7AA,},
339 : {0x0268, 0x0197,},
340 : {0x0269, 0x0196,},
341 : {0x026A, 0xA7AE,},
342 : {0x026B, 0x2C62,},
343 : {0x026C, 0xA7AD,},
344 : {0x026F, 0x019C,},
345 : {0x0271, 0x2C6E,},
346 : {0x0272, 0x019D,},
347 : {0x0275, 0x019F,},
348 : {0x027D, 0x2C64,},
349 : {0x0280, 0x01A6,},
350 : {0x0282, 0xA7C5,},
351 : {0x0283, 0x01A9,},
352 : {0x0287, 0xA7B1,},
353 : {0x0288, 0x01AE,},
354 : {0x0289, 0x0244,},
355 : {0x028A, 0x01B1,},
356 : {0x028B, 0x01B2,},
357 : {0x028C, 0x0245,},
358 : {0x0292, 0x01B7,},
359 : {0x029D, 0xA7B2,},
360 : {0x029E, 0xA7B0,},
361 : {0x0345, 0x0399,},
362 : {0x0371, 0x0370,},
363 : {0x0373, 0x0372,},
364 : {0x0377, 0x0376,},
365 : {0x037B, 0x03FD,},
366 : {0x037C, 0x03FE,},
367 : {0x037D, 0x03FF,},
368 : {0x03AC, 0x0386,},
369 : {0x03AD, 0x0388,},
370 : {0x03AE, 0x0389,},
371 : {0x03AF, 0x038A,},
372 : {0x03B1, 0x0391,},
373 : {0x03B2, 0x0392,},
374 : {0x03B3, 0x0393,},
375 : {0x03B4, 0x0394,},
376 : {0x03B5, 0x0395,},
377 : {0x03B6, 0x0396,},
378 : {0x03B7, 0x0397,},
379 : {0x03B8, 0x0398,},
380 : {0x03B9, 0x0399,},
381 : {0x03BA, 0x039A,},
382 : {0x03BB, 0x039B,},
383 : {0x03BC, 0x039C,},
384 : {0x03BD, 0x039D,},
385 : {0x03BE, 0x039E,},
386 : {0x03BF, 0x039F,},
387 : {0x03C0, 0x03A0,},
388 : {0x03C1, 0x03A1,},
389 : {0x03C2, 0x03A3,},
390 : {0x03C3, 0x03A3,},
391 : {0x03C4, 0x03A4,},
392 : {0x03C5, 0x03A5,},
393 : {0x03C6, 0x03A6,},
394 : {0x03C7, 0x03A7,},
395 : {0x03C8, 0x03A8,},
396 : {0x03C9, 0x03A9,},
397 : {0x03CA, 0x03AA,},
398 : {0x03CB, 0x03AB,},
399 : {0x03CC, 0x038C,},
400 : {0x03CD, 0x038E,},
401 : {0x03CE, 0x038F,},
402 : {0x03D0, 0x0392,},
403 : {0x03D1, 0x0398,},
404 : {0x03D5, 0x03A6,},
405 : {0x03D6, 0x03A0,},
406 : {0x03D7, 0x03CF,},
407 : {0x03D9, 0x03D8,},
408 : {0x03DB, 0x03DA,},
409 : {0x03DD, 0x03DC,},
410 : {0x03DF, 0x03DE,},
411 : {0x03E1, 0x03E0,},
412 : {0x03E3, 0x03E2,},
413 : {0x03E5, 0x03E4,},
414 : {0x03E7, 0x03E6,},
415 : {0x03E9, 0x03E8,},
416 : {0x03EB, 0x03EA,},
417 : {0x03ED, 0x03EC,},
418 : {0x03EF, 0x03EE,},
419 : {0x03F0, 0x039A,},
420 : {0x03F1, 0x03A1,},
421 : {0x03F2, 0x03F9,},
422 : {0x03F3, 0x037F,},
423 : {0x03F5, 0x0395,},
424 : {0x03F8, 0x03F7,},
425 : {0x03FB, 0x03FA,},
426 : {0x0430, 0x0410,},
427 : {0x0431, 0x0411,},
428 : {0x0432, 0x0412,},
429 : {0x0433, 0x0413,},
430 : {0x0434, 0x0414,},
431 : {0x0435, 0x0415,},
432 : {0x0436, 0x0416,},
433 : {0x0437, 0x0417,},
434 : {0x0438, 0x0418,},
435 : {0x0439, 0x0419,},
436 : {0x043A, 0x041A,},
437 : {0x043B, 0x041B,},
438 : {0x043C, 0x041C,},
439 : {0x043D, 0x041D,},
440 : {0x043E, 0x041E,},
441 : {0x043F, 0x041F,},
442 : {0x0440, 0x0420,},
443 : {0x0441, 0x0421,},
444 : {0x0442, 0x0422,},
445 : {0x0443, 0x0423,},
446 : {0x0444, 0x0424,},
447 : {0x0445, 0x0425,},
448 : {0x0446, 0x0426,},
449 : {0x0447, 0x0427,},
450 : {0x0448, 0x0428,},
451 : {0x0449, 0x0429,},
452 : {0x044A, 0x042A,},
453 : {0x044B, 0x042B,},
454 : {0x044C, 0x042C,},
455 : {0x044D, 0x042D,},
456 : {0x044E, 0x042E,},
457 : {0x044F, 0x042F,},
458 : {0x0450, 0x0400,},
459 : {0x0451, 0x0401,},
460 : {0x0452, 0x0402,},
461 : {0x0453, 0x0403,},
462 : {0x0454, 0x0404,},
463 : {0x0455, 0x0405,},
464 : {0x0456, 0x0406,},
465 : {0x0457, 0x0407,},
466 : {0x0458, 0x0408,},
467 : {0x0459, 0x0409,},
468 : {0x045A, 0x040A,},
469 : {0x045B, 0x040B,},
470 : {0x045C, 0x040C,},
471 : {0x045D, 0x040D,},
472 : {0x045E, 0x040E,},
473 : {0x045F, 0x040F,},
474 : {0x0461, 0x0460,},
475 : {0x0463, 0x0462,},
476 : {0x0465, 0x0464,},
477 : {0x0467, 0x0466,},
478 : {0x0469, 0x0468,},
479 : {0x046B, 0x046A,},
480 : {0x046D, 0x046C,},
481 : {0x046F, 0x046E,},
482 : {0x0471, 0x0470,},
483 : {0x0473, 0x0472,},
484 : {0x0475, 0x0474,},
485 : {0x0477, 0x0476,},
486 : {0x0479, 0x0478,},
487 : {0x047B, 0x047A,},
488 : {0x047D, 0x047C,},
489 : {0x047F, 0x047E,},
490 : {0x0481, 0x0480,},
491 : {0x048B, 0x048A,},
492 : {0x048D, 0x048C,},
493 : {0x048F, 0x048E,},
494 : {0x0491, 0x0490,},
495 : {0x0493, 0x0492,},
496 : {0x0495, 0x0494,},
497 : {0x0497, 0x0496,},
498 : {0x0499, 0x0498,},
499 : {0x049B, 0x049A,},
500 : {0x049D, 0x049C,},
501 : {0x049F, 0x049E,},
502 : {0x04A1, 0x04A0,},
503 : {0x04A3, 0x04A2,},
504 : {0x04A5, 0x04A4,},
505 : {0x04A7, 0x04A6,},
506 : {0x04A9, 0x04A8,},
507 : {0x04AB, 0x04AA,},
508 : {0x04AD, 0x04AC,},
509 : {0x04AF, 0x04AE,},
510 : {0x04B1, 0x04B0,},
511 : {0x04B3, 0x04B2,},
512 : {0x04B5, 0x04B4,},
513 : {0x04B7, 0x04B6,},
514 : {0x04B9, 0x04B8,},
515 : {0x04BB, 0x04BA,},
516 : {0x04BD, 0x04BC,},
517 : {0x04BF, 0x04BE,},
518 : {0x04C2, 0x04C1,},
519 : {0x04C4, 0x04C3,},
520 : {0x04C6, 0x04C5,},
521 : {0x04C8, 0x04C7,},
522 : {0x04CA, 0x04C9,},
523 : {0x04CC, 0x04CB,},
524 : {0x04CE, 0x04CD,},
525 : {0x04CF, 0x04C0,},
526 : {0x04D1, 0x04D0,},
527 : {0x04D3, 0x04D2,},
528 : {0x04D5, 0x04D4,},
529 : {0x04D7, 0x04D6,},
530 : {0x04D9, 0x04D8,},
531 : {0x04DB, 0x04DA,},
532 : {0x04DD, 0x04DC,},
533 : {0x04DF, 0x04DE,},
534 : {0x04E1, 0x04E0,},
535 : {0x04E3, 0x04E2,},
536 : {0x04E5, 0x04E4,},
537 : {0x04E7, 0x04E6,},
538 : {0x04E9, 0x04E8,},
539 : {0x04EB, 0x04EA,},
540 : {0x04ED, 0x04EC,},
541 : {0x04EF, 0x04EE,},
542 : {0x04F1, 0x04F0,},
543 : {0x04F3, 0x04F2,},
544 : {0x04F5, 0x04F4,},
545 : {0x04F7, 0x04F6,},
546 : {0x04F9, 0x04F8,},
547 : {0x04FB, 0x04FA,},
548 : {0x04FD, 0x04FC,},
549 : {0x04FF, 0x04FE,},
550 : {0x0501, 0x0500,},
551 : {0x0503, 0x0502,},
552 : {0x0505, 0x0504,},
553 : {0x0507, 0x0506,},
554 : {0x0509, 0x0508,},
555 : {0x050B, 0x050A,},
556 : {0x050D, 0x050C,},
557 : {0x050F, 0x050E,},
558 : {0x0511, 0x0510,},
559 : {0x0513, 0x0512,},
560 : {0x0515, 0x0514,},
561 : {0x0517, 0x0516,},
562 : {0x0519, 0x0518,},
563 : {0x051B, 0x051A,},
564 : {0x051D, 0x051C,},
565 : {0x051F, 0x051E,},
566 : {0x0521, 0x0520,},
567 : {0x0523, 0x0522,},
568 : {0x0525, 0x0524,},
569 : {0x0527, 0x0526,},
570 : {0x0529, 0x0528,},
571 : {0x052B, 0x052A,},
572 : {0x052D, 0x052C,},
573 : {0x052F, 0x052E,},
574 : {0x0561, 0x0531,},
575 : {0x0562, 0x0532,},
576 : {0x0563, 0x0533,},
577 : {0x0564, 0x0534,},
578 : {0x0565, 0x0535,},
579 : {0x0566, 0x0536,},
580 : {0x0567, 0x0537,},
581 : {0x0568, 0x0538,},
582 : {0x0569, 0x0539,},
583 : {0x056A, 0x053A,},
584 : {0x056B, 0x053B,},
585 : {0x056C, 0x053C,},
586 : {0x056D, 0x053D,},
587 : {0x056E, 0x053E,},
588 : {0x056F, 0x053F,},
589 : {0x0570, 0x0540,},
590 : {0x0571, 0x0541,},
591 : {0x0572, 0x0542,},
592 : {0x0573, 0x0543,},
593 : {0x0574, 0x0544,},
594 : {0x0575, 0x0545,},
595 : {0x0576, 0x0546,},
596 : {0x0577, 0x0547,},
597 : {0x0578, 0x0548,},
598 : {0x0579, 0x0549,},
599 : {0x057A, 0x054A,},
600 : {0x057B, 0x054B,},
601 : {0x057C, 0x054C,},
602 : {0x057D, 0x054D,},
603 : {0x057E, 0x054E,},
604 : {0x057F, 0x054F,},
605 : {0x0580, 0x0550,},
606 : {0x0581, 0x0551,},
607 : {0x0582, 0x0552,},
608 : {0x0583, 0x0553,},
609 : {0x0584, 0x0554,},
610 : {0x0585, 0x0555,},
611 : {0x0586, 0x0556,},
612 : {0x10D0, 0x1C90,},
613 : {0x10D1, 0x1C91,},
614 : {0x10D2, 0x1C92,},
615 : {0x10D3, 0x1C93,},
616 : {0x10D4, 0x1C94,},
617 : {0x10D5, 0x1C95,},
618 : {0x10D6, 0x1C96,},
619 : {0x10D7, 0x1C97,},
620 : {0x10D8, 0x1C98,},
621 : {0x10D9, 0x1C99,},
622 : {0x10DA, 0x1C9A,},
623 : {0x10DB, 0x1C9B,},
624 : {0x10DC, 0x1C9C,},
625 : {0x10DD, 0x1C9D,},
626 : {0x10DE, 0x1C9E,},
627 : {0x10DF, 0x1C9F,},
628 : {0x10E0, 0x1CA0,},
629 : {0x10E1, 0x1CA1,},
630 : {0x10E2, 0x1CA2,},
631 : {0x10E3, 0x1CA3,},
632 : {0x10E4, 0x1CA4,},
633 : {0x10E5, 0x1CA5,},
634 : {0x10E6, 0x1CA6,},
635 : {0x10E7, 0x1CA7,},
636 : {0x10E8, 0x1CA8,},
637 : {0x10E9, 0x1CA9,},
638 : {0x10EA, 0x1CAA,},
639 : {0x10EB, 0x1CAB,},
640 : {0x10EC, 0x1CAC,},
641 : {0x10ED, 0x1CAD,},
642 : {0x10EE, 0x1CAE,},
643 : {0x10EF, 0x1CAF,},
644 : {0x10F0, 0x1CB0,},
645 : {0x10F1, 0x1CB1,},
646 : {0x10F2, 0x1CB2,},
647 : {0x10F3, 0x1CB3,},
648 : {0x10F4, 0x1CB4,},
649 : {0x10F5, 0x1CB5,},
650 : {0x10F6, 0x1CB6,},
651 : {0x10F7, 0x1CB7,},
652 : {0x10F8, 0x1CB8,},
653 : {0x10F9, 0x1CB9,},
654 : {0x10FA, 0x1CBA,},
655 : {0x10FD, 0x1CBD,},
656 : {0x10FE, 0x1CBE,},
657 : {0x10FF, 0x1CBF,},
658 : {0x13F8, 0x13F0,},
659 : {0x13F9, 0x13F1,},
660 : {0x13FA, 0x13F2,},
661 : {0x13FB, 0x13F3,},
662 : {0x13FC, 0x13F4,},
663 : {0x13FD, 0x13F5,},
664 : {0x1C80, 0x0412,},
665 : {0x1C81, 0x0414,},
666 : {0x1C82, 0x041E,},
667 : {0x1C83, 0x0421,},
668 : {0x1C84, 0x0422,},
669 : {0x1C85, 0x0422,},
670 : {0x1C86, 0x042A,},
671 : {0x1C87, 0x0462,},
672 : {0x1C88, 0xA64A,},
673 : {0x1D79, 0xA77D,},
674 : {0x1D7D, 0x2C63,},
675 : {0x1D8E, 0xA7C6,},
676 : {0x1E01, 0x1E00,},
677 : {0x1E03, 0x1E02,},
678 : {0x1E05, 0x1E04,},
679 : {0x1E07, 0x1E06,},
680 : {0x1E09, 0x1E08,},
681 : {0x1E0B, 0x1E0A,},
682 : {0x1E0D, 0x1E0C,},
683 : {0x1E0F, 0x1E0E,},
684 : {0x1E11, 0x1E10,},
685 : {0x1E13, 0x1E12,},
686 : {0x1E15, 0x1E14,},
687 : {0x1E17, 0x1E16,},
688 : {0x1E19, 0x1E18,},
689 : {0x1E1B, 0x1E1A,},
690 : {0x1E1D, 0x1E1C,},
691 : {0x1E1F, 0x1E1E,},
692 : {0x1E21, 0x1E20,},
693 : {0x1E23, 0x1E22,},
694 : {0x1E25, 0x1E24,},
695 : {0x1E27, 0x1E26,},
696 : {0x1E29, 0x1E28,},
697 : {0x1E2B, 0x1E2A,},
698 : {0x1E2D, 0x1E2C,},
699 : {0x1E2F, 0x1E2E,},
700 : {0x1E31, 0x1E30,},
701 : {0x1E33, 0x1E32,},
702 : {0x1E35, 0x1E34,},
703 : {0x1E37, 0x1E36,},
704 : {0x1E39, 0x1E38,},
705 : {0x1E3B, 0x1E3A,},
706 : {0x1E3D, 0x1E3C,},
707 : {0x1E3F, 0x1E3E,},
708 : {0x1E41, 0x1E40,},
709 : {0x1E43, 0x1E42,},
710 : {0x1E45, 0x1E44,},
711 : {0x1E47, 0x1E46,},
712 : {0x1E49, 0x1E48,},
713 : {0x1E4B, 0x1E4A,},
714 : {0x1E4D, 0x1E4C,},
715 : {0x1E4F, 0x1E4E,},
716 : {0x1E51, 0x1E50,},
717 : {0x1E53, 0x1E52,},
718 : {0x1E55, 0x1E54,},
719 : {0x1E57, 0x1E56,},
720 : {0x1E59, 0x1E58,},
721 : {0x1E5B, 0x1E5A,},
722 : {0x1E5D, 0x1E5C,},
723 : {0x1E5F, 0x1E5E,},
724 : {0x1E61, 0x1E60,},
725 : {0x1E63, 0x1E62,},
726 : {0x1E65, 0x1E64,},
727 : {0x1E67, 0x1E66,},
728 : {0x1E69, 0x1E68,},
729 : {0x1E6B, 0x1E6A,},
730 : {0x1E6D, 0x1E6C,},
731 : {0x1E6F, 0x1E6E,},
732 : {0x1E71, 0x1E70,},
733 : {0x1E73, 0x1E72,},
734 : {0x1E75, 0x1E74,},
735 : {0x1E77, 0x1E76,},
736 : {0x1E79, 0x1E78,},
737 : {0x1E7B, 0x1E7A,},
738 : {0x1E7D, 0x1E7C,},
739 : {0x1E7F, 0x1E7E,},
740 : {0x1E81, 0x1E80,},
741 : {0x1E83, 0x1E82,},
742 : {0x1E85, 0x1E84,},
743 : {0x1E87, 0x1E86,},
744 : {0x1E89, 0x1E88,},
745 : {0x1E8B, 0x1E8A,},
746 : {0x1E8D, 0x1E8C,},
747 : {0x1E8F, 0x1E8E,},
748 : {0x1E91, 0x1E90,},
749 : {0x1E93, 0x1E92,},
750 : {0x1E95, 0x1E94,},
751 : {0x1E9B, 0x1E60,},
752 : {0x1EA1, 0x1EA0,},
753 : {0x1EA3, 0x1EA2,},
754 : {0x1EA5, 0x1EA4,},
755 : {0x1EA7, 0x1EA6,},
756 : {0x1EA9, 0x1EA8,},
757 : {0x1EAB, 0x1EAA,},
758 : {0x1EAD, 0x1EAC,},
759 : {0x1EAF, 0x1EAE,},
760 : {0x1EB1, 0x1EB0,},
761 : {0x1EB3, 0x1EB2,},
762 : {0x1EB5, 0x1EB4,},
763 : {0x1EB7, 0x1EB6,},
764 : {0x1EB9, 0x1EB8,},
765 : {0x1EBB, 0x1EBA,},
766 : {0x1EBD, 0x1EBC,},
767 : {0x1EBF, 0x1EBE,},
768 : {0x1EC1, 0x1EC0,},
769 : {0x1EC3, 0x1EC2,},
770 : {0x1EC5, 0x1EC4,},
771 : {0x1EC7, 0x1EC6,},
772 : {0x1EC9, 0x1EC8,},
773 : {0x1ECB, 0x1ECA,},
774 : {0x1ECD, 0x1ECC,},
775 : {0x1ECF, 0x1ECE,},
776 : {0x1ED1, 0x1ED0,},
777 : {0x1ED3, 0x1ED2,},
778 : {0x1ED5, 0x1ED4,},
779 : {0x1ED7, 0x1ED6,},
780 : {0x1ED9, 0x1ED8,},
781 : {0x1EDB, 0x1EDA,},
782 : {0x1EDD, 0x1EDC,},
783 : {0x1EDF, 0x1EDE,},
784 : {0x1EE1, 0x1EE0,},
785 : {0x1EE3, 0x1EE2,},
786 : {0x1EE5, 0x1EE4,},
787 : {0x1EE7, 0x1EE6,},
788 : {0x1EE9, 0x1EE8,},
789 : {0x1EEB, 0x1EEA,},
790 : {0x1EED, 0x1EEC,},
791 : {0x1EEF, 0x1EEE,},
792 : {0x1EF1, 0x1EF0,},
793 : {0x1EF3, 0x1EF2,},
794 : {0x1EF5, 0x1EF4,},
795 : {0x1EF7, 0x1EF6,},
796 : {0x1EF9, 0x1EF8,},
797 : {0x1EFB, 0x1EFA,},
798 : {0x1EFD, 0x1EFC,},
799 : {0x1EFF, 0x1EFE,},
800 : {0x1F00, 0x1F08,},
801 : {0x1F01, 0x1F09,},
802 : {0x1F02, 0x1F0A,},
803 : {0x1F03, 0x1F0B,},
804 : {0x1F04, 0x1F0C,},
805 : {0x1F05, 0x1F0D,},
806 : {0x1F06, 0x1F0E,},
807 : {0x1F07, 0x1F0F,},
808 : {0x1F10, 0x1F18,},
809 : {0x1F11, 0x1F19,},
810 : {0x1F12, 0x1F1A,},
811 : {0x1F13, 0x1F1B,},
812 : {0x1F14, 0x1F1C,},
813 : {0x1F15, 0x1F1D,},
814 : {0x1F20, 0x1F28,},
815 : {0x1F21, 0x1F29,},
816 : {0x1F22, 0x1F2A,},
817 : {0x1F23, 0x1F2B,},
818 : {0x1F24, 0x1F2C,},
819 : {0x1F25, 0x1F2D,},
820 : {0x1F26, 0x1F2E,},
821 : {0x1F27, 0x1F2F,},
822 : {0x1F30, 0x1F38,},
823 : {0x1F31, 0x1F39,},
824 : {0x1F32, 0x1F3A,},
825 : {0x1F33, 0x1F3B,},
826 : {0x1F34, 0x1F3C,},
827 : {0x1F35, 0x1F3D,},
828 : {0x1F36, 0x1F3E,},
829 : {0x1F37, 0x1F3F,},
830 : {0x1F40, 0x1F48,},
831 : {0x1F41, 0x1F49,},
832 : {0x1F42, 0x1F4A,},
833 : {0x1F43, 0x1F4B,},
834 : {0x1F44, 0x1F4C,},
835 : {0x1F45, 0x1F4D,},
836 : {0x1F51, 0x1F59,},
837 : {0x1F53, 0x1F5B,},
838 : {0x1F55, 0x1F5D,},
839 : {0x1F57, 0x1F5F,},
840 : {0x1F60, 0x1F68,},
841 : {0x1F61, 0x1F69,},
842 : {0x1F62, 0x1F6A,},
843 : {0x1F63, 0x1F6B,},
844 : {0x1F64, 0x1F6C,},
845 : {0x1F65, 0x1F6D,},
846 : {0x1F66, 0x1F6E,},
847 : {0x1F67, 0x1F6F,},
848 : {0x1F70, 0x1FBA,},
849 : {0x1F71, 0x1FBB,},
850 : {0x1F72, 0x1FC8,},
851 : {0x1F73, 0x1FC9,},
852 : {0x1F74, 0x1FCA,},
853 : {0x1F75, 0x1FCB,},
854 : {0x1F76, 0x1FDA,},
855 : {0x1F77, 0x1FDB,},
856 : {0x1F78, 0x1FF8,},
857 : {0x1F79, 0x1FF9,},
858 : {0x1F7A, 0x1FEA,},
859 : {0x1F7B, 0x1FEB,},
860 : {0x1F7C, 0x1FFA,},
861 : {0x1F7D, 0x1FFB,},
862 : {0x1F80, 0x1F88,},
863 : {0x1F81, 0x1F89,},
864 : {0x1F82, 0x1F8A,},
865 : {0x1F83, 0x1F8B,},
866 : {0x1F84, 0x1F8C,},
867 : {0x1F85, 0x1F8D,},
868 : {0x1F86, 0x1F8E,},
869 : {0x1F87, 0x1F8F,},
870 : {0x1F90, 0x1F98,},
871 : {0x1F91, 0x1F99,},
872 : {0x1F92, 0x1F9A,},
873 : {0x1F93, 0x1F9B,},
874 : {0x1F94, 0x1F9C,},
875 : {0x1F95, 0x1F9D,},
876 : {0x1F96, 0x1F9E,},
877 : {0x1F97, 0x1F9F,},
878 : {0x1FA0, 0x1FA8,},
879 : {0x1FA1, 0x1FA9,},
880 : {0x1FA2, 0x1FAA,},
881 : {0x1FA3, 0x1FAB,},
882 : {0x1FA4, 0x1FAC,},
883 : {0x1FA5, 0x1FAD,},
884 : {0x1FA6, 0x1FAE,},
885 : {0x1FA7, 0x1FAF,},
886 : {0x1FB0, 0x1FB8,},
887 : {0x1FB1, 0x1FB9,},
888 : {0x1FB3, 0x1FBC,},
889 : {0x1FBE, 0x0399,},
890 : {0x1FC3, 0x1FCC,},
891 : {0x1FD0, 0x1FD8,},
892 : {0x1FD1, 0x1FD9,},
893 : {0x1FE0, 0x1FE8,},
894 : {0x1FE1, 0x1FE9,},
895 : {0x1FE5, 0x1FEC,},
896 : {0x1FF3, 0x1FFC,},
897 : {0x214E, 0x2132,},
898 : {0x2170, 0x2160,},
899 : {0x2171, 0x2161,},
900 : {0x2172, 0x2162,},
901 : {0x2173, 0x2163,},
902 : {0x2174, 0x2164,},
903 : {0x2175, 0x2165,},
904 : {0x2176, 0x2166,},
905 : {0x2177, 0x2167,},
906 : {0x2178, 0x2168,},
907 : {0x2179, 0x2169,},
908 : {0x217A, 0x216A,},
909 : {0x217B, 0x216B,},
910 : {0x217C, 0x216C,},
911 : {0x217D, 0x216D,},
912 : {0x217E, 0x216E,},
913 : {0x217F, 0x216F,},
914 : {0x2184, 0x2183,},
915 : {0x24D0, 0x24B6,},
916 : {0x24D1, 0x24B7,},
917 : {0x24D2, 0x24B8,},
918 : {0x24D3, 0x24B9,},
919 : {0x24D4, 0x24BA,},
920 : {0x24D5, 0x24BB,},
921 : {0x24D6, 0x24BC,},
922 : {0x24D7, 0x24BD,},
923 : {0x24D8, 0x24BE,},
924 : {0x24D9, 0x24BF,},
925 : {0x24DA, 0x24C0,},
926 : {0x24DB, 0x24C1,},
927 : {0x24DC, 0x24C2,},
928 : {0x24DD, 0x24C3,},
929 : {0x24DE, 0x24C4,},
930 : {0x24DF, 0x24C5,},
931 : {0x24E0, 0x24C6,},
932 : {0x24E1, 0x24C7,},
933 : {0x24E2, 0x24C8,},
934 : {0x24E3, 0x24C9,},
935 : {0x24E4, 0x24CA,},
936 : {0x24E5, 0x24CB,},
937 : {0x24E6, 0x24CC,},
938 : {0x24E7, 0x24CD,},
939 : {0x24E8, 0x24CE,},
940 : {0x24E9, 0x24CF,},
941 : {0x2C30, 0x2C00,},
942 : {0x2C31, 0x2C01,},
943 : {0x2C32, 0x2C02,},
944 : {0x2C33, 0x2C03,},
945 : {0x2C34, 0x2C04,},
946 : {0x2C35, 0x2C05,},
947 : {0x2C36, 0x2C06,},
948 : {0x2C37, 0x2C07,},
949 : {0x2C38, 0x2C08,},
950 : {0x2C39, 0x2C09,},
951 : {0x2C3A, 0x2C0A,},
952 : {0x2C3B, 0x2C0B,},
953 : {0x2C3C, 0x2C0C,},
954 : {0x2C3D, 0x2C0D,},
955 : {0x2C3E, 0x2C0E,},
956 : {0x2C3F, 0x2C0F,},
957 : {0x2C40, 0x2C10,},
958 : {0x2C41, 0x2C11,},
959 : {0x2C42, 0x2C12,},
960 : {0x2C43, 0x2C13,},
961 : {0x2C44, 0x2C14,},
962 : {0x2C45, 0x2C15,},
963 : {0x2C46, 0x2C16,},
964 : {0x2C47, 0x2C17,},
965 : {0x2C48, 0x2C18,},
966 : {0x2C49, 0x2C19,},
967 : {0x2C4A, 0x2C1A,},
968 : {0x2C4B, 0x2C1B,},
969 : {0x2C4C, 0x2C1C,},
970 : {0x2C4D, 0x2C1D,},
971 : {0x2C4E, 0x2C1E,},
972 : {0x2C4F, 0x2C1F,},
973 : {0x2C50, 0x2C20,},
974 : {0x2C51, 0x2C21,},
975 : {0x2C52, 0x2C22,},
976 : {0x2C53, 0x2C23,},
977 : {0x2C54, 0x2C24,},
978 : {0x2C55, 0x2C25,},
979 : {0x2C56, 0x2C26,},
980 : {0x2C57, 0x2C27,},
981 : {0x2C58, 0x2C28,},
982 : {0x2C59, 0x2C29,},
983 : {0x2C5A, 0x2C2A,},
984 : {0x2C5B, 0x2C2B,},
985 : {0x2C5C, 0x2C2C,},
986 : {0x2C5D, 0x2C2D,},
987 : {0x2C5E, 0x2C2E,},
988 : {0x2C5F, 0x2C2F,},
989 : {0x2C61, 0x2C60,},
990 : {0x2C65, 0x023A,},
991 : {0x2C66, 0x023E,},
992 : {0x2C68, 0x2C67,},
993 : {0x2C6A, 0x2C69,},
994 : {0x2C6C, 0x2C6B,},
995 : {0x2C73, 0x2C72,},
996 : {0x2C76, 0x2C75,},
997 : {0x2C81, 0x2C80,},
998 : {0x2C83, 0x2C82,},
999 : {0x2C85, 0x2C84,},
1000 : {0x2C87, 0x2C86,},
1001 : {0x2C89, 0x2C88,},
1002 : {0x2C8B, 0x2C8A,},
1003 : {0x2C8D, 0x2C8C,},
1004 : {0x2C8F, 0x2C8E,},
1005 : {0x2C91, 0x2C90,},
1006 : {0x2C93, 0x2C92,},
1007 : {0x2C95, 0x2C94,},
1008 : {0x2C97, 0x2C96,},
1009 : {0x2C99, 0x2C98,},
1010 : {0x2C9B, 0x2C9A,},
1011 : {0x2C9D, 0x2C9C,},
1012 : {0x2C9F, 0x2C9E,},
1013 : {0x2CA1, 0x2CA0,},
1014 : {0x2CA3, 0x2CA2,},
1015 : {0x2CA5, 0x2CA4,},
1016 : {0x2CA7, 0x2CA6,},
1017 : {0x2CA9, 0x2CA8,},
1018 : {0x2CAB, 0x2CAA,},
1019 : {0x2CAD, 0x2CAC,},
1020 : {0x2CAF, 0x2CAE,},
1021 : {0x2CB1, 0x2CB0,},
1022 : {0x2CB3, 0x2CB2,},
1023 : {0x2CB5, 0x2CB4,},
1024 : {0x2CB7, 0x2CB6,},
1025 : {0x2CB9, 0x2CB8,},
1026 : {0x2CBB, 0x2CBA,},
1027 : {0x2CBD, 0x2CBC,},
1028 : {0x2CBF, 0x2CBE,},
1029 : {0x2CC1, 0x2CC0,},
1030 : {0x2CC3, 0x2CC2,},
1031 : {0x2CC5, 0x2CC4,},
1032 : {0x2CC7, 0x2CC6,},
1033 : {0x2CC9, 0x2CC8,},
1034 : {0x2CCB, 0x2CCA,},
1035 : {0x2CCD, 0x2CCC,},
1036 : {0x2CCF, 0x2CCE,},
1037 : {0x2CD1, 0x2CD0,},
1038 : {0x2CD3, 0x2CD2,},
1039 : {0x2CD5, 0x2CD4,},
1040 : {0x2CD7, 0x2CD6,},
1041 : {0x2CD9, 0x2CD8,},
1042 : {0x2CDB, 0x2CDA,},
1043 : {0x2CDD, 0x2CDC,},
1044 : {0x2CDF, 0x2CDE,},
1045 : {0x2CE1, 0x2CE0,},
1046 : {0x2CE3, 0x2CE2,},
1047 : {0x2CEC, 0x2CEB,},
1048 : {0x2CEE, 0x2CED,},
1049 : {0x2CF3, 0x2CF2,},
1050 : {0x2D00, 0x10A0,},
1051 : {0x2D01, 0x10A1,},
1052 : {0x2D02, 0x10A2,},
1053 : {0x2D03, 0x10A3,},
1054 : {0x2D04, 0x10A4,},
1055 : {0x2D05, 0x10A5,},
1056 : {0x2D06, 0x10A6,},
1057 : {0x2D07, 0x10A7,},
1058 : {0x2D08, 0x10A8,},
1059 : {0x2D09, 0x10A9,},
1060 : {0x2D0A, 0x10AA,},
1061 : {0x2D0B, 0x10AB,},
1062 : {0x2D0C, 0x10AC,},
1063 : {0x2D0D, 0x10AD,},
1064 : {0x2D0E, 0x10AE,},
1065 : {0x2D0F, 0x10AF,},
1066 : {0x2D10, 0x10B0,},
1067 : {0x2D11, 0x10B1,},
1068 : {0x2D12, 0x10B2,},
1069 : {0x2D13, 0x10B3,},
1070 : {0x2D14, 0x10B4,},
1071 : {0x2D15, 0x10B5,},
1072 : {0x2D16, 0x10B6,},
1073 : {0x2D17, 0x10B7,},
1074 : {0x2D18, 0x10B8,},
1075 : {0x2D19, 0x10B9,},
1076 : {0x2D1A, 0x10BA,},
1077 : {0x2D1B, 0x10BB,},
1078 : {0x2D1C, 0x10BC,},
1079 : {0x2D1D, 0x10BD,},
1080 : {0x2D1E, 0x10BE,},
1081 : {0x2D1F, 0x10BF,},
1082 : {0x2D20, 0x10C0,},
1083 : {0x2D21, 0x10C1,},
1084 : {0x2D22, 0x10C2,},
1085 : {0x2D23, 0x10C3,},
1086 : {0x2D24, 0x10C4,},
1087 : {0x2D25, 0x10C5,},
1088 : {0x2D27, 0x10C7,},
1089 : {0x2D2D, 0x10CD,},
1090 : {0xA641, 0xA640,},
1091 : {0xA643, 0xA642,},
1092 : {0xA645, 0xA644,},
1093 : {0xA647, 0xA646,},
1094 : {0xA649, 0xA648,},
1095 : {0xA64B, 0xA64A,},
1096 : {0xA64D, 0xA64C,},
1097 : {0xA64F, 0xA64E,},
1098 : {0xA651, 0xA650,},
1099 : {0xA653, 0xA652,},
1100 : {0xA655, 0xA654,},
1101 : {0xA657, 0xA656,},
1102 : {0xA659, 0xA658,},
1103 : {0xA65B, 0xA65A,},
1104 : {0xA65D, 0xA65C,},
1105 : {0xA65F, 0xA65E,},
1106 : {0xA661, 0xA660,},
1107 : {0xA663, 0xA662,},
1108 : {0xA665, 0xA664,},
1109 : {0xA667, 0xA666,},
1110 : {0xA669, 0xA668,},
1111 : {0xA66B, 0xA66A,},
1112 : {0xA66D, 0xA66C,},
1113 : {0xA681, 0xA680,},
1114 : {0xA683, 0xA682,},
1115 : {0xA685, 0xA684,},
1116 : {0xA687, 0xA686,},
1117 : {0xA689, 0xA688,},
1118 : {0xA68B, 0xA68A,},
1119 : {0xA68D, 0xA68C,},
1120 : {0xA68F, 0xA68E,},
1121 : {0xA691, 0xA690,},
1122 : {0xA693, 0xA692,},
1123 : {0xA695, 0xA694,},
1124 : {0xA697, 0xA696,},
1125 : {0xA699, 0xA698,},
1126 : {0xA69B, 0xA69A,},
1127 : {0xA723, 0xA722,},
1128 : {0xA725, 0xA724,},
1129 : {0xA727, 0xA726,},
1130 : {0xA729, 0xA728,},
1131 : {0xA72B, 0xA72A,},
1132 : {0xA72D, 0xA72C,},
1133 : {0xA72F, 0xA72E,},
1134 : {0xA733, 0xA732,},
1135 : {0xA735, 0xA734,},
1136 : {0xA737, 0xA736,},
1137 : {0xA739, 0xA738,},
1138 : {0xA73B, 0xA73A,},
1139 : {0xA73D, 0xA73C,},
1140 : {0xA73F, 0xA73E,},
1141 : {0xA741, 0xA740,},
1142 : {0xA743, 0xA742,},
1143 : {0xA745, 0xA744,},
1144 : {0xA747, 0xA746,},
1145 : {0xA749, 0xA748,},
1146 : {0xA74B, 0xA74A,},
1147 : {0xA74D, 0xA74C,},
1148 : {0xA74F, 0xA74E,},
1149 : {0xA751, 0xA750,},
1150 : {0xA753, 0xA752,},
1151 : {0xA755, 0xA754,},
1152 : {0xA757, 0xA756,},
1153 : {0xA759, 0xA758,},
1154 : {0xA75B, 0xA75A,},
1155 : {0xA75D, 0xA75C,},
1156 : {0xA75F, 0xA75E,},
1157 : {0xA761, 0xA760,},
1158 : {0xA763, 0xA762,},
1159 : {0xA765, 0xA764,},
1160 : {0xA767, 0xA766,},
1161 : {0xA769, 0xA768,},
1162 : {0xA76B, 0xA76A,},
1163 : {0xA76D, 0xA76C,},
1164 : {0xA76F, 0xA76E,},
1165 : {0xA77A, 0xA779,},
1166 : {0xA77C, 0xA77B,},
1167 : {0xA77F, 0xA77E,},
1168 : {0xA781, 0xA780,},
1169 : {0xA783, 0xA782,},
1170 : {0xA785, 0xA784,},
1171 : {0xA787, 0xA786,},
1172 : {0xA78C, 0xA78B,},
1173 : {0xA791, 0xA790,},
1174 : {0xA793, 0xA792,},
1175 : {0xA794, 0xA7C4,},
1176 : {0xA797, 0xA796,},
1177 : {0xA799, 0xA798,},
1178 : {0xA79B, 0xA79A,},
1179 : {0xA79D, 0xA79C,},
1180 : {0xA79F, 0xA79E,},
1181 : {0xA7A1, 0xA7A0,},
1182 : {0xA7A3, 0xA7A2,},
1183 : {0xA7A5, 0xA7A4,},
1184 : {0xA7A7, 0xA7A6,},
1185 : {0xA7A9, 0xA7A8,},
1186 : {0xA7B5, 0xA7B4,},
1187 : {0xA7B7, 0xA7B6,},
1188 : {0xA7B9, 0xA7B8,},
1189 : {0xA7BB, 0xA7BA,},
1190 : {0xA7BD, 0xA7BC,},
1191 : {0xA7BF, 0xA7BE,},
1192 : {0xA7C1, 0xA7C0,},
1193 : {0xA7C3, 0xA7C2,},
1194 : {0xA7C8, 0xA7C7,},
1195 : {0xA7CA, 0xA7C9,},
1196 : {0xA7D1, 0xA7D0,},
1197 : {0xA7D7, 0xA7D6,},
1198 : {0xA7D9, 0xA7D8,},
1199 : {0xA7F6, 0xA7F5,},
1200 : {0xAB53, 0xA7B3,},
1201 : {0xAB70, 0x13A0,},
1202 : {0xAB71, 0x13A1,},
1203 : {0xAB72, 0x13A2,},
1204 : {0xAB73, 0x13A3,},
1205 : {0xAB74, 0x13A4,},
1206 : {0xAB75, 0x13A5,},
1207 : {0xAB76, 0x13A6,},
1208 : {0xAB77, 0x13A7,},
1209 : {0xAB78, 0x13A8,},
1210 : {0xAB79, 0x13A9,},
1211 : {0xAB7A, 0x13AA,},
1212 : {0xAB7B, 0x13AB,},
1213 : {0xAB7C, 0x13AC,},
1214 : {0xAB7D, 0x13AD,},
1215 : {0xAB7E, 0x13AE,},
1216 : {0xAB7F, 0x13AF,},
1217 : {0xAB80, 0x13B0,},
1218 : {0xAB81, 0x13B1,},
1219 : {0xAB82, 0x13B2,},
1220 : {0xAB83, 0x13B3,},
1221 : {0xAB84, 0x13B4,},
1222 : {0xAB85, 0x13B5,},
1223 : {0xAB86, 0x13B6,},
1224 : {0xAB87, 0x13B7,},
1225 : {0xAB88, 0x13B8,},
1226 : {0xAB89, 0x13B9,},
1227 : {0xAB8A, 0x13BA,},
1228 : {0xAB8B, 0x13BB,},
1229 : {0xAB8C, 0x13BC,},
1230 : {0xAB8D, 0x13BD,},
1231 : {0xAB8E, 0x13BE,},
1232 : {0xAB8F, 0x13BF,},
1233 : {0xAB90, 0x13C0,},
1234 : {0xAB91, 0x13C1,},
1235 : {0xAB92, 0x13C2,},
1236 : {0xAB93, 0x13C3,},
1237 : {0xAB94, 0x13C4,},
1238 : {0xAB95, 0x13C5,},
1239 : {0xAB96, 0x13C6,},
1240 : {0xAB97, 0x13C7,},
1241 : {0xAB98, 0x13C8,},
1242 : {0xAB99, 0x13C9,},
1243 : {0xAB9A, 0x13CA,},
1244 : {0xAB9B, 0x13CB,},
1245 : {0xAB9C, 0x13CC,},
1246 : {0xAB9D, 0x13CD,},
1247 : {0xAB9E, 0x13CE,},
1248 : {0xAB9F, 0x13CF,},
1249 : {0xABA0, 0x13D0,},
1250 : {0xABA1, 0x13D1,},
1251 : {0xABA2, 0x13D2,},
1252 : {0xABA3, 0x13D3,},
1253 : {0xABA4, 0x13D4,},
1254 : {0xABA5, 0x13D5,},
1255 : {0xABA6, 0x13D6,},
1256 : {0xABA7, 0x13D7,},
1257 : {0xABA8, 0x13D8,},
1258 : {0xABA9, 0x13D9,},
1259 : {0xABAA, 0x13DA,},
1260 : {0xABAB, 0x13DB,},
1261 : {0xABAC, 0x13DC,},
1262 : {0xABAD, 0x13DD,},
1263 : {0xABAE, 0x13DE,},
1264 : {0xABAF, 0x13DF,},
1265 : {0xABB0, 0x13E0,},
1266 : {0xABB1, 0x13E1,},
1267 : {0xABB2, 0x13E2,},
1268 : {0xABB3, 0x13E3,},
1269 : {0xABB4, 0x13E4,},
1270 : {0xABB5, 0x13E5,},
1271 : {0xABB6, 0x13E6,},
1272 : {0xABB7, 0x13E7,},
1273 : {0xABB8, 0x13E8,},
1274 : {0xABB9, 0x13E9,},
1275 : {0xABBA, 0x13EA,},
1276 : {0xABBB, 0x13EB,},
1277 : {0xABBC, 0x13EC,},
1278 : {0xABBD, 0x13ED,},
1279 : {0xABBE, 0x13EE,},
1280 : {0xABBF, 0x13EF,},
1281 : {0xFF41, 0xFF21,},
1282 : {0xFF42, 0xFF22,},
1283 : {0xFF43, 0xFF23,},
1284 : {0xFF44, 0xFF24,},
1285 : {0xFF45, 0xFF25,},
1286 : {0xFF46, 0xFF26,},
1287 : {0xFF47, 0xFF27,},
1288 : {0xFF48, 0xFF28,},
1289 : {0xFF49, 0xFF29,},
1290 : {0xFF4A, 0xFF2A,},
1291 : {0xFF4B, 0xFF2B,},
1292 : {0xFF4C, 0xFF2C,},
1293 : {0xFF4D, 0xFF2D,},
1294 : {0xFF4E, 0xFF2E,},
1295 : {0xFF4F, 0xFF2F,},
1296 : {0xFF50, 0xFF30,},
1297 : {0xFF51, 0xFF31,},
1298 : {0xFF52, 0xFF32,},
1299 : {0xFF53, 0xFF33,},
1300 : {0xFF54, 0xFF34,},
1301 : {0xFF55, 0xFF35,},
1302 : {0xFF56, 0xFF36,},
1303 : {0xFF57, 0xFF37,},
1304 : {0xFF58, 0xFF38,},
1305 : {0xFF59, 0xFF39,},
1306 : {0xFF5A, 0xFF3A,},
1307 : {0x10428, 0x10400,},
1308 : {0x10429, 0x10401,},
1309 : {0x1042A, 0x10402,},
1310 : {0x1042B, 0x10403,},
1311 : {0x1042C, 0x10404,},
1312 : {0x1042D, 0x10405,},
1313 : {0x1042E, 0x10406,},
1314 : {0x1042F, 0x10407,},
1315 : {0x10430, 0x10408,},
1316 : {0x10431, 0x10409,},
1317 : {0x10432, 0x1040A,},
1318 : {0x10433, 0x1040B,},
1319 : {0x10434, 0x1040C,},
1320 : {0x10435, 0x1040D,},
1321 : {0x10436, 0x1040E,},
1322 : {0x10437, 0x1040F,},
1323 : {0x10438, 0x10410,},
1324 : {0x10439, 0x10411,},
1325 : {0x1043A, 0x10412,},
1326 : {0x1043B, 0x10413,},
1327 : {0x1043C, 0x10414,},
1328 : {0x1043D, 0x10415,},
1329 : {0x1043E, 0x10416,},
1330 : {0x1043F, 0x10417,},
1331 : {0x10440, 0x10418,},
1332 : {0x10441, 0x10419,},
1333 : {0x10442, 0x1041A,},
1334 : {0x10443, 0x1041B,},
1335 : {0x10444, 0x1041C,},
1336 : {0x10445, 0x1041D,},
1337 : {0x10446, 0x1041E,},
1338 : {0x10447, 0x1041F,},
1339 : {0x10448, 0x10420,},
1340 : {0x10449, 0x10421,},
1341 : {0x1044A, 0x10422,},
1342 : {0x1044B, 0x10423,},
1343 : {0x1044C, 0x10424,},
1344 : {0x1044D, 0x10425,},
1345 : {0x1044E, 0x10426,},
1346 : {0x1044F, 0x10427,},
1347 : {0x104D8, 0x104B0,},
1348 : {0x104D9, 0x104B1,},
1349 : {0x104DA, 0x104B2,},
1350 : {0x104DB, 0x104B3,},
1351 : {0x104DC, 0x104B4,},
1352 : {0x104DD, 0x104B5,},
1353 : {0x104DE, 0x104B6,},
1354 : {0x104DF, 0x104B7,},
1355 : {0x104E0, 0x104B8,},
1356 : {0x104E1, 0x104B9,},
1357 : {0x104E2, 0x104BA,},
1358 : {0x104E3, 0x104BB,},
1359 : {0x104E4, 0x104BC,},
1360 : {0x104E5, 0x104BD,},
1361 : {0x104E6, 0x104BE,},
1362 : {0x104E7, 0x104BF,},
1363 : {0x104E8, 0x104C0,},
1364 : {0x104E9, 0x104C1,},
1365 : {0x104EA, 0x104C2,},
1366 : {0x104EB, 0x104C3,},
1367 : {0x104EC, 0x104C4,},
1368 : {0x104ED, 0x104C5,},
1369 : {0x104EE, 0x104C6,},
1370 : {0x104EF, 0x104C7,},
1371 : {0x104F0, 0x104C8,},
1372 : {0x104F1, 0x104C9,},
1373 : {0x104F2, 0x104CA,},
1374 : {0x104F3, 0x104CB,},
1375 : {0x104F4, 0x104CC,},
1376 : {0x104F5, 0x104CD,},
1377 : {0x104F6, 0x104CE,},
1378 : {0x104F7, 0x104CF,},
1379 : {0x104F8, 0x104D0,},
1380 : {0x104F9, 0x104D1,},
1381 : {0x104FA, 0x104D2,},
1382 : {0x104FB, 0x104D3,},
1383 : {0x10597, 0x10570,},
1384 : {0x10598, 0x10571,},
1385 : {0x10599, 0x10572,},
1386 : {0x1059A, 0x10573,},
1387 : {0x1059B, 0x10574,},
1388 : {0x1059C, 0x10575,},
1389 : {0x1059D, 0x10576,},
1390 : {0x1059E, 0x10577,},
1391 : {0x1059F, 0x10578,},
1392 : {0x105A0, 0x10579,},
1393 : {0x105A1, 0x1057A,},
1394 : {0x105A3, 0x1057C,},
1395 : {0x105A4, 0x1057D,},
1396 : {0x105A5, 0x1057E,},
1397 : {0x105A6, 0x1057F,},
1398 : {0x105A7, 0x10580,},
1399 : {0x105A8, 0x10581,},
1400 : {0x105A9, 0x10582,},
1401 : {0x105AA, 0x10583,},
1402 : {0x105AB, 0x10584,},
1403 : {0x105AC, 0x10585,},
1404 : {0x105AD, 0x10586,},
1405 : {0x105AE, 0x10587,},
1406 : {0x105AF, 0x10588,},
1407 : {0x105B0, 0x10589,},
1408 : {0x105B1, 0x1058A,},
1409 : {0x105B3, 0x1058C,},
1410 : {0x105B4, 0x1058D,},
1411 : {0x105B5, 0x1058E,},
1412 : {0x105B6, 0x1058F,},
1413 : {0x105B7, 0x10590,},
1414 : {0x105B8, 0x10591,},
1415 : {0x105B9, 0x10592,},
1416 : {0x105BB, 0x10594,},
1417 : {0x105BC, 0x10595,},
1418 : {0x10CC0, 0x10C80,},
1419 : {0x10CC1, 0x10C81,},
1420 : {0x10CC2, 0x10C82,},
1421 : {0x10CC3, 0x10C83,},
1422 : {0x10CC4, 0x10C84,},
1423 : {0x10CC5, 0x10C85,},
1424 : {0x10CC6, 0x10C86,},
1425 : {0x10CC7, 0x10C87,},
1426 : {0x10CC8, 0x10C88,},
1427 : {0x10CC9, 0x10C89,},
1428 : {0x10CCA, 0x10C8A,},
1429 : {0x10CCB, 0x10C8B,},
1430 : {0x10CCC, 0x10C8C,},
1431 : {0x10CCD, 0x10C8D,},
1432 : {0x10CCE, 0x10C8E,},
1433 : {0x10CCF, 0x10C8F,},
1434 : {0x10CD0, 0x10C90,},
1435 : {0x10CD1, 0x10C91,},
1436 : {0x10CD2, 0x10C92,},
1437 : {0x10CD3, 0x10C93,},
1438 : {0x10CD4, 0x10C94,},
1439 : {0x10CD5, 0x10C95,},
1440 : {0x10CD6, 0x10C96,},
1441 : {0x10CD7, 0x10C97,},
1442 : {0x10CD8, 0x10C98,},
1443 : {0x10CD9, 0x10C99,},
1444 : {0x10CDA, 0x10C9A,},
1445 : {0x10CDB, 0x10C9B,},
1446 : {0x10CDC, 0x10C9C,},
1447 : {0x10CDD, 0x10C9D,},
1448 : {0x10CDE, 0x10C9E,},
1449 : {0x10CDF, 0x10C9F,},
1450 : {0x10CE0, 0x10CA0,},
1451 : {0x10CE1, 0x10CA1,},
1452 : {0x10CE2, 0x10CA2,},
1453 : {0x10CE3, 0x10CA3,},
1454 : {0x10CE4, 0x10CA4,},
1455 : {0x10CE5, 0x10CA5,},
1456 : {0x10CE6, 0x10CA6,},
1457 : {0x10CE7, 0x10CA7,},
1458 : {0x10CE8, 0x10CA8,},
1459 : {0x10CE9, 0x10CA9,},
1460 : {0x10CEA, 0x10CAA,},
1461 : {0x10CEB, 0x10CAB,},
1462 : {0x10CEC, 0x10CAC,},
1463 : {0x10CED, 0x10CAD,},
1464 : {0x10CEE, 0x10CAE,},
1465 : {0x10CEF, 0x10CAF,},
1466 : {0x10CF0, 0x10CB0,},
1467 : {0x10CF1, 0x10CB1,},
1468 : {0x10CF2, 0x10CB2,},
1469 : {0x118C0, 0x118A0,},
1470 : {0x118C1, 0x118A1,},
1471 : {0x118C2, 0x118A2,},
1472 : {0x118C3, 0x118A3,},
1473 : {0x118C4, 0x118A4,},
1474 : {0x118C5, 0x118A5,},
1475 : {0x118C6, 0x118A6,},
1476 : {0x118C7, 0x118A7,},
1477 : {0x118C8, 0x118A8,},
1478 : {0x118C9, 0x118A9,},
1479 : {0x118CA, 0x118AA,},
1480 : {0x118CB, 0x118AB,},
1481 : {0x118CC, 0x118AC,},
1482 : {0x118CD, 0x118AD,},
1483 : {0x118CE, 0x118AE,},
1484 : {0x118CF, 0x118AF,},
1485 : {0x118D0, 0x118B0,},
1486 : {0x118D1, 0x118B1,},
1487 : {0x118D2, 0x118B2,},
1488 : {0x118D3, 0x118B3,},
1489 : {0x118D4, 0x118B4,},
1490 : {0x118D5, 0x118B5,},
1491 : {0x118D6, 0x118B6,},
1492 : {0x118D7, 0x118B7,},
1493 : {0x118D8, 0x118B8,},
1494 : {0x118D9, 0x118B9,},
1495 : {0x118DA, 0x118BA,},
1496 : {0x118DB, 0x118BB,},
1497 : {0x118DC, 0x118BC,},
1498 : {0x118DD, 0x118BD,},
1499 : {0x118DE, 0x118BE,},
1500 : {0x118DF, 0x118BF,},
1501 : {0x16E60, 0x16E40,},
1502 : {0x16E61, 0x16E41,},
1503 : {0x16E62, 0x16E42,},
1504 : {0x16E63, 0x16E43,},
1505 : {0x16E64, 0x16E44,},
1506 : {0x16E65, 0x16E45,},
1507 : {0x16E66, 0x16E46,},
1508 : {0x16E67, 0x16E47,},
1509 : {0x16E68, 0x16E48,},
1510 : {0x16E69, 0x16E49,},
1511 : {0x16E6A, 0x16E4A,},
1512 : {0x16E6B, 0x16E4B,},
1513 : {0x16E6C, 0x16E4C,},
1514 : {0x16E6D, 0x16E4D,},
1515 : {0x16E6E, 0x16E4E,},
1516 : {0x16E6F, 0x16E4F,},
1517 : {0x16E70, 0x16E50,},
1518 : {0x16E71, 0x16E51,},
1519 : {0x16E72, 0x16E52,},
1520 : {0x16E73, 0x16E53,},
1521 : {0x16E74, 0x16E54,},
1522 : {0x16E75, 0x16E55,},
1523 : {0x16E76, 0x16E56,},
1524 : {0x16E77, 0x16E57,},
1525 : {0x16E78, 0x16E58,},
1526 : {0x16E79, 0x16E59,},
1527 : {0x16E7A, 0x16E5A,},
1528 : {0x16E7B, 0x16E5B,},
1529 : {0x16E7C, 0x16E5C,},
1530 : {0x16E7D, 0x16E5D,},
1531 : {0x16E7E, 0x16E5E,},
1532 : {0x16E7F, 0x16E5F,},
1533 : {0x1E922, 0x1E900,},
1534 : {0x1E923, 0x1E901,},
1535 : {0x1E924, 0x1E902,},
1536 : {0x1E925, 0x1E903,},
1537 : {0x1E926, 0x1E904,},
1538 : {0x1E927, 0x1E905,},
1539 : {0x1E928, 0x1E906,},
1540 : {0x1E929, 0x1E907,},
1541 : {0x1E92A, 0x1E908,},
1542 : {0x1E92B, 0x1E909,},
1543 : {0x1E92C, 0x1E90A,},
1544 : {0x1E92D, 0x1E90B,},
1545 : {0x1E92E, 0x1E90C,},
1546 : {0x1E92F, 0x1E90D,},
1547 : {0x1E930, 0x1E90E,},
1548 : {0x1E931, 0x1E90F,},
1549 : {0x1E932, 0x1E910,},
1550 : {0x1E933, 0x1E911,},
1551 : {0x1E934, 0x1E912,},
1552 : {0x1E935, 0x1E913,},
1553 : {0x1E936, 0x1E914,},
1554 : {0x1E937, 0x1E915,},
1555 : {0x1E938, 0x1E916,},
1556 : {0x1E939, 0x1E917,},
1557 : {0x1E93A, 0x1E918,},
1558 : {0x1E93B, 0x1E919,},
1559 : {0x1E93C, 0x1E91A,},
1560 : {0x1E93D, 0x1E91B,},
1561 : {0x1E93E, 0x1E91C,},
1562 : {0x1E93F, 0x1E91D,},
1563 : {0x1E940, 0x1E91E,},
1564 : {0x1E941, 0x1E91F,},
1565 : {0x1E942, 0x1E920,},
1566 : {0x1E943, 0x1E921,},
1567 : }, UTF8_toLower[] = { /* code points with non-null lowercase conversion */
1568 : {0x0041, 0x0061,},
1569 : {0x0042, 0x0062,},
1570 : {0x0043, 0x0063,},
1571 : {0x0044, 0x0064,},
1572 : {0x0045, 0x0065,},
1573 : {0x0046, 0x0066,},
1574 : {0x0047, 0x0067,},
1575 : {0x0048, 0x0068,},
1576 : {0x0049, 0x0069,},
1577 : {0x004A, 0x006A,},
1578 : {0x004B, 0x006B,},
1579 : {0x004C, 0x006C,},
1580 : {0x004D, 0x006D,},
1581 : {0x004E, 0x006E,},
1582 : {0x004F, 0x006F,},
1583 : {0x0050, 0x0070,},
1584 : {0x0051, 0x0071,},
1585 : {0x0052, 0x0072,},
1586 : {0x0053, 0x0073,},
1587 : {0x0054, 0x0074,},
1588 : {0x0055, 0x0075,},
1589 : {0x0056, 0x0076,},
1590 : {0x0057, 0x0077,},
1591 : {0x0058, 0x0078,},
1592 : {0x0059, 0x0079,},
1593 : {0x005A, 0x007A,},
1594 : {0x00C0, 0x00E0,},
1595 : {0x00C1, 0x00E1,},
1596 : {0x00C2, 0x00E2,},
1597 : {0x00C3, 0x00E3,},
1598 : {0x00C4, 0x00E4,},
1599 : {0x00C5, 0x00E5,},
1600 : {0x00C6, 0x00E6,},
1601 : {0x00C7, 0x00E7,},
1602 : {0x00C8, 0x00E8,},
1603 : {0x00C9, 0x00E9,},
1604 : {0x00CA, 0x00EA,},
1605 : {0x00CB, 0x00EB,},
1606 : {0x00CC, 0x00EC,},
1607 : {0x00CD, 0x00ED,},
1608 : {0x00CE, 0x00EE,},
1609 : {0x00CF, 0x00EF,},
1610 : {0x00D0, 0x00F0,},
1611 : {0x00D1, 0x00F1,},
1612 : {0x00D2, 0x00F2,},
1613 : {0x00D3, 0x00F3,},
1614 : {0x00D4, 0x00F4,},
1615 : {0x00D5, 0x00F5,},
1616 : {0x00D6, 0x00F6,},
1617 : {0x00D8, 0x00F8,},
1618 : {0x00D9, 0x00F9,},
1619 : {0x00DA, 0x00FA,},
1620 : {0x00DB, 0x00FB,},
1621 : {0x00DC, 0x00FC,},
1622 : {0x00DD, 0x00FD,},
1623 : {0x00DE, 0x00FE,},
1624 : {0x0100, 0x0101,},
1625 : {0x0102, 0x0103,},
1626 : {0x0104, 0x0105,},
1627 : {0x0106, 0x0107,},
1628 : {0x0108, 0x0109,},
1629 : {0x010A, 0x010B,},
1630 : {0x010C, 0x010D,},
1631 : {0x010E, 0x010F,},
1632 : {0x0110, 0x0111,},
1633 : {0x0112, 0x0113,},
1634 : {0x0114, 0x0115,},
1635 : {0x0116, 0x0117,},
1636 : {0x0118, 0x0119,},
1637 : {0x011A, 0x011B,},
1638 : {0x011C, 0x011D,},
1639 : {0x011E, 0x011F,},
1640 : {0x0120, 0x0121,},
1641 : {0x0122, 0x0123,},
1642 : {0x0124, 0x0125,},
1643 : {0x0126, 0x0127,},
1644 : {0x0128, 0x0129,},
1645 : {0x012A, 0x012B,},
1646 : {0x012C, 0x012D,},
1647 : {0x012E, 0x012F,},
1648 : {0x0130, 0x0069,},
1649 : {0x0132, 0x0133,},
1650 : {0x0134, 0x0135,},
1651 : {0x0136, 0x0137,},
1652 : {0x0139, 0x013A,},
1653 : {0x013B, 0x013C,},
1654 : {0x013D, 0x013E,},
1655 : {0x013F, 0x0140,},
1656 : {0x0141, 0x0142,},
1657 : {0x0143, 0x0144,},
1658 : {0x0145, 0x0146,},
1659 : {0x0147, 0x0148,},
1660 : {0x014A, 0x014B,},
1661 : {0x014C, 0x014D,},
1662 : {0x014E, 0x014F,},
1663 : {0x0150, 0x0151,},
1664 : {0x0152, 0x0153,},
1665 : {0x0154, 0x0155,},
1666 : {0x0156, 0x0157,},
1667 : {0x0158, 0x0159,},
1668 : {0x015A, 0x015B,},
1669 : {0x015C, 0x015D,},
1670 : {0x015E, 0x015F,},
1671 : {0x0160, 0x0161,},
1672 : {0x0162, 0x0163,},
1673 : {0x0164, 0x0165,},
1674 : {0x0166, 0x0167,},
1675 : {0x0168, 0x0169,},
1676 : {0x016A, 0x016B,},
1677 : {0x016C, 0x016D,},
1678 : {0x016E, 0x016F,},
1679 : {0x0170, 0x0171,},
1680 : {0x0172, 0x0173,},
1681 : {0x0174, 0x0175,},
1682 : {0x0176, 0x0177,},
1683 : {0x0178, 0x00FF,},
1684 : {0x0179, 0x017A,},
1685 : {0x017B, 0x017C,},
1686 : {0x017D, 0x017E,},
1687 : {0x0181, 0x0253,},
1688 : {0x0182, 0x0183,},
1689 : {0x0184, 0x0185,},
1690 : {0x0186, 0x0254,},
1691 : {0x0187, 0x0188,},
1692 : {0x0189, 0x0256,},
1693 : {0x018A, 0x0257,},
1694 : {0x018B, 0x018C,},
1695 : {0x018E, 0x01DD,},
1696 : {0x018F, 0x0259,},
1697 : {0x0190, 0x025B,},
1698 : {0x0191, 0x0192,},
1699 : {0x0193, 0x0260,},
1700 : {0x0194, 0x0263,},
1701 : {0x0196, 0x0269,},
1702 : {0x0197, 0x0268,},
1703 : {0x0198, 0x0199,},
1704 : {0x019C, 0x026F,},
1705 : {0x019D, 0x0272,},
1706 : {0x019F, 0x0275,},
1707 : {0x01A0, 0x01A1,},
1708 : {0x01A2, 0x01A3,},
1709 : {0x01A4, 0x01A5,},
1710 : {0x01A6, 0x0280,},
1711 : {0x01A7, 0x01A8,},
1712 : {0x01A9, 0x0283,},
1713 : {0x01AC, 0x01AD,},
1714 : {0x01AE, 0x0288,},
1715 : {0x01AF, 0x01B0,},
1716 : {0x01B1, 0x028A,},
1717 : {0x01B2, 0x028B,},
1718 : {0x01B3, 0x01B4,},
1719 : {0x01B5, 0x01B6,},
1720 : {0x01B7, 0x0292,},
1721 : {0x01B8, 0x01B9,},
1722 : {0x01BC, 0x01BD,},
1723 : {0x01C4, 0x01C6,},
1724 : {0x01C5, 0x01C6,},
1725 : {0x01C7, 0x01C9,},
1726 : {0x01C8, 0x01C9,},
1727 : {0x01CA, 0x01CC,},
1728 : {0x01CB, 0x01CC,},
1729 : {0x01CD, 0x01CE,},
1730 : {0x01CF, 0x01D0,},
1731 : {0x01D1, 0x01D2,},
1732 : {0x01D3, 0x01D4,},
1733 : {0x01D5, 0x01D6,},
1734 : {0x01D7, 0x01D8,},
1735 : {0x01D9, 0x01DA,},
1736 : {0x01DB, 0x01DC,},
1737 : {0x01DE, 0x01DF,},
1738 : {0x01E0, 0x01E1,},
1739 : {0x01E2, 0x01E3,},
1740 : {0x01E4, 0x01E5,},
1741 : {0x01E6, 0x01E7,},
1742 : {0x01E8, 0x01E9,},
1743 : {0x01EA, 0x01EB,},
1744 : {0x01EC, 0x01ED,},
1745 : {0x01EE, 0x01EF,},
1746 : {0x01F1, 0x01F3,},
1747 : {0x01F2, 0x01F3,},
1748 : {0x01F4, 0x01F5,},
1749 : {0x01F6, 0x0195,},
1750 : {0x01F7, 0x01BF,},
1751 : {0x01F8, 0x01F9,},
1752 : {0x01FA, 0x01FB,},
1753 : {0x01FC, 0x01FD,},
1754 : {0x01FE, 0x01FF,},
1755 : {0x0200, 0x0201,},
1756 : {0x0202, 0x0203,},
1757 : {0x0204, 0x0205,},
1758 : {0x0206, 0x0207,},
1759 : {0x0208, 0x0209,},
1760 : {0x020A, 0x020B,},
1761 : {0x020C, 0x020D,},
1762 : {0x020E, 0x020F,},
1763 : {0x0210, 0x0211,},
1764 : {0x0212, 0x0213,},
1765 : {0x0214, 0x0215,},
1766 : {0x0216, 0x0217,},
1767 : {0x0218, 0x0219,},
1768 : {0x021A, 0x021B,},
1769 : {0x021C, 0x021D,},
1770 : {0x021E, 0x021F,},
1771 : {0x0220, 0x019E,},
1772 : {0x0222, 0x0223,},
1773 : {0x0224, 0x0225,},
1774 : {0x0226, 0x0227,},
1775 : {0x0228, 0x0229,},
1776 : {0x022A, 0x022B,},
1777 : {0x022C, 0x022D,},
1778 : {0x022E, 0x022F,},
1779 : {0x0230, 0x0231,},
1780 : {0x0232, 0x0233,},
1781 : {0x023A, 0x2C65,},
1782 : {0x023B, 0x023C,},
1783 : {0x023D, 0x019A,},
1784 : {0x023E, 0x2C66,},
1785 : {0x0241, 0x0242,},
1786 : {0x0243, 0x0180,},
1787 : {0x0244, 0x0289,},
1788 : {0x0245, 0x028C,},
1789 : {0x0246, 0x0247,},
1790 : {0x0248, 0x0249,},
1791 : {0x024A, 0x024B,},
1792 : {0x024C, 0x024D,},
1793 : {0x024E, 0x024F,},
1794 : {0x0370, 0x0371,},
1795 : {0x0372, 0x0373,},
1796 : {0x0376, 0x0377,},
1797 : {0x037F, 0x03F3,},
1798 : {0x0386, 0x03AC,},
1799 : {0x0388, 0x03AD,},
1800 : {0x0389, 0x03AE,},
1801 : {0x038A, 0x03AF,},
1802 : {0x038C, 0x03CC,},
1803 : {0x038E, 0x03CD,},
1804 : {0x038F, 0x03CE,},
1805 : {0x0391, 0x03B1,},
1806 : {0x0392, 0x03B2,},
1807 : {0x0393, 0x03B3,},
1808 : {0x0394, 0x03B4,},
1809 : {0x0395, 0x03B5,},
1810 : {0x0396, 0x03B6,},
1811 : {0x0397, 0x03B7,},
1812 : {0x0398, 0x03B8,},
1813 : {0x0399, 0x03B9,},
1814 : {0x039A, 0x03BA,},
1815 : {0x039B, 0x03BB,},
1816 : {0x039C, 0x03BC,},
1817 : {0x039D, 0x03BD,},
1818 : {0x039E, 0x03BE,},
1819 : {0x039F, 0x03BF,},
1820 : {0x03A0, 0x03C0,},
1821 : {0x03A1, 0x03C1,},
1822 : {0x03A3, 0x03C3,},
1823 : {0x03A4, 0x03C4,},
1824 : {0x03A5, 0x03C5,},
1825 : {0x03A6, 0x03C6,},
1826 : {0x03A7, 0x03C7,},
1827 : {0x03A8, 0x03C8,},
1828 : {0x03A9, 0x03C9,},
1829 : {0x03AA, 0x03CA,},
1830 : {0x03AB, 0x03CB,},
1831 : {0x03CF, 0x03D7,},
1832 : {0x03D8, 0x03D9,},
1833 : {0x03DA, 0x03DB,},
1834 : {0x03DC, 0x03DD,},
1835 : {0x03DE, 0x03DF,},
1836 : {0x03E0, 0x03E1,},
1837 : {0x03E2, 0x03E3,},
1838 : {0x03E4, 0x03E5,},
1839 : {0x03E6, 0x03E7,},
1840 : {0x03E8, 0x03E9,},
1841 : {0x03EA, 0x03EB,},
1842 : {0x03EC, 0x03ED,},
1843 : {0x03EE, 0x03EF,},
1844 : {0x03F4, 0x03B8,},
1845 : {0x03F7, 0x03F8,},
1846 : {0x03F9, 0x03F2,},
1847 : {0x03FA, 0x03FB,},
1848 : {0x03FD, 0x037B,},
1849 : {0x03FE, 0x037C,},
1850 : {0x03FF, 0x037D,},
1851 : {0x0400, 0x0450,},
1852 : {0x0401, 0x0451,},
1853 : {0x0402, 0x0452,},
1854 : {0x0403, 0x0453,},
1855 : {0x0404, 0x0454,},
1856 : {0x0405, 0x0455,},
1857 : {0x0406, 0x0456,},
1858 : {0x0407, 0x0457,},
1859 : {0x0408, 0x0458,},
1860 : {0x0409, 0x0459,},
1861 : {0x040A, 0x045A,},
1862 : {0x040B, 0x045B,},
1863 : {0x040C, 0x045C,},
1864 : {0x040D, 0x045D,},
1865 : {0x040E, 0x045E,},
1866 : {0x040F, 0x045F,},
1867 : {0x0410, 0x0430,},
1868 : {0x0411, 0x0431,},
1869 : {0x0412, 0x0432,},
1870 : {0x0413, 0x0433,},
1871 : {0x0414, 0x0434,},
1872 : {0x0415, 0x0435,},
1873 : {0x0416, 0x0436,},
1874 : {0x0417, 0x0437,},
1875 : {0x0418, 0x0438,},
1876 : {0x0419, 0x0439,},
1877 : {0x041A, 0x043A,},
1878 : {0x041B, 0x043B,},
1879 : {0x041C, 0x043C,},
1880 : {0x041D, 0x043D,},
1881 : {0x041E, 0x043E,},
1882 : {0x041F, 0x043F,},
1883 : {0x0420, 0x0440,},
1884 : {0x0421, 0x0441,},
1885 : {0x0422, 0x0442,},
1886 : {0x0423, 0x0443,},
1887 : {0x0424, 0x0444,},
1888 : {0x0425, 0x0445,},
1889 : {0x0426, 0x0446,},
1890 : {0x0427, 0x0447,},
1891 : {0x0428, 0x0448,},
1892 : {0x0429, 0x0449,},
1893 : {0x042A, 0x044A,},
1894 : {0x042B, 0x044B,},
1895 : {0x042C, 0x044C,},
1896 : {0x042D, 0x044D,},
1897 : {0x042E, 0x044E,},
1898 : {0x042F, 0x044F,},
1899 : {0x0460, 0x0461,},
1900 : {0x0462, 0x0463,},
1901 : {0x0464, 0x0465,},
1902 : {0x0466, 0x0467,},
1903 : {0x0468, 0x0469,},
1904 : {0x046A, 0x046B,},
1905 : {0x046C, 0x046D,},
1906 : {0x046E, 0x046F,},
1907 : {0x0470, 0x0471,},
1908 : {0x0472, 0x0473,},
1909 : {0x0474, 0x0475,},
1910 : {0x0476, 0x0477,},
1911 : {0x0478, 0x0479,},
1912 : {0x047A, 0x047B,},
1913 : {0x047C, 0x047D,},
1914 : {0x047E, 0x047F,},
1915 : {0x0480, 0x0481,},
1916 : {0x048A, 0x048B,},
1917 : {0x048C, 0x048D,},
1918 : {0x048E, 0x048F,},
1919 : {0x0490, 0x0491,},
1920 : {0x0492, 0x0493,},
1921 : {0x0494, 0x0495,},
1922 : {0x0496, 0x0497,},
1923 : {0x0498, 0x0499,},
1924 : {0x049A, 0x049B,},
1925 : {0x049C, 0x049D,},
1926 : {0x049E, 0x049F,},
1927 : {0x04A0, 0x04A1,},
1928 : {0x04A2, 0x04A3,},
1929 : {0x04A4, 0x04A5,},
1930 : {0x04A6, 0x04A7,},
1931 : {0x04A8, 0x04A9,},
1932 : {0x04AA, 0x04AB,},
1933 : {0x04AC, 0x04AD,},
1934 : {0x04AE, 0x04AF,},
1935 : {0x04B0, 0x04B1,},
1936 : {0x04B2, 0x04B3,},
1937 : {0x04B4, 0x04B5,},
1938 : {0x04B6, 0x04B7,},
1939 : {0x04B8, 0x04B9,},
1940 : {0x04BA, 0x04BB,},
1941 : {0x04BC, 0x04BD,},
1942 : {0x04BE, 0x04BF,},
1943 : {0x04C0, 0x04CF,},
1944 : {0x04C1, 0x04C2,},
1945 : {0x04C3, 0x04C4,},
1946 : {0x04C5, 0x04C6,},
1947 : {0x04C7, 0x04C8,},
1948 : {0x04C9, 0x04CA,},
1949 : {0x04CB, 0x04CC,},
1950 : {0x04CD, 0x04CE,},
1951 : {0x04D0, 0x04D1,},
1952 : {0x04D2, 0x04D3,},
1953 : {0x04D4, 0x04D5,},
1954 : {0x04D6, 0x04D7,},
1955 : {0x04D8, 0x04D9,},
1956 : {0x04DA, 0x04DB,},
1957 : {0x04DC, 0x04DD,},
1958 : {0x04DE, 0x04DF,},
1959 : {0x04E0, 0x04E1,},
1960 : {0x04E2, 0x04E3,},
1961 : {0x04E4, 0x04E5,},
1962 : {0x04E6, 0x04E7,},
1963 : {0x04E8, 0x04E9,},
1964 : {0x04EA, 0x04EB,},
1965 : {0x04EC, 0x04ED,},
1966 : {0x04EE, 0x04EF,},
1967 : {0x04F0, 0x04F1,},
1968 : {0x04F2, 0x04F3,},
1969 : {0x04F4, 0x04F5,},
1970 : {0x04F6, 0x04F7,},
1971 : {0x04F8, 0x04F9,},
1972 : {0x04FA, 0x04FB,},
1973 : {0x04FC, 0x04FD,},
1974 : {0x04FE, 0x04FF,},
1975 : {0x0500, 0x0501,},
1976 : {0x0502, 0x0503,},
1977 : {0x0504, 0x0505,},
1978 : {0x0506, 0x0507,},
1979 : {0x0508, 0x0509,},
1980 : {0x050A, 0x050B,},
1981 : {0x050C, 0x050D,},
1982 : {0x050E, 0x050F,},
1983 : {0x0510, 0x0511,},
1984 : {0x0512, 0x0513,},
1985 : {0x0514, 0x0515,},
1986 : {0x0516, 0x0517,},
1987 : {0x0518, 0x0519,},
1988 : {0x051A, 0x051B,},
1989 : {0x051C, 0x051D,},
1990 : {0x051E, 0x051F,},
1991 : {0x0520, 0x0521,},
1992 : {0x0522, 0x0523,},
1993 : {0x0524, 0x0525,},
1994 : {0x0526, 0x0527,},
1995 : {0x0528, 0x0529,},
1996 : {0x052A, 0x052B,},
1997 : {0x052C, 0x052D,},
1998 : {0x052E, 0x052F,},
1999 : {0x0531, 0x0561,},
2000 : {0x0532, 0x0562,},
2001 : {0x0533, 0x0563,},
2002 : {0x0534, 0x0564,},
2003 : {0x0535, 0x0565,},
2004 : {0x0536, 0x0566,},
2005 : {0x0537, 0x0567,},
2006 : {0x0538, 0x0568,},
2007 : {0x0539, 0x0569,},
2008 : {0x053A, 0x056A,},
2009 : {0x053B, 0x056B,},
2010 : {0x053C, 0x056C,},
2011 : {0x053D, 0x056D,},
2012 : {0x053E, 0x056E,},
2013 : {0x053F, 0x056F,},
2014 : {0x0540, 0x0570,},
2015 : {0x0541, 0x0571,},
2016 : {0x0542, 0x0572,},
2017 : {0x0543, 0x0573,},
2018 : {0x0544, 0x0574,},
2019 : {0x0545, 0x0575,},
2020 : {0x0546, 0x0576,},
2021 : {0x0547, 0x0577,},
2022 : {0x0548, 0x0578,},
2023 : {0x0549, 0x0579,},
2024 : {0x054A, 0x057A,},
2025 : {0x054B, 0x057B,},
2026 : {0x054C, 0x057C,},
2027 : {0x054D, 0x057D,},
2028 : {0x054E, 0x057E,},
2029 : {0x054F, 0x057F,},
2030 : {0x0550, 0x0580,},
2031 : {0x0551, 0x0581,},
2032 : {0x0552, 0x0582,},
2033 : {0x0553, 0x0583,},
2034 : {0x0554, 0x0584,},
2035 : {0x0555, 0x0585,},
2036 : {0x0556, 0x0586,},
2037 : {0x10A0, 0x2D00,},
2038 : {0x10A1, 0x2D01,},
2039 : {0x10A2, 0x2D02,},
2040 : {0x10A3, 0x2D03,},
2041 : {0x10A4, 0x2D04,},
2042 : {0x10A5, 0x2D05,},
2043 : {0x10A6, 0x2D06,},
2044 : {0x10A7, 0x2D07,},
2045 : {0x10A8, 0x2D08,},
2046 : {0x10A9, 0x2D09,},
2047 : {0x10AA, 0x2D0A,},
2048 : {0x10AB, 0x2D0B,},
2049 : {0x10AC, 0x2D0C,},
2050 : {0x10AD, 0x2D0D,},
2051 : {0x10AE, 0x2D0E,},
2052 : {0x10AF, 0x2D0F,},
2053 : {0x10B0, 0x2D10,},
2054 : {0x10B1, 0x2D11,},
2055 : {0x10B2, 0x2D12,},
2056 : {0x10B3, 0x2D13,},
2057 : {0x10B4, 0x2D14,},
2058 : {0x10B5, 0x2D15,},
2059 : {0x10B6, 0x2D16,},
2060 : {0x10B7, 0x2D17,},
2061 : {0x10B8, 0x2D18,},
2062 : {0x10B9, 0x2D19,},
2063 : {0x10BA, 0x2D1A,},
2064 : {0x10BB, 0x2D1B,},
2065 : {0x10BC, 0x2D1C,},
2066 : {0x10BD, 0x2D1D,},
2067 : {0x10BE, 0x2D1E,},
2068 : {0x10BF, 0x2D1F,},
2069 : {0x10C0, 0x2D20,},
2070 : {0x10C1, 0x2D21,},
2071 : {0x10C2, 0x2D22,},
2072 : {0x10C3, 0x2D23,},
2073 : {0x10C4, 0x2D24,},
2074 : {0x10C5, 0x2D25,},
2075 : {0x10C7, 0x2D27,},
2076 : {0x10CD, 0x2D2D,},
2077 : {0x13A0, 0xAB70,},
2078 : {0x13A1, 0xAB71,},
2079 : {0x13A2, 0xAB72,},
2080 : {0x13A3, 0xAB73,},
2081 : {0x13A4, 0xAB74,},
2082 : {0x13A5, 0xAB75,},
2083 : {0x13A6, 0xAB76,},
2084 : {0x13A7, 0xAB77,},
2085 : {0x13A8, 0xAB78,},
2086 : {0x13A9, 0xAB79,},
2087 : {0x13AA, 0xAB7A,},
2088 : {0x13AB, 0xAB7B,},
2089 : {0x13AC, 0xAB7C,},
2090 : {0x13AD, 0xAB7D,},
2091 : {0x13AE, 0xAB7E,},
2092 : {0x13AF, 0xAB7F,},
2093 : {0x13B0, 0xAB80,},
2094 : {0x13B1, 0xAB81,},
2095 : {0x13B2, 0xAB82,},
2096 : {0x13B3, 0xAB83,},
2097 : {0x13B4, 0xAB84,},
2098 : {0x13B5, 0xAB85,},
2099 : {0x13B6, 0xAB86,},
2100 : {0x13B7, 0xAB87,},
2101 : {0x13B8, 0xAB88,},
2102 : {0x13B9, 0xAB89,},
2103 : {0x13BA, 0xAB8A,},
2104 : {0x13BB, 0xAB8B,},
2105 : {0x13BC, 0xAB8C,},
2106 : {0x13BD, 0xAB8D,},
2107 : {0x13BE, 0xAB8E,},
2108 : {0x13BF, 0xAB8F,},
2109 : {0x13C0, 0xAB90,},
2110 : {0x13C1, 0xAB91,},
2111 : {0x13C2, 0xAB92,},
2112 : {0x13C3, 0xAB93,},
2113 : {0x13C4, 0xAB94,},
2114 : {0x13C5, 0xAB95,},
2115 : {0x13C6, 0xAB96,},
2116 : {0x13C7, 0xAB97,},
2117 : {0x13C8, 0xAB98,},
2118 : {0x13C9, 0xAB99,},
2119 : {0x13CA, 0xAB9A,},
2120 : {0x13CB, 0xAB9B,},
2121 : {0x13CC, 0xAB9C,},
2122 : {0x13CD, 0xAB9D,},
2123 : {0x13CE, 0xAB9E,},
2124 : {0x13CF, 0xAB9F,},
2125 : {0x13D0, 0xABA0,},
2126 : {0x13D1, 0xABA1,},
2127 : {0x13D2, 0xABA2,},
2128 : {0x13D3, 0xABA3,},
2129 : {0x13D4, 0xABA4,},
2130 : {0x13D5, 0xABA5,},
2131 : {0x13D6, 0xABA6,},
2132 : {0x13D7, 0xABA7,},
2133 : {0x13D8, 0xABA8,},
2134 : {0x13D9, 0xABA9,},
2135 : {0x13DA, 0xABAA,},
2136 : {0x13DB, 0xABAB,},
2137 : {0x13DC, 0xABAC,},
2138 : {0x13DD, 0xABAD,},
2139 : {0x13DE, 0xABAE,},
2140 : {0x13DF, 0xABAF,},
2141 : {0x13E0, 0xABB0,},
2142 : {0x13E1, 0xABB1,},
2143 : {0x13E2, 0xABB2,},
2144 : {0x13E3, 0xABB3,},
2145 : {0x13E4, 0xABB4,},
2146 : {0x13E5, 0xABB5,},
2147 : {0x13E6, 0xABB6,},
2148 : {0x13E7, 0xABB7,},
2149 : {0x13E8, 0xABB8,},
2150 : {0x13E9, 0xABB9,},
2151 : {0x13EA, 0xABBA,},
2152 : {0x13EB, 0xABBB,},
2153 : {0x13EC, 0xABBC,},
2154 : {0x13ED, 0xABBD,},
2155 : {0x13EE, 0xABBE,},
2156 : {0x13EF, 0xABBF,},
2157 : {0x13F0, 0x13F8,},
2158 : {0x13F1, 0x13F9,},
2159 : {0x13F2, 0x13FA,},
2160 : {0x13F3, 0x13FB,},
2161 : {0x13F4, 0x13FC,},
2162 : {0x13F5, 0x13FD,},
2163 : {0x1C90, 0x10D0,},
2164 : {0x1C91, 0x10D1,},
2165 : {0x1C92, 0x10D2,},
2166 : {0x1C93, 0x10D3,},
2167 : {0x1C94, 0x10D4,},
2168 : {0x1C95, 0x10D5,},
2169 : {0x1C96, 0x10D6,},
2170 : {0x1C97, 0x10D7,},
2171 : {0x1C98, 0x10D8,},
2172 : {0x1C99, 0x10D9,},
2173 : {0x1C9A, 0x10DA,},
2174 : {0x1C9B, 0x10DB,},
2175 : {0x1C9C, 0x10DC,},
2176 : {0x1C9D, 0x10DD,},
2177 : {0x1C9E, 0x10DE,},
2178 : {0x1C9F, 0x10DF,},
2179 : {0x1CA0, 0x10E0,},
2180 : {0x1CA1, 0x10E1,},
2181 : {0x1CA2, 0x10E2,},
2182 : {0x1CA3, 0x10E3,},
2183 : {0x1CA4, 0x10E4,},
2184 : {0x1CA5, 0x10E5,},
2185 : {0x1CA6, 0x10E6,},
2186 : {0x1CA7, 0x10E7,},
2187 : {0x1CA8, 0x10E8,},
2188 : {0x1CA9, 0x10E9,},
2189 : {0x1CAA, 0x10EA,},
2190 : {0x1CAB, 0x10EB,},
2191 : {0x1CAC, 0x10EC,},
2192 : {0x1CAD, 0x10ED,},
2193 : {0x1CAE, 0x10EE,},
2194 : {0x1CAF, 0x10EF,},
2195 : {0x1CB0, 0x10F0,},
2196 : {0x1CB1, 0x10F1,},
2197 : {0x1CB2, 0x10F2,},
2198 : {0x1CB3, 0x10F3,},
2199 : {0x1CB4, 0x10F4,},
2200 : {0x1CB5, 0x10F5,},
2201 : {0x1CB6, 0x10F6,},
2202 : {0x1CB7, 0x10F7,},
2203 : {0x1CB8, 0x10F8,},
2204 : {0x1CB9, 0x10F9,},
2205 : {0x1CBA, 0x10FA,},
2206 : {0x1CBD, 0x10FD,},
2207 : {0x1CBE, 0x10FE,},
2208 : {0x1CBF, 0x10FF,},
2209 : {0x1E00, 0x1E01,},
2210 : {0x1E02, 0x1E03,},
2211 : {0x1E04, 0x1E05,},
2212 : {0x1E06, 0x1E07,},
2213 : {0x1E08, 0x1E09,},
2214 : {0x1E0A, 0x1E0B,},
2215 : {0x1E0C, 0x1E0D,},
2216 : {0x1E0E, 0x1E0F,},
2217 : {0x1E10, 0x1E11,},
2218 : {0x1E12, 0x1E13,},
2219 : {0x1E14, 0x1E15,},
2220 : {0x1E16, 0x1E17,},
2221 : {0x1E18, 0x1E19,},
2222 : {0x1E1A, 0x1E1B,},
2223 : {0x1E1C, 0x1E1D,},
2224 : {0x1E1E, 0x1E1F,},
2225 : {0x1E20, 0x1E21,},
2226 : {0x1E22, 0x1E23,},
2227 : {0x1E24, 0x1E25,},
2228 : {0x1E26, 0x1E27,},
2229 : {0x1E28, 0x1E29,},
2230 : {0x1E2A, 0x1E2B,},
2231 : {0x1E2C, 0x1E2D,},
2232 : {0x1E2E, 0x1E2F,},
2233 : {0x1E30, 0x1E31,},
2234 : {0x1E32, 0x1E33,},
2235 : {0x1E34, 0x1E35,},
2236 : {0x1E36, 0x1E37,},
2237 : {0x1E38, 0x1E39,},
2238 : {0x1E3A, 0x1E3B,},
2239 : {0x1E3C, 0x1E3D,},
2240 : {0x1E3E, 0x1E3F,},
2241 : {0x1E40, 0x1E41,},
2242 : {0x1E42, 0x1E43,},
2243 : {0x1E44, 0x1E45,},
2244 : {0x1E46, 0x1E47,},
2245 : {0x1E48, 0x1E49,},
2246 : {0x1E4A, 0x1E4B,},
2247 : {0x1E4C, 0x1E4D,},
2248 : {0x1E4E, 0x1E4F,},
2249 : {0x1E50, 0x1E51,},
2250 : {0x1E52, 0x1E53,},
2251 : {0x1E54, 0x1E55,},
2252 : {0x1E56, 0x1E57,},
2253 : {0x1E58, 0x1E59,},
2254 : {0x1E5A, 0x1E5B,},
2255 : {0x1E5C, 0x1E5D,},
2256 : {0x1E5E, 0x1E5F,},
2257 : {0x1E60, 0x1E61,},
2258 : {0x1E62, 0x1E63,},
2259 : {0x1E64, 0x1E65,},
2260 : {0x1E66, 0x1E67,},
2261 : {0x1E68, 0x1E69,},
2262 : {0x1E6A, 0x1E6B,},
2263 : {0x1E6C, 0x1E6D,},
2264 : {0x1E6E, 0x1E6F,},
2265 : {0x1E70, 0x1E71,},
2266 : {0x1E72, 0x1E73,},
2267 : {0x1E74, 0x1E75,},
2268 : {0x1E76, 0x1E77,},
2269 : {0x1E78, 0x1E79,},
2270 : {0x1E7A, 0x1E7B,},
2271 : {0x1E7C, 0x1E7D,},
2272 : {0x1E7E, 0x1E7F,},
2273 : {0x1E80, 0x1E81,},
2274 : {0x1E82, 0x1E83,},
2275 : {0x1E84, 0x1E85,},
2276 : {0x1E86, 0x1E87,},
2277 : {0x1E88, 0x1E89,},
2278 : {0x1E8A, 0x1E8B,},
2279 : {0x1E8C, 0x1E8D,},
2280 : {0x1E8E, 0x1E8F,},
2281 : {0x1E90, 0x1E91,},
2282 : {0x1E92, 0x1E93,},
2283 : {0x1E94, 0x1E95,},
2284 : {0x1E9E, 0x00DF,},
2285 : {0x1EA0, 0x1EA1,},
2286 : {0x1EA2, 0x1EA3,},
2287 : {0x1EA4, 0x1EA5,},
2288 : {0x1EA6, 0x1EA7,},
2289 : {0x1EA8, 0x1EA9,},
2290 : {0x1EAA, 0x1EAB,},
2291 : {0x1EAC, 0x1EAD,},
2292 : {0x1EAE, 0x1EAF,},
2293 : {0x1EB0, 0x1EB1,},
2294 : {0x1EB2, 0x1EB3,},
2295 : {0x1EB4, 0x1EB5,},
2296 : {0x1EB6, 0x1EB7,},
2297 : {0x1EB8, 0x1EB9,},
2298 : {0x1EBA, 0x1EBB,},
2299 : {0x1EBC, 0x1EBD,},
2300 : {0x1EBE, 0x1EBF,},
2301 : {0x1EC0, 0x1EC1,},
2302 : {0x1EC2, 0x1EC3,},
2303 : {0x1EC4, 0x1EC5,},
2304 : {0x1EC6, 0x1EC7,},
2305 : {0x1EC8, 0x1EC9,},
2306 : {0x1ECA, 0x1ECB,},
2307 : {0x1ECC, 0x1ECD,},
2308 : {0x1ECE, 0x1ECF,},
2309 : {0x1ED0, 0x1ED1,},
2310 : {0x1ED2, 0x1ED3,},
2311 : {0x1ED4, 0x1ED5,},
2312 : {0x1ED6, 0x1ED7,},
2313 : {0x1ED8, 0x1ED9,},
2314 : {0x1EDA, 0x1EDB,},
2315 : {0x1EDC, 0x1EDD,},
2316 : {0x1EDE, 0x1EDF,},
2317 : {0x1EE0, 0x1EE1,},
2318 : {0x1EE2, 0x1EE3,},
2319 : {0x1EE4, 0x1EE5,},
2320 : {0x1EE6, 0x1EE7,},
2321 : {0x1EE8, 0x1EE9,},
2322 : {0x1EEA, 0x1EEB,},
2323 : {0x1EEC, 0x1EED,},
2324 : {0x1EEE, 0x1EEF,},
2325 : {0x1EF0, 0x1EF1,},
2326 : {0x1EF2, 0x1EF3,},
2327 : {0x1EF4, 0x1EF5,},
2328 : {0x1EF6, 0x1EF7,},
2329 : {0x1EF8, 0x1EF9,},
2330 : {0x1EFA, 0x1EFB,},
2331 : {0x1EFC, 0x1EFD,},
2332 : {0x1EFE, 0x1EFF,},
2333 : {0x1F08, 0x1F00,},
2334 : {0x1F09, 0x1F01,},
2335 : {0x1F0A, 0x1F02,},
2336 : {0x1F0B, 0x1F03,},
2337 : {0x1F0C, 0x1F04,},
2338 : {0x1F0D, 0x1F05,},
2339 : {0x1F0E, 0x1F06,},
2340 : {0x1F0F, 0x1F07,},
2341 : {0x1F18, 0x1F10,},
2342 : {0x1F19, 0x1F11,},
2343 : {0x1F1A, 0x1F12,},
2344 : {0x1F1B, 0x1F13,},
2345 : {0x1F1C, 0x1F14,},
2346 : {0x1F1D, 0x1F15,},
2347 : {0x1F28, 0x1F20,},
2348 : {0x1F29, 0x1F21,},
2349 : {0x1F2A, 0x1F22,},
2350 : {0x1F2B, 0x1F23,},
2351 : {0x1F2C, 0x1F24,},
2352 : {0x1F2D, 0x1F25,},
2353 : {0x1F2E, 0x1F26,},
2354 : {0x1F2F, 0x1F27,},
2355 : {0x1F38, 0x1F30,},
2356 : {0x1F39, 0x1F31,},
2357 : {0x1F3A, 0x1F32,},
2358 : {0x1F3B, 0x1F33,},
2359 : {0x1F3C, 0x1F34,},
2360 : {0x1F3D, 0x1F35,},
2361 : {0x1F3E, 0x1F36,},
2362 : {0x1F3F, 0x1F37,},
2363 : {0x1F48, 0x1F40,},
2364 : {0x1F49, 0x1F41,},
2365 : {0x1F4A, 0x1F42,},
2366 : {0x1F4B, 0x1F43,},
2367 : {0x1F4C, 0x1F44,},
2368 : {0x1F4D, 0x1F45,},
2369 : {0x1F59, 0x1F51,},
2370 : {0x1F5B, 0x1F53,},
2371 : {0x1F5D, 0x1F55,},
2372 : {0x1F5F, 0x1F57,},
2373 : {0x1F68, 0x1F60,},
2374 : {0x1F69, 0x1F61,},
2375 : {0x1F6A, 0x1F62,},
2376 : {0x1F6B, 0x1F63,},
2377 : {0x1F6C, 0x1F64,},
2378 : {0x1F6D, 0x1F65,},
2379 : {0x1F6E, 0x1F66,},
2380 : {0x1F6F, 0x1F67,},
2381 : {0x1F88, 0x1F80,},
2382 : {0x1F89, 0x1F81,},
2383 : {0x1F8A, 0x1F82,},
2384 : {0x1F8B, 0x1F83,},
2385 : {0x1F8C, 0x1F84,},
2386 : {0x1F8D, 0x1F85,},
2387 : {0x1F8E, 0x1F86,},
2388 : {0x1F8F, 0x1F87,},
2389 : {0x1F98, 0x1F90,},
2390 : {0x1F99, 0x1F91,},
2391 : {0x1F9A, 0x1F92,},
2392 : {0x1F9B, 0x1F93,},
2393 : {0x1F9C, 0x1F94,},
2394 : {0x1F9D, 0x1F95,},
2395 : {0x1F9E, 0x1F96,},
2396 : {0x1F9F, 0x1F97,},
2397 : {0x1FA8, 0x1FA0,},
2398 : {0x1FA9, 0x1FA1,},
2399 : {0x1FAA, 0x1FA2,},
2400 : {0x1FAB, 0x1FA3,},
2401 : {0x1FAC, 0x1FA4,},
2402 : {0x1FAD, 0x1FA5,},
2403 : {0x1FAE, 0x1FA6,},
2404 : {0x1FAF, 0x1FA7,},
2405 : {0x1FB8, 0x1FB0,},
2406 : {0x1FB9, 0x1FB1,},
2407 : {0x1FBA, 0x1F70,},
2408 : {0x1FBB, 0x1F71,},
2409 : {0x1FBC, 0x1FB3,},
2410 : {0x1FC8, 0x1F72,},
2411 : {0x1FC9, 0x1F73,},
2412 : {0x1FCA, 0x1F74,},
2413 : {0x1FCB, 0x1F75,},
2414 : {0x1FCC, 0x1FC3,},
2415 : {0x1FD8, 0x1FD0,},
2416 : {0x1FD9, 0x1FD1,},
2417 : {0x1FDA, 0x1F76,},
2418 : {0x1FDB, 0x1F77,},
2419 : {0x1FE8, 0x1FE0,},
2420 : {0x1FE9, 0x1FE1,},
2421 : {0x1FEA, 0x1F7A,},
2422 : {0x1FEB, 0x1F7B,},
2423 : {0x1FEC, 0x1FE5,},
2424 : {0x1FF8, 0x1F78,},
2425 : {0x1FF9, 0x1F79,},
2426 : {0x1FFA, 0x1F7C,},
2427 : {0x1FFB, 0x1F7D,},
2428 : {0x1FFC, 0x1FF3,},
2429 : {0x2126, 0x03C9,},
2430 : {0x212A, 0x006B,},
2431 : {0x212B, 0x00E5,},
2432 : {0x2132, 0x214E,},
2433 : {0x2160, 0x2170,},
2434 : {0x2161, 0x2171,},
2435 : {0x2162, 0x2172,},
2436 : {0x2163, 0x2173,},
2437 : {0x2164, 0x2174,},
2438 : {0x2165, 0x2175,},
2439 : {0x2166, 0x2176,},
2440 : {0x2167, 0x2177,},
2441 : {0x2168, 0x2178,},
2442 : {0x2169, 0x2179,},
2443 : {0x216A, 0x217A,},
2444 : {0x216B, 0x217B,},
2445 : {0x216C, 0x217C,},
2446 : {0x216D, 0x217D,},
2447 : {0x216E, 0x217E,},
2448 : {0x216F, 0x217F,},
2449 : {0x2183, 0x2184,},
2450 : {0x24B6, 0x24D0,},
2451 : {0x24B7, 0x24D1,},
2452 : {0x24B8, 0x24D2,},
2453 : {0x24B9, 0x24D3,},
2454 : {0x24BA, 0x24D4,},
2455 : {0x24BB, 0x24D5,},
2456 : {0x24BC, 0x24D6,},
2457 : {0x24BD, 0x24D7,},
2458 : {0x24BE, 0x24D8,},
2459 : {0x24BF, 0x24D9,},
2460 : {0x24C0, 0x24DA,},
2461 : {0x24C1, 0x24DB,},
2462 : {0x24C2, 0x24DC,},
2463 : {0x24C3, 0x24DD,},
2464 : {0x24C4, 0x24DE,},
2465 : {0x24C5, 0x24DF,},
2466 : {0x24C6, 0x24E0,},
2467 : {0x24C7, 0x24E1,},
2468 : {0x24C8, 0x24E2,},
2469 : {0x24C9, 0x24E3,},
2470 : {0x24CA, 0x24E4,},
2471 : {0x24CB, 0x24E5,},
2472 : {0x24CC, 0x24E6,},
2473 : {0x24CD, 0x24E7,},
2474 : {0x24CE, 0x24E8,},
2475 : {0x24CF, 0x24E9,},
2476 : {0x2C00, 0x2C30,},
2477 : {0x2C01, 0x2C31,},
2478 : {0x2C02, 0x2C32,},
2479 : {0x2C03, 0x2C33,},
2480 : {0x2C04, 0x2C34,},
2481 : {0x2C05, 0x2C35,},
2482 : {0x2C06, 0x2C36,},
2483 : {0x2C07, 0x2C37,},
2484 : {0x2C08, 0x2C38,},
2485 : {0x2C09, 0x2C39,},
2486 : {0x2C0A, 0x2C3A,},
2487 : {0x2C0B, 0x2C3B,},
2488 : {0x2C0C, 0x2C3C,},
2489 : {0x2C0D, 0x2C3D,},
2490 : {0x2C0E, 0x2C3E,},
2491 : {0x2C0F, 0x2C3F,},
2492 : {0x2C10, 0x2C40,},
2493 : {0x2C11, 0x2C41,},
2494 : {0x2C12, 0x2C42,},
2495 : {0x2C13, 0x2C43,},
2496 : {0x2C14, 0x2C44,},
2497 : {0x2C15, 0x2C45,},
2498 : {0x2C16, 0x2C46,},
2499 : {0x2C17, 0x2C47,},
2500 : {0x2C18, 0x2C48,},
2501 : {0x2C19, 0x2C49,},
2502 : {0x2C1A, 0x2C4A,},
2503 : {0x2C1B, 0x2C4B,},
2504 : {0x2C1C, 0x2C4C,},
2505 : {0x2C1D, 0x2C4D,},
2506 : {0x2C1E, 0x2C4E,},
2507 : {0x2C1F, 0x2C4F,},
2508 : {0x2C20, 0x2C50,},
2509 : {0x2C21, 0x2C51,},
2510 : {0x2C22, 0x2C52,},
2511 : {0x2C23, 0x2C53,},
2512 : {0x2C24, 0x2C54,},
2513 : {0x2C25, 0x2C55,},
2514 : {0x2C26, 0x2C56,},
2515 : {0x2C27, 0x2C57,},
2516 : {0x2C28, 0x2C58,},
2517 : {0x2C29, 0x2C59,},
2518 : {0x2C2A, 0x2C5A,},
2519 : {0x2C2B, 0x2C5B,},
2520 : {0x2C2C, 0x2C5C,},
2521 : {0x2C2D, 0x2C5D,},
2522 : {0x2C2E, 0x2C5E,},
2523 : {0x2C2F, 0x2C5F,},
2524 : {0x2C60, 0x2C61,},
2525 : {0x2C62, 0x026B,},
2526 : {0x2C63, 0x1D7D,},
2527 : {0x2C64, 0x027D,},
2528 : {0x2C67, 0x2C68,},
2529 : {0x2C69, 0x2C6A,},
2530 : {0x2C6B, 0x2C6C,},
2531 : {0x2C6D, 0x0251,},
2532 : {0x2C6E, 0x0271,},
2533 : {0x2C6F, 0x0250,},
2534 : {0x2C70, 0x0252,},
2535 : {0x2C72, 0x2C73,},
2536 : {0x2C75, 0x2C76,},
2537 : {0x2C7E, 0x023F,},
2538 : {0x2C7F, 0x0240,},
2539 : {0x2C80, 0x2C81,},
2540 : {0x2C82, 0x2C83,},
2541 : {0x2C84, 0x2C85,},
2542 : {0x2C86, 0x2C87,},
2543 : {0x2C88, 0x2C89,},
2544 : {0x2C8A, 0x2C8B,},
2545 : {0x2C8C, 0x2C8D,},
2546 : {0x2C8E, 0x2C8F,},
2547 : {0x2C90, 0x2C91,},
2548 : {0x2C92, 0x2C93,},
2549 : {0x2C94, 0x2C95,},
2550 : {0x2C96, 0x2C97,},
2551 : {0x2C98, 0x2C99,},
2552 : {0x2C9A, 0x2C9B,},
2553 : {0x2C9C, 0x2C9D,},
2554 : {0x2C9E, 0x2C9F,},
2555 : {0x2CA0, 0x2CA1,},
2556 : {0x2CA2, 0x2CA3,},
2557 : {0x2CA4, 0x2CA5,},
2558 : {0x2CA6, 0x2CA7,},
2559 : {0x2CA8, 0x2CA9,},
2560 : {0x2CAA, 0x2CAB,},
2561 : {0x2CAC, 0x2CAD,},
2562 : {0x2CAE, 0x2CAF,},
2563 : {0x2CB0, 0x2CB1,},
2564 : {0x2CB2, 0x2CB3,},
2565 : {0x2CB4, 0x2CB5,},
2566 : {0x2CB6, 0x2CB7,},
2567 : {0x2CB8, 0x2CB9,},
2568 : {0x2CBA, 0x2CBB,},
2569 : {0x2CBC, 0x2CBD,},
2570 : {0x2CBE, 0x2CBF,},
2571 : {0x2CC0, 0x2CC1,},
2572 : {0x2CC2, 0x2CC3,},
2573 : {0x2CC4, 0x2CC5,},
2574 : {0x2CC6, 0x2CC7,},
2575 : {0x2CC8, 0x2CC9,},
2576 : {0x2CCA, 0x2CCB,},
2577 : {0x2CCC, 0x2CCD,},
2578 : {0x2CCE, 0x2CCF,},
2579 : {0x2CD0, 0x2CD1,},
2580 : {0x2CD2, 0x2CD3,},
2581 : {0x2CD4, 0x2CD5,},
2582 : {0x2CD6, 0x2CD7,},
2583 : {0x2CD8, 0x2CD9,},
2584 : {0x2CDA, 0x2CDB,},
2585 : {0x2CDC, 0x2CDD,},
2586 : {0x2CDE, 0x2CDF,},
2587 : {0x2CE0, 0x2CE1,},
2588 : {0x2CE2, 0x2CE3,},
2589 : {0x2CEB, 0x2CEC,},
2590 : {0x2CED, 0x2CEE,},
2591 : {0x2CF2, 0x2CF3,},
2592 : {0xA640, 0xA641,},
2593 : {0xA642, 0xA643,},
2594 : {0xA644, 0xA645,},
2595 : {0xA646, 0xA647,},
2596 : {0xA648, 0xA649,},
2597 : {0xA64A, 0xA64B,},
2598 : {0xA64C, 0xA64D,},
2599 : {0xA64E, 0xA64F,},
2600 : {0xA650, 0xA651,},
2601 : {0xA652, 0xA653,},
2602 : {0xA654, 0xA655,},
2603 : {0xA656, 0xA657,},
2604 : {0xA658, 0xA659,},
2605 : {0xA65A, 0xA65B,},
2606 : {0xA65C, 0xA65D,},
2607 : {0xA65E, 0xA65F,},
2608 : {0xA660, 0xA661,},
2609 : {0xA662, 0xA663,},
2610 : {0xA664, 0xA665,},
2611 : {0xA666, 0xA667,},
2612 : {0xA668, 0xA669,},
2613 : {0xA66A, 0xA66B,},
2614 : {0xA66C, 0xA66D,},
2615 : {0xA680, 0xA681,},
2616 : {0xA682, 0xA683,},
2617 : {0xA684, 0xA685,},
2618 : {0xA686, 0xA687,},
2619 : {0xA688, 0xA689,},
2620 : {0xA68A, 0xA68B,},
2621 : {0xA68C, 0xA68D,},
2622 : {0xA68E, 0xA68F,},
2623 : {0xA690, 0xA691,},
2624 : {0xA692, 0xA693,},
2625 : {0xA694, 0xA695,},
2626 : {0xA696, 0xA697,},
2627 : {0xA698, 0xA699,},
2628 : {0xA69A, 0xA69B,},
2629 : {0xA722, 0xA723,},
2630 : {0xA724, 0xA725,},
2631 : {0xA726, 0xA727,},
2632 : {0xA728, 0xA729,},
2633 : {0xA72A, 0xA72B,},
2634 : {0xA72C, 0xA72D,},
2635 : {0xA72E, 0xA72F,},
2636 : {0xA732, 0xA733,},
2637 : {0xA734, 0xA735,},
2638 : {0xA736, 0xA737,},
2639 : {0xA738, 0xA739,},
2640 : {0xA73A, 0xA73B,},
2641 : {0xA73C, 0xA73D,},
2642 : {0xA73E, 0xA73F,},
2643 : {0xA740, 0xA741,},
2644 : {0xA742, 0xA743,},
2645 : {0xA744, 0xA745,},
2646 : {0xA746, 0xA747,},
2647 : {0xA748, 0xA749,},
2648 : {0xA74A, 0xA74B,},
2649 : {0xA74C, 0xA74D,},
2650 : {0xA74E, 0xA74F,},
2651 : {0xA750, 0xA751,},
2652 : {0xA752, 0xA753,},
2653 : {0xA754, 0xA755,},
2654 : {0xA756, 0xA757,},
2655 : {0xA758, 0xA759,},
2656 : {0xA75A, 0xA75B,},
2657 : {0xA75C, 0xA75D,},
2658 : {0xA75E, 0xA75F,},
2659 : {0xA760, 0xA761,},
2660 : {0xA762, 0xA763,},
2661 : {0xA764, 0xA765,},
2662 : {0xA766, 0xA767,},
2663 : {0xA768, 0xA769,},
2664 : {0xA76A, 0xA76B,},
2665 : {0xA76C, 0xA76D,},
2666 : {0xA76E, 0xA76F,},
2667 : {0xA779, 0xA77A,},
2668 : {0xA77B, 0xA77C,},
2669 : {0xA77D, 0x1D79,},
2670 : {0xA77E, 0xA77F,},
2671 : {0xA780, 0xA781,},
2672 : {0xA782, 0xA783,},
2673 : {0xA784, 0xA785,},
2674 : {0xA786, 0xA787,},
2675 : {0xA78B, 0xA78C,},
2676 : {0xA78D, 0x0265,},
2677 : {0xA790, 0xA791,},
2678 : {0xA792, 0xA793,},
2679 : {0xA796, 0xA797,},
2680 : {0xA798, 0xA799,},
2681 : {0xA79A, 0xA79B,},
2682 : {0xA79C, 0xA79D,},
2683 : {0xA79E, 0xA79F,},
2684 : {0xA7A0, 0xA7A1,},
2685 : {0xA7A2, 0xA7A3,},
2686 : {0xA7A4, 0xA7A5,},
2687 : {0xA7A6, 0xA7A7,},
2688 : {0xA7A8, 0xA7A9,},
2689 : {0xA7AA, 0x0266,},
2690 : {0xA7AB, 0x025C,},
2691 : {0xA7AC, 0x0261,},
2692 : {0xA7AD, 0x026C,},
2693 : {0xA7AE, 0x026A,},
2694 : {0xA7B0, 0x029E,},
2695 : {0xA7B1, 0x0287,},
2696 : {0xA7B2, 0x029D,},
2697 : {0xA7B3, 0xAB53,},
2698 : {0xA7B4, 0xA7B5,},
2699 : {0xA7B6, 0xA7B7,},
2700 : {0xA7B8, 0xA7B9,},
2701 : {0xA7BA, 0xA7BB,},
2702 : {0xA7BC, 0xA7BD,},
2703 : {0xA7BE, 0xA7BF,},
2704 : {0xA7C0, 0xA7C1,},
2705 : {0xA7C2, 0xA7C3,},
2706 : {0xA7C4, 0xA794,},
2707 : {0xA7C5, 0x0282,},
2708 : {0xA7C6, 0x1D8E,},
2709 : {0xA7C7, 0xA7C8,},
2710 : {0xA7C9, 0xA7CA,},
2711 : {0xA7D0, 0xA7D1,},
2712 : {0xA7D6, 0xA7D7,},
2713 : {0xA7D8, 0xA7D9,},
2714 : {0xA7F5, 0xA7F6,},
2715 : {0xFF21, 0xFF41,},
2716 : {0xFF22, 0xFF42,},
2717 : {0xFF23, 0xFF43,},
2718 : {0xFF24, 0xFF44,},
2719 : {0xFF25, 0xFF45,},
2720 : {0xFF26, 0xFF46,},
2721 : {0xFF27, 0xFF47,},
2722 : {0xFF28, 0xFF48,},
2723 : {0xFF29, 0xFF49,},
2724 : {0xFF2A, 0xFF4A,},
2725 : {0xFF2B, 0xFF4B,},
2726 : {0xFF2C, 0xFF4C,},
2727 : {0xFF2D, 0xFF4D,},
2728 : {0xFF2E, 0xFF4E,},
2729 : {0xFF2F, 0xFF4F,},
2730 : {0xFF30, 0xFF50,},
2731 : {0xFF31, 0xFF51,},
2732 : {0xFF32, 0xFF52,},
2733 : {0xFF33, 0xFF53,},
2734 : {0xFF34, 0xFF54,},
2735 : {0xFF35, 0xFF55,},
2736 : {0xFF36, 0xFF56,},
2737 : {0xFF37, 0xFF57,},
2738 : {0xFF38, 0xFF58,},
2739 : {0xFF39, 0xFF59,},
2740 : {0xFF3A, 0xFF5A,},
2741 : {0x10400, 0x10428,},
2742 : {0x10401, 0x10429,},
2743 : {0x10402, 0x1042A,},
2744 : {0x10403, 0x1042B,},
2745 : {0x10404, 0x1042C,},
2746 : {0x10405, 0x1042D,},
2747 : {0x10406, 0x1042E,},
2748 : {0x10407, 0x1042F,},
2749 : {0x10408, 0x10430,},
2750 : {0x10409, 0x10431,},
2751 : {0x1040A, 0x10432,},
2752 : {0x1040B, 0x10433,},
2753 : {0x1040C, 0x10434,},
2754 : {0x1040D, 0x10435,},
2755 : {0x1040E, 0x10436,},
2756 : {0x1040F, 0x10437,},
2757 : {0x10410, 0x10438,},
2758 : {0x10411, 0x10439,},
2759 : {0x10412, 0x1043A,},
2760 : {0x10413, 0x1043B,},
2761 : {0x10414, 0x1043C,},
2762 : {0x10415, 0x1043D,},
2763 : {0x10416, 0x1043E,},
2764 : {0x10417, 0x1043F,},
2765 : {0x10418, 0x10440,},
2766 : {0x10419, 0x10441,},
2767 : {0x1041A, 0x10442,},
2768 : {0x1041B, 0x10443,},
2769 : {0x1041C, 0x10444,},
2770 : {0x1041D, 0x10445,},
2771 : {0x1041E, 0x10446,},
2772 : {0x1041F, 0x10447,},
2773 : {0x10420, 0x10448,},
2774 : {0x10421, 0x10449,},
2775 : {0x10422, 0x1044A,},
2776 : {0x10423, 0x1044B,},
2777 : {0x10424, 0x1044C,},
2778 : {0x10425, 0x1044D,},
2779 : {0x10426, 0x1044E,},
2780 : {0x10427, 0x1044F,},
2781 : {0x104B0, 0x104D8,},
2782 : {0x104B1, 0x104D9,},
2783 : {0x104B2, 0x104DA,},
2784 : {0x104B3, 0x104DB,},
2785 : {0x104B4, 0x104DC,},
2786 : {0x104B5, 0x104DD,},
2787 : {0x104B6, 0x104DE,},
2788 : {0x104B7, 0x104DF,},
2789 : {0x104B8, 0x104E0,},
2790 : {0x104B9, 0x104E1,},
2791 : {0x104BA, 0x104E2,},
2792 : {0x104BB, 0x104E3,},
2793 : {0x104BC, 0x104E4,},
2794 : {0x104BD, 0x104E5,},
2795 : {0x104BE, 0x104E6,},
2796 : {0x104BF, 0x104E7,},
2797 : {0x104C0, 0x104E8,},
2798 : {0x104C1, 0x104E9,},
2799 : {0x104C2, 0x104EA,},
2800 : {0x104C3, 0x104EB,},
2801 : {0x104C4, 0x104EC,},
2802 : {0x104C5, 0x104ED,},
2803 : {0x104C6, 0x104EE,},
2804 : {0x104C7, 0x104EF,},
2805 : {0x104C8, 0x104F0,},
2806 : {0x104C9, 0x104F1,},
2807 : {0x104CA, 0x104F2,},
2808 : {0x104CB, 0x104F3,},
2809 : {0x104CC, 0x104F4,},
2810 : {0x104CD, 0x104F5,},
2811 : {0x104CE, 0x104F6,},
2812 : {0x104CF, 0x104F7,},
2813 : {0x104D0, 0x104F8,},
2814 : {0x104D1, 0x104F9,},
2815 : {0x104D2, 0x104FA,},
2816 : {0x104D3, 0x104FB,},
2817 : {0x10570, 0x10597,},
2818 : {0x10571, 0x10598,},
2819 : {0x10572, 0x10599,},
2820 : {0x10573, 0x1059A,},
2821 : {0x10574, 0x1059B,},
2822 : {0x10575, 0x1059C,},
2823 : {0x10576, 0x1059D,},
2824 : {0x10577, 0x1059E,},
2825 : {0x10578, 0x1059F,},
2826 : {0x10579, 0x105A0,},
2827 : {0x1057A, 0x105A1,},
2828 : {0x1057C, 0x105A3,},
2829 : {0x1057D, 0x105A4,},
2830 : {0x1057E, 0x105A5,},
2831 : {0x1057F, 0x105A6,},
2832 : {0x10580, 0x105A7,},
2833 : {0x10581, 0x105A8,},
2834 : {0x10582, 0x105A9,},
2835 : {0x10583, 0x105AA,},
2836 : {0x10584, 0x105AB,},
2837 : {0x10585, 0x105AC,},
2838 : {0x10586, 0x105AD,},
2839 : {0x10587, 0x105AE,},
2840 : {0x10588, 0x105AF,},
2841 : {0x10589, 0x105B0,},
2842 : {0x1058A, 0x105B1,},
2843 : {0x1058C, 0x105B3,},
2844 : {0x1058D, 0x105B4,},
2845 : {0x1058E, 0x105B5,},
2846 : {0x1058F, 0x105B6,},
2847 : {0x10590, 0x105B7,},
2848 : {0x10591, 0x105B8,},
2849 : {0x10592, 0x105B9,},
2850 : {0x10594, 0x105BB,},
2851 : {0x10595, 0x105BC,},
2852 : {0x10C80, 0x10CC0,},
2853 : {0x10C81, 0x10CC1,},
2854 : {0x10C82, 0x10CC2,},
2855 : {0x10C83, 0x10CC3,},
2856 : {0x10C84, 0x10CC4,},
2857 : {0x10C85, 0x10CC5,},
2858 : {0x10C86, 0x10CC6,},
2859 : {0x10C87, 0x10CC7,},
2860 : {0x10C88, 0x10CC8,},
2861 : {0x10C89, 0x10CC9,},
2862 : {0x10C8A, 0x10CCA,},
2863 : {0x10C8B, 0x10CCB,},
2864 : {0x10C8C, 0x10CCC,},
2865 : {0x10C8D, 0x10CCD,},
2866 : {0x10C8E, 0x10CCE,},
2867 : {0x10C8F, 0x10CCF,},
2868 : {0x10C90, 0x10CD0,},
2869 : {0x10C91, 0x10CD1,},
2870 : {0x10C92, 0x10CD2,},
2871 : {0x10C93, 0x10CD3,},
2872 : {0x10C94, 0x10CD4,},
2873 : {0x10C95, 0x10CD5,},
2874 : {0x10C96, 0x10CD6,},
2875 : {0x10C97, 0x10CD7,},
2876 : {0x10C98, 0x10CD8,},
2877 : {0x10C99, 0x10CD9,},
2878 : {0x10C9A, 0x10CDA,},
2879 : {0x10C9B, 0x10CDB,},
2880 : {0x10C9C, 0x10CDC,},
2881 : {0x10C9D, 0x10CDD,},
2882 : {0x10C9E, 0x10CDE,},
2883 : {0x10C9F, 0x10CDF,},
2884 : {0x10CA0, 0x10CE0,},
2885 : {0x10CA1, 0x10CE1,},
2886 : {0x10CA2, 0x10CE2,},
2887 : {0x10CA3, 0x10CE3,},
2888 : {0x10CA4, 0x10CE4,},
2889 : {0x10CA5, 0x10CE5,},
2890 : {0x10CA6, 0x10CE6,},
2891 : {0x10CA7, 0x10CE7,},
2892 : {0x10CA8, 0x10CE8,},
2893 : {0x10CA9, 0x10CE9,},
2894 : {0x10CAA, 0x10CEA,},
2895 : {0x10CAB, 0x10CEB,},
2896 : {0x10CAC, 0x10CEC,},
2897 : {0x10CAD, 0x10CED,},
2898 : {0x10CAE, 0x10CEE,},
2899 : {0x10CAF, 0x10CEF,},
2900 : {0x10CB0, 0x10CF0,},
2901 : {0x10CB1, 0x10CF1,},
2902 : {0x10CB2, 0x10CF2,},
2903 : {0x118A0, 0x118C0,},
2904 : {0x118A1, 0x118C1,},
2905 : {0x118A2, 0x118C2,},
2906 : {0x118A3, 0x118C3,},
2907 : {0x118A4, 0x118C4,},
2908 : {0x118A5, 0x118C5,},
2909 : {0x118A6, 0x118C6,},
2910 : {0x118A7, 0x118C7,},
2911 : {0x118A8, 0x118C8,},
2912 : {0x118A9, 0x118C9,},
2913 : {0x118AA, 0x118CA,},
2914 : {0x118AB, 0x118CB,},
2915 : {0x118AC, 0x118CC,},
2916 : {0x118AD, 0x118CD,},
2917 : {0x118AE, 0x118CE,},
2918 : {0x118AF, 0x118CF,},
2919 : {0x118B0, 0x118D0,},
2920 : {0x118B1, 0x118D1,},
2921 : {0x118B2, 0x118D2,},
2922 : {0x118B3, 0x118D3,},
2923 : {0x118B4, 0x118D4,},
2924 : {0x118B5, 0x118D5,},
2925 : {0x118B6, 0x118D6,},
2926 : {0x118B7, 0x118D7,},
2927 : {0x118B8, 0x118D8,},
2928 : {0x118B9, 0x118D9,},
2929 : {0x118BA, 0x118DA,},
2930 : {0x118BB, 0x118DB,},
2931 : {0x118BC, 0x118DC,},
2932 : {0x118BD, 0x118DD,},
2933 : {0x118BE, 0x118DE,},
2934 : {0x118BF, 0x118DF,},
2935 : {0x16E40, 0x16E60,},
2936 : {0x16E41, 0x16E61,},
2937 : {0x16E42, 0x16E62,},
2938 : {0x16E43, 0x16E63,},
2939 : {0x16E44, 0x16E64,},
2940 : {0x16E45, 0x16E65,},
2941 : {0x16E46, 0x16E66,},
2942 : {0x16E47, 0x16E67,},
2943 : {0x16E48, 0x16E68,},
2944 : {0x16E49, 0x16E69,},
2945 : {0x16E4A, 0x16E6A,},
2946 : {0x16E4B, 0x16E6B,},
2947 : {0x16E4C, 0x16E6C,},
2948 : {0x16E4D, 0x16E6D,},
2949 : {0x16E4E, 0x16E6E,},
2950 : {0x16E4F, 0x16E6F,},
2951 : {0x16E50, 0x16E70,},
2952 : {0x16E51, 0x16E71,},
2953 : {0x16E52, 0x16E72,},
2954 : {0x16E53, 0x16E73,},
2955 : {0x16E54, 0x16E74,},
2956 : {0x16E55, 0x16E75,},
2957 : {0x16E56, 0x16E76,},
2958 : {0x16E57, 0x16E77,},
2959 : {0x16E58, 0x16E78,},
2960 : {0x16E59, 0x16E79,},
2961 : {0x16E5A, 0x16E7A,},
2962 : {0x16E5B, 0x16E7B,},
2963 : {0x16E5C, 0x16E7C,},
2964 : {0x16E5D, 0x16E7D,},
2965 : {0x16E5E, 0x16E7E,},
2966 : {0x16E5F, 0x16E7F,},
2967 : {0x1E900, 0x1E922,},
2968 : {0x1E901, 0x1E923,},
2969 : {0x1E902, 0x1E924,},
2970 : {0x1E903, 0x1E925,},
2971 : {0x1E904, 0x1E926,},
2972 : {0x1E905, 0x1E927,},
2973 : {0x1E906, 0x1E928,},
2974 : {0x1E907, 0x1E929,},
2975 : {0x1E908, 0x1E92A,},
2976 : {0x1E909, 0x1E92B,},
2977 : {0x1E90A, 0x1E92C,},
2978 : {0x1E90B, 0x1E92D,},
2979 : {0x1E90C, 0x1E92E,},
2980 : {0x1E90D, 0x1E92F,},
2981 : {0x1E90E, 0x1E930,},
2982 : {0x1E90F, 0x1E931,},
2983 : {0x1E910, 0x1E932,},
2984 : {0x1E911, 0x1E933,},
2985 : {0x1E912, 0x1E934,},
2986 : {0x1E913, 0x1E935,},
2987 : {0x1E914, 0x1E936,},
2988 : {0x1E915, 0x1E937,},
2989 : {0x1E916, 0x1E938,},
2990 : {0x1E917, 0x1E939,},
2991 : {0x1E918, 0x1E93A,},
2992 : {0x1E919, 0x1E93B,},
2993 : {0x1E91A, 0x1E93C,},
2994 : {0x1E91B, 0x1E93D,},
2995 : {0x1E91C, 0x1E93E,},
2996 : {0x1E91D, 0x1E93F,},
2997 : {0x1E91E, 0x1E940,},
2998 : {0x1E91F, 0x1E941,},
2999 : {0x1E920, 0x1E942,},
3000 : {0x1E921, 0x1E943,},
3001 : };
3002 :
3003 : static BAT *UTF8_toUpperFrom = NULL, *UTF8_toUpperTo = NULL,
3004 : *UTF8_toLowerFrom = NULL, *UTF8_toLowerTo = NULL;
3005 :
3006 : static str
3007 341 : STRprelude(void)
3008 : {
3009 341 : if (UTF8_toUpperFrom == NULL) {
3010 341 : size_t i;
3011 :
3012 341 : UTF8_toUpperFrom = COLnew(0, TYPE_int,
3013 : sizeof(UTF8_toUpper) / sizeof(UTF8_toUpper[0]),
3014 : SYSTRANS);
3015 341 : UTF8_toUpperTo = COLnew(0, TYPE_int,
3016 : sizeof(UTF8_toUpper) / sizeof(UTF8_toUpper[0]),
3017 : SYSTRANS);
3018 341 : UTF8_toLowerFrom = COLnew(0, TYPE_int,
3019 : sizeof(UTF8_toLower) / sizeof(UTF8_toLower[0]),
3020 : SYSTRANS);
3021 341 : UTF8_toLowerTo = COLnew(0, TYPE_int,
3022 : sizeof(UTF8_toLower) / sizeof(UTF8_toLower[0]),
3023 : SYSTRANS);
3024 341 : if (UTF8_toUpperFrom == NULL || UTF8_toUpperTo == NULL
3025 341 : || UTF8_toLowerFrom == NULL || UTF8_toLowerTo == NULL) {
3026 0 : goto bailout;
3027 : }
3028 :
3029 341 : int *fp = (int *) Tloc(UTF8_toUpperFrom, 0);
3030 341 : int *tp = (int *) Tloc(UTF8_toUpperTo, 0);
3031 494791 : for (i = 0; i < sizeof(UTF8_toUpper) / sizeof(UTF8_toUpper[0]); i++) {
3032 494450 : fp[i] = UTF8_toUpper[i].from;
3033 494450 : tp[i] = UTF8_toUpper[i].to;
3034 : }
3035 341 : BATsetcount(UTF8_toUpperFrom, i);
3036 341 : UTF8_toUpperFrom->tkey = true;
3037 341 : UTF8_toUpperFrom->tsorted = true;
3038 341 : UTF8_toUpperFrom->trevsorted = false;
3039 341 : UTF8_toUpperFrom->tnil = false;
3040 341 : UTF8_toUpperFrom->tnonil = true;
3041 341 : BATsetcount(UTF8_toUpperTo, i);
3042 341 : UTF8_toUpperTo->tkey = false;
3043 341 : UTF8_toUpperTo->tsorted = false;
3044 341 : UTF8_toUpperTo->trevsorted = false;
3045 341 : UTF8_toUpperTo->tnil = false;
3046 341 : UTF8_toUpperTo->tnonil = true;
3047 :
3048 341 : fp = (int *) Tloc(UTF8_toLowerFrom, 0);
3049 341 : tp = (int *) Tloc(UTF8_toLowerTo, 0);
3050 488994 : for (i = 0; i < sizeof(UTF8_toLower) / sizeof(UTF8_toLower[0]); i++) {
3051 488653 : fp[i] = UTF8_toLower[i].from;
3052 488653 : tp[i] = UTF8_toLower[i].to;
3053 : }
3054 341 : BATsetcount(UTF8_toLowerFrom, i);
3055 341 : UTF8_toLowerFrom->tkey = true;
3056 341 : UTF8_toLowerFrom->tsorted = true;
3057 341 : UTF8_toLowerFrom->trevsorted = false;
3058 341 : UTF8_toLowerFrom->tnil = false;
3059 341 : UTF8_toLowerFrom->tnonil = true;
3060 341 : BATsetcount(UTF8_toLowerTo, i);
3061 341 : UTF8_toLowerTo->tkey = false;
3062 341 : UTF8_toLowerTo->tsorted = false;
3063 341 : UTF8_toLowerTo->trevsorted = false;
3064 341 : UTF8_toLowerTo->tnil = false;
3065 341 : UTF8_toLowerTo->tnonil = true;
3066 :
3067 682 : if (BBPrename(UTF8_toUpperFrom, "monet_unicode_upper_from") != 0 ||
3068 682 : BBPrename(UTF8_toUpperTo, "monet_unicode_upper_to") != 0 ||
3069 682 : BBPrename(UTF8_toLowerFrom, "monet_unicode_lower_from") != 0 ||
3070 341 : BBPrename(UTF8_toLowerTo, "monet_unicode_lower_to") != 0) {
3071 0 : goto bailout;
3072 : }
3073 341 : BBP_pid(UTF8_toUpperFrom->batCacheid) = 0;
3074 341 : BBP_pid(UTF8_toUpperTo->batCacheid) = 0;
3075 341 : BBP_pid(UTF8_toLowerFrom->batCacheid) = 0;
3076 341 : BBP_pid(UTF8_toLowerTo->batCacheid) = 0;
3077 : }
3078 : return MAL_SUCCEED;
3079 :
3080 0 : bailout:
3081 0 : BBPreclaim(UTF8_toUpperFrom);
3082 0 : BBPreclaim(UTF8_toUpperTo);
3083 0 : BBPreclaim(UTF8_toLowerFrom);
3084 0 : BBPreclaim(UTF8_toLowerTo);
3085 0 : UTF8_toUpperFrom = NULL;
3086 0 : UTF8_toUpperTo = NULL;
3087 0 : UTF8_toLowerFrom = NULL;
3088 0 : UTF8_toLowerTo = NULL;
3089 0 : throw(MAL, "str.prelude", GDK_EXCEPTION);
3090 : }
3091 :
3092 : static str
3093 339 : STRepilogue(void *ret)
3094 : {
3095 339 : (void) ret;
3096 339 : BBPreclaim(UTF8_toUpperFrom);
3097 339 : BBPreclaim(UTF8_toUpperTo);
3098 339 : BBPreclaim(UTF8_toLowerFrom);
3099 339 : BBPreclaim(UTF8_toLowerTo);
3100 339 : UTF8_toUpperFrom = NULL;
3101 339 : UTF8_toUpperTo = NULL;
3102 339 : UTF8_toLowerFrom = NULL;
3103 339 : UTF8_toLowerTo = NULL;
3104 339 : return MAL_SUCCEED;
3105 : }
3106 :
/* Debug-only sanity check: s must be nil or pass utf8valid().
 * Compiles to nothing when NDEBUG is defined. */
#ifdef NDEBUG
#define UTF8_assert(s)		((void) 0)
#else
static inline void
UTF8_assert(const char *s)
{
	assert(strNil(s) || utf8valid(s) == 0);
}
#endif
3116 :
/* return the number of codepoints in the byte range [s, end), i.e. the
 * codepoint index of end within s; returns -1 if end lies before s */
static inline int
UTF8_strpos(const char *s, const char *end)
{
	UTF8_assert(s);

	return s > end ? -1 : (int) utf8nlen(s, (size_t) (end - s));
}
3128 :
3129 : /* return a pointer to the byte that starts the pos'th (0-based)
3130 : * codepoint in s */
3131 : static inline str
3132 5452897 : UTF8_strtail(const char *s, int pos)
3133 : {
3134 5452897 : UTF8_assert(s);
3135 88013908 : while (*s) {
3136 88462786 : if ((*s & 0xC0) != 0x80) {
3137 88462716 : if (pos <= 0)
3138 : break;
3139 82560941 : pos--;
3140 : }
3141 82561011 : s++;
3142 : }
3143 6265550 : return (str) s;
3144 : }
3145 :
3146 : /* copy n Unicode codepoints from s to dst, return pointer to new end */
3147 : static inline str
3148 216 : UTF8_strncpy(char *restrict dst, const char *restrict s, int n)
3149 : {
3150 216 : UTF8_assert(s);
3151 1451 : while (*s && n) {
3152 1235 : if ((*s & 0xF8) == 0xF0) {
3153 : /* 4 byte UTF-8 sequence */
3154 0 : *dst++ = *s++;
3155 0 : *dst++ = *s++;
3156 0 : *dst++ = *s++;
3157 0 : *dst++ = *s++;
3158 1235 : } else if ((*s & 0xF0) == 0xE0) {
3159 : /* 3 byte UTF-8 sequence */
3160 6 : *dst++ = *s++;
3161 6 : *dst++ = *s++;
3162 6 : *dst++ = *s++;
3163 1229 : } else if ((*s & 0xE0) == 0xC0) {
3164 : /* 2 byte UTF-8 sequence */
3165 0 : *dst++ = *s++;
3166 0 : *dst++ = *s++;
3167 : } else {
3168 : /* 1 byte UTF-8 "sequence" */
3169 1229 : *dst++ = *s++;
3170 : }
3171 1235 : n--;
3172 : }
3173 216 : *dst = '\0';
3174 216 : return dst;
3175 : }
3176 :
/* Length of s counted in Unicode codepoints.
 * Precondition (checked only by assertions): s is not nil. */
int
UTF8_strlen(const char *s)
{
	UTF8_assert(s);
	assert(!strNil(s));		/* callers must filter out nil first */

	return (int) utf8len(s);
}
3186 :
/* Length of s counted in bytes, i.e. strlen(s) narrowed to int.
 * Precondition (checked only by assertions): s is not nil. */
int
str_strlen(const char *s)
{
	UTF8_assert(s);
	assert(!strNil(s));		/* callers must filter out nil first */

	const size_t nbytes = strlen(s);

	return (int) nbytes;
}
3196 :
3197 : /* return the display width of s */
3198 : int
3199 6304855 : UTF8_strwidth(const char *s)
3200 : {
3201 6304855 : int len = 0;
3202 6304855 : int c;
3203 6304855 : int n;
3204 :
3205 6304855 : if (strNil(s))
3206 283294 : return int_nil;
3207 : c = 0;
3208 : n = 0;
3209 160232373 : while (*s != 0) {
3210 154210812 : if ((*s & 0x80) == 0) {
3211 154196860 : assert(n == 0);
3212 154196860 : len++;
3213 154196860 : n = 0;
3214 13952 : } else if ((*s & 0xC0) == 0x80) {
3215 9180 : c = (c << 6) | (*s & 0x3F);
3216 9180 : if (--n == 0) {
3217 : /* last byte of a multi-byte character */
3218 : #ifdef HAVE_WCWIDTH
3219 4772 : n = wcwidth(c);
3220 4772 : if (n >= 0)
3221 4771 : len += n;
3222 : else
3223 1 : len++; /* assume width 1 if unprintable */
3224 : n = 0;
3225 : #else
3226 : len++;
3227 : /* this list was created by combining
3228 : * the code points marked as
3229 : * Emoji_Presentation in
3230 : * /usr/share/unicode/emoji/emoji-data.txt
3231 : * and code points marked either F or
3232 : * W in EastAsianWidth.txt; this list
3233 : * is up-to-date with Unicode 9.0 */
3234 : if ((0x1100 <= c && c <= 0x115F) ||
3235 : (0x231A <= c && c <= 0x231B) ||
3236 : (0x2329 <= c && c <= 0x232A) ||
3237 : (0x23E9 <= c && c <= 0x23EC) ||
3238 : c == 0x23F0 ||
3239 : c == 0x23F3 ||
3240 : (0x25FD <= c && c <= 0x25FE) ||
3241 : (0x2614 <= c && c <= 0x2615) ||
3242 : (0x2648 <= c && c <= 0x2653) ||
3243 : c == 0x267F ||
3244 : c == 0x2693 ||
3245 : c == 0x26A1 ||
3246 : (0x26AA <= c && c <= 0x26AB) ||
3247 : (0x26BD <= c && c <= 0x26BE) ||
3248 : (0x26C4 <= c && c <= 0x26C5) ||
3249 : c == 0x26CE ||
3250 : c == 0x26D4 ||
3251 : c == 0x26EA ||
3252 : (0x26F2 <= c && c <= 0x26F3) ||
3253 : c == 0x26F5 ||
3254 : c == 0x26FA ||
3255 : c == 0x26FD ||
3256 : c == 0x2705 ||
3257 : (0x270A <= c && c <= 0x270B) ||
3258 : c == 0x2728 ||
3259 : c == 0x274C ||
3260 : c == 0x274E ||
3261 : (0x2753 <= c && c <= 0x2755) ||
3262 : c == 0x2757 ||
3263 : (0x2795 <= c && c <= 0x2797) ||
3264 : c == 0x27B0 ||
3265 : c == 0x27BF ||
3266 : (0x2B1B <= c && c <= 0x2B1C) ||
3267 : c == 0x2B50 ||
3268 : c == 0x2B55 ||
3269 : (0x2E80 <= c && c <= 0x2E99) ||
3270 : (0x2E9B <= c && c <= 0x2EF3) ||
3271 : (0x2F00 <= c && c <= 0x2FD5) ||
3272 : (0x2FF0 <= c && c <= 0x2FFB) ||
3273 : (0x3000 <= c && c <= 0x303E) ||
3274 : (0x3041 <= c && c <= 0x3096) ||
3275 : (0x3099 <= c && c <= 0x30FF) ||
3276 : (0x3105 <= c && c <= 0x312D) ||
3277 : (0x3131 <= c && c <= 0x318E) ||
3278 : (0x3190 <= c && c <= 0x31BA) ||
3279 : (0x31C0 <= c && c <= 0x31E3) ||
3280 : (0x31F0 <= c && c <= 0x321E) ||
3281 : (0x3220 <= c && c <= 0x3247) ||
3282 : (0x3250 <= c && c <= 0x32FE) ||
3283 : (0x3300 <= c && c <= 0x4DBF) ||
3284 : (0x4E00 <= c && c <= 0xA48C) ||
3285 : (0xA490 <= c && c <= 0xA4C6) ||
3286 : (0xA960 <= c && c <= 0xA97C) ||
3287 : (0xAC00 <= c && c <= 0xD7A3) ||
3288 : (0xF900 <= c && c <= 0xFAFF) ||
3289 : (0xFE10 <= c && c <= 0xFE19) ||
3290 : (0xFE30 <= c && c <= 0xFE52) ||
3291 : (0xFE54 <= c && c <= 0xFE66) ||
3292 : (0xFE68 <= c && c <= 0xFE6B) ||
3293 : (0xFF01 <= c && c <= 0xFF60) ||
3294 : (0xFFE0 <= c && c <= 0xFFE6) ||
3295 : c == 0x16FE0 ||
3296 : (0x17000 <= c && c <= 0x187EC) ||
3297 : (0x18800 <= c && c <= 0x18AF2) ||
3298 : (0x1B000 <= c && c <= 0x1B001) ||
3299 : c == 0x1F004 ||
3300 : c == 0x1F0CF ||
3301 : c == 0x1F18E || (0x1F191 <= c && c <= 0x1F19A) ||
3302 : /* removed 0x1F1E6..0x1F1FF */
3303 : (0x1F200 <= c && c <= 0x1F202) ||
3304 : (0x1F210 <= c && c <= 0x1F23B) ||
3305 : (0x1F240 <= c && c <= 0x1F248) ||
3306 : (0x1F250 <= c && c <= 0x1F251) ||
3307 : (0x1F300 <= c && c <= 0x1F320) ||
3308 : (0x1F32D <= c && c <= 0x1F335) ||
3309 : (0x1F337 <= c && c <= 0x1F37C) ||
3310 : (0x1F37E <= c && c <= 0x1F393) ||
3311 : (0x1F3A0 <= c && c <= 0x1F3CA) ||
3312 : (0x1F3CF <= c && c <= 0x1F3D3) ||
3313 : (0x1F3E0 <= c && c <= 0x1F3F0) ||
3314 : c == 0x1F3F4 ||
3315 : (0x1F3F8 <= c && c <= 0x1F43E) ||
3316 : c == 0x1F440 ||
3317 : (0x1F442 <= c && c <= 0x1F4FC) ||
3318 : (0x1F4FF <= c && c <= 0x1F53D) ||
3319 : (0x1F54B <= c && c <= 0x1F54E) ||
3320 : (0x1F550 <= c && c <= 0x1F567) ||
3321 : c == 0x1F57A ||
3322 : (0x1F595 <= c && c <= 0x1F596) ||
3323 : c == 0x1F5A4 ||
3324 : (0x1F5FB <= c && c <= 0x1F64F) ||
3325 : (0x1F680 <= c && c <= 0x1F6C5) ||
3326 : c == 0x1F6CC ||
3327 : (0x1F6D0 <= c && c <= 0x1F6D2) ||
3328 : (0x1F6EB <= c && c <= 0x1F6EC) ||
3329 : (0x1F6F4 <= c && c <= 0x1F6F6) ||
3330 : (0x1F910 <= c && c <= 0x1F91E) ||
3331 : (0x1F920 <= c && c <= 0x1F927) ||
3332 : c == 0x1F930 ||
3333 : (0x1F933 <= c && c <= 0x1F93E) ||
3334 : (0x1F940 <= c && c <= 0x1F94B) ||
3335 : (0x1F950 <= c && c <= 0x1F95E) ||
3336 : (0x1F980 <= c && c <= 0x1F991) ||
3337 : c == 0x1F9C0 ||
3338 : (0x20000 <= c && c <= 0x2FFFD) ||
3339 : (0x30000 <= c && c <= 0x3FFFD))
3340 : len++;
3341 : #endif
3342 : }
3343 4772 : } else if ((*s & 0xE0) == 0xC0) {
3344 411 : assert(n == 0);
3345 411 : n = 1;
3346 411 : c = *s & 0x1F;
3347 4361 : } else if ((*s & 0xF0) == 0xE0) {
3348 4314 : assert(n == 0);
3349 4314 : n = 2;
3350 4314 : c = *s & 0x0F;
3351 47 : } else if ((*s & 0xF8) == 0xF0) {
3352 47 : assert(n == 0);
3353 47 : n = 3;
3354 47 : c = *s & 0x07;
3355 0 : } else if ((*s & 0xFC) == 0xF8) {
3356 0 : assert(n == 0);
3357 0 : n = 4;
3358 0 : c = *s & 0x03;
3359 : } else {
3360 0 : assert(0);
3361 : n = 0;
3362 : }
3363 154210812 : s++;
3364 : }
3365 : return len;
3366 : }
3367 :
3368 : str
3369 103448 : str_case_hash_lock(bool upper)
3370 : {
3371 103448 : BAT *b = upper ? UTF8_toUpperFrom : UTF8_toLowerFrom;
3372 :
3373 103448 : if (BAThash(b) != GDK_SUCCEED)
3374 0 : throw(MAL, "str.str_case_hash_lock", GDK_EXCEPTION);
3375 103448 : MT_rwlock_rdlock(&b->thashlock);
3376 103451 : if (b->thash)
3377 : return MAL_SUCCEED;
3378 0 : MT_rwlock_rdunlock(&b->thashlock);
3379 0 : throw(MAL, "str.str_case_hash_lock", "Lost hash");
3380 : }
3381 :
3382 : void
3383 103445 : str_case_hash_unlock(bool upper)
3384 : {
3385 106 : BAT *b = upper ? UTF8_toUpperFrom : UTF8_toLowerFrom;
3386 106 : MT_rwlock_rdunlock(&b->thashlock);
3387 106 : }
3388 :
/* ASCII lower-casing table: 'A'..'Z' map to 'a'..'z', every other
 * value in 0..127 maps to itself.  One row per 16 code points. */
static const char upper2lower[128] = {
	/* 0x00 */   0,   1,   2,   3,   4,   5,   6,   7,
	             8,   9,  10,  11,  12,  13,  14,  15,
	/* 0x10 */  16,  17,  18,  19,  20,  21,  22,  23,
	            24,  25,  26,  27,  28,  29,  30,  31,
	/* 0x20 */  32,  33,  34,  35,  36,  37,  38,  39,
	            40,  41,  42,  43,  44,  45,  46,  47,
	/* 0x30 */  48,  49,  50,  51,  52,  53,  54,  55,
	            56,  57,  58,  59,  60,  61,  62,  63,
	/* 0x40 */  64, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
	           'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	/* 0x50 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
	           'x', 'y', 'z',  91,  92,  93,  94,  95,
	/* 0x60 */  96, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
	           'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	/* 0x70 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
	           'x', 'y', 'z', 123, 124, 125, 126, 127
};
/* ASCII upper-casing table: 'a'..'z' map to 'A'..'Z', every other
 * value in 0..127 maps to itself.  One row per 16 code points. */
static const char lower2upper[128] = {
	/* 0x00 */   0,   1,   2,   3,   4,   5,   6,   7,
	             8,   9,  10,  11,  12,  13,  14,  15,
	/* 0x10 */  16,  17,  18,  19,  20,  21,  22,  23,
	            24,  25,  26,  27,  28,  29,  30,  31,
	/* 0x20 */  32,  33,  34,  35,  36,  37,  38,  39,
	            40,  41,  42,  43,  44,  45,  46,  47,
	/* 0x30 */  48,  49,  50,  51,  52,  53,  54,  55,
	            56,  57,  58,  59,  60,  61,  62,  63,
	/* 0x40 */  64, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
	           'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
	/* 0x50 */ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
	           'X', 'Y', 'Z',  91,  92,  93,  94,  95,
	/* 0x60 */  96, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
	           'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
	/* 0x70 */ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
	           'X', 'Y', 'Z', 123, 124, 125, 126, 127
};
3409 :
/* Convert the case of the NUL-terminated UTF-8 string src and store the
 * NUL-terminated result in *buf.
 *
 * from, to: parallel BATs whose heaps are read as int arrays: a code
 *           point found at position hb in from is replaced by the value
 *           at position hb in to.  The conversion is to upper case
 *           exactly when from is UTF8_toUpperFrom, otherwise to lower
 *           case.
 * buf, buflen: output buffer and its size; CHECK_STR_BUFFER_LENGTH is
 *           asked for 2 * strlen(src) + 1 bytes before any conversion.
 * malfunc:  function name used in exception messages.
 *
 * Returns MAL_SUCCEED, or an "Illegal Unicode code point" exception when
 * control reaches the illegal label.  NOTE(review): no explicit jump to
 * that label is visible in this function; presumably it is taken from
 * inside UTF8_GETCHAR/UTF8_PUTCHAR -- confirm.
 * The hash on from (from->thash) is used without locking here; STRlower
 * and STRupper wrap the call in str_case_hash_lock/unlock. */
static inline str
convertCase(BAT *from, BAT *to, str *buf, size_t *buflen, const char *src,
			const char *malfunc)
{
	size_t len = strlen(src);
	char *dst;
	const char *end = src + len;
	bool lower_to_upper = from == UTF8_toUpperFrom;
	const Hash *h = from->thash;
	const int *restrict fromb = (const int *restrict) from->theap->base;
	const int *restrict tob = (const int *restrict) to->theap->base;

	/* the from and to bats are not views */
	assert(from->tbaseoff == 0);
	assert(to->tbaseoff == 0);

	CHECK_STR_BUFFER_LENGTH(buf, buflen, 2 * len + 1, malfunc);
	/* use len*2 because only case changing code points exists which change from length 2 to 3 */
	dst = *buf;

	while (src < end) {
		/* fast path: copy a run of ASCII bytes (high bit clear) through
		 * the 128-entry translation table for the requested direction */
		if (lower_to_upper) {
			for (; src < end && (src[0] & 0x80) == 0; )
				*dst++ = lower2upper[*src++];
		} else {
			for (; src < end && (src[0] & 0x80) == 0; )
				*dst++ = upper2lower[*src++];
		}
		if (src < end) {		/* fall back code for complex codepoints */
			int c;

			/* decode one code point into c; src is expected to be
			 * advanced by the macro (the loop relies on it) */
			UTF8_GETCHAR(c, src);
			if (c < 192) {		/* the first 191 characters in unicode are trivial to convert */
				/* for ASCII characters we don't need to do a hash lookup */
				if (lower_to_upper) {
					if ('a' <= c && c <= 'z')
						c += 'A' - 'a';
				} else {
					if (c <= 'Z' && 'A' <= c)
						c += 'a' - 'A';
				}
			} else {
				/* use hash, even though BAT is sorted */
				/* walk the hash chain for c; a code point that is not
				 * found is written out unchanged */
				for (BUN hb = HASHget(h, hash_int(h, &c));
					 hb != BUN_NONE; hb = HASHgetlink(h, hb)) {
					if (c == fromb[hb]) {
						c = tob[hb];
						break;
					}
				}
			}
			/* encode the (possibly converted) code point at dst */
			UTF8_PUTCHAR(c, dst);
		}
	}
	*dst = 0;
	return MAL_SUCCEED;
  illegal:
	throw(MAL, malfunc, SQLSTATE(42000) "Illegal Unicode code point");
}
3469 :
3470 : /*
3471 : * Here you find the wrappers around the version 4 library code
3472 : * It also contains the direct implementation of the string
3473 : * matching support routines.
3474 : */
3475 : #include "mal_exception.h"
3476 :
/*
 * The SQL LIKE function: returns whether s matches the pattern pat.
 *
 * s:   NUL-terminated subject string
 * pat: NUL-terminated pattern; '_' matches exactly one byte of s, '%'
 *      matches any (possibly empty) sequence of bytes, every other byte
 *      matches itself
 * esc: NULL for "no escape character"; otherwise the first byte of esc
 *      is the escape character: the pattern byte that follows it is
 *      matched literally
 *
 * Matching is done byte by byte, so '_' consumes a single byte rather
 * than a full UTF-8 encoded code point.
 */
static bool
STRlike(const char *s, const char *pat, const char *esc)
{
	const char *t, *p;

	t = s;
	for (p = pat; *p && *t; p++) {
		if (esc && *p == *esc) {
			/* escaped byte: must match literally (a dangling escape
			 * at the end of the pattern never matches) */
			p++;
			if (*p != *t)
				return false;
			t++;
		} else if (*p == '_')
			t++;
		else if (*p == '%') {
			/* collapse a run of consecutive '%' into one wildcard */
			p++;
			while (*p == '%')
				p++;
			if (*p == 0)
				return true;	/* tail is acceptable */
			/* try to match the rest of the pattern at every remaining
			 * position of the subject */
			for (; *t; t++)
				if (STRlike(t, p, esc))
					return true;
			/* the rest of the pattern starts with something that needs
			 * at least one byte, so it cannot match the empty rest */
			return false;
		} else if (*p == *t)
			t++;
		else
			return false;
	}
	if (*p == '%' && *(p + 1) == 0)
		return true;
	/* the subject is exhausted: any number of trailing '%' wildcards
	 * still matches the empty remainder (e.g. 'a' LIKE 'a%%'), unless
	 * '%' itself is the escape character */
	if (esc == NULL || *esc != '%') {
		while (*p == '%')
			p++;
	}
	return *t == 0 && *p == 0;
}
3515 :
3516 : static str
3517 0 : STRlikewrap3(bit *ret, const str *s, const str *pat, const str *esc)
3518 : {
3519 0 : if (strNil(*s) || strNil(*pat) || strNil(*esc))
3520 0 : *ret = bit_nil;
3521 : else
3522 0 : *ret = (bit) STRlike(*s, *pat, *esc);
3523 0 : return MAL_SUCCEED;
3524 : }
3525 :
3526 : static str
3527 0 : STRlikewrap(bit *ret, const str *s, const str *pat)
3528 : {
3529 0 : if (strNil(*s) || strNil(*pat))
3530 0 : *ret = bit_nil;
3531 : else
3532 0 : *ret = (bit) STRlike(*s, *pat, NULL);
3533 0 : return MAL_SUCCEED;
3534 : }
3535 :
3536 : static str
3537 0 : STRtostr(str *res, const str *src)
3538 : {
3539 0 : if (*src == 0)
3540 0 : *res = GDKstrdup(str_nil);
3541 : else
3542 0 : *res = GDKstrdup(*src);
3543 0 : if (*res == NULL)
3544 0 : throw(MAL, "str.str", SQLSTATE(HY013) MAL_MALLOC_FAIL);
3545 : return MAL_SUCCEED;
3546 : }
3547 :
3548 : static str
3549 87 : STRLength(int *res, const str *arg1)
3550 : {
3551 87 : const char *s = *arg1;
3552 :
3553 174 : *res = strNil(s) ? int_nil : UTF8_strlen(s);
3554 87 : return MAL_SUCCEED;
3555 : }
3556 :
3557 : static str
3558 3 : STRBytes(int *res, const str *arg1)
3559 : {
3560 3 : const char *s = *arg1;
3561 :
3562 6 : *res = strNil(s) ? int_nil : str_strlen(s);
3563 3 : return MAL_SUCCEED;
3564 : }
3565 :
3566 : str
3567 3818 : str_tail(str *buf, size_t *buflen, const char *s, int off)
3568 : {
3569 3818 : if (off < 0) {
3570 1 : off += UTF8_strlen(s);
3571 1 : if (off < 0)
3572 : off = 0;
3573 : }
3574 3818 : char *tail = UTF8_strtail(s, off);
3575 3820 : size_t nextlen = strlen(tail) + 1;
3576 3820 : CHECK_STR_BUFFER_LENGTH(buf, buflen, nextlen, "str.tail");
3577 3820 : strcpy(*buf, tail);
3578 3820 : return MAL_SUCCEED;
3579 : }
3580 :
3581 : static str
3582 1 : STRTail(str *res, const str *arg1, const int *offset)
3583 : {
3584 1 : str buf = NULL, msg = MAL_SUCCEED;
3585 1 : const char *s = *arg1;
3586 1 : int off = *offset;
3587 :
3588 2 : if (strNil(s) || is_int_nil(off)) {
3589 0 : *res = GDKstrdup(str_nil);
3590 : } else {
3591 1 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
3592 :
3593 1 : *res = NULL;
3594 1 : if (!(buf = GDKmalloc(buflen)))
3595 0 : throw(MAL, "str.tail", SQLSTATE(HY013) MAL_MALLOC_FAIL);
3596 1 : if ((msg = str_tail(&buf, &buflen, s, off)) != MAL_SUCCEED) {
3597 0 : GDKfree(buf);
3598 0 : return msg;
3599 : }
3600 1 : *res = GDKstrdup(buf);
3601 : }
3602 :
3603 1 : GDKfree(buf);
3604 1 : if (!*res)
3605 0 : msg = createException(MAL, "str.tail", SQLSTATE(HY013) MAL_MALLOC_FAIL);
3606 : return msg;
3607 : }
3608 :
3609 : /* copy the substring s[off:off+l] into *buf, replacing *buf with a
3610 : * freshly allocated buffer if the substring doesn't fit; off is 0
3611 : * based, and both off and l count in Unicode codepoints (i.e. not
3612 : * bytes); if off < 0, off counts from the end of the string */
3613 : str
3614 3548621 : str_Sub_String(str *buf, size_t *buflen, const char *s, int off, int l)
3615 : {
3616 3548621 : size_t len;
3617 :
3618 3548621 : if (off < 0) {
3619 4 : off += UTF8_strlen(s);
3620 4 : if (off < 0) {
3621 3 : l += off;
3622 3 : off = 0;
3623 : }
3624 : }
3625 : /* here, off >= 0 */
3626 3548621 : if (l < 0) {
3627 1036 : strcpy(*buf, "");
3628 1036 : return MAL_SUCCEED;
3629 : }
3630 3547585 : s = UTF8_strtail(s, off);
3631 3582553 : len = (size_t) (UTF8_strtail(s, l) - s + 1);
3632 3592756 : CHECK_STR_BUFFER_LENGTH(buf, buflen, len, "str.substring");
3633 3592756 : strcpy_len(*buf, s, len);
3634 3592756 : return MAL_SUCCEED;
3635 : }
3636 :
3637 : static str
3638 4 : STRSubString(str *res, const str *arg1, const int *offset, const int *length)
3639 : {
3640 4 : str buf = NULL, msg = MAL_SUCCEED;
3641 4 : const char *s = *arg1;
3642 4 : int off = *offset, len = *length;
3643 :
3644 7 : if (strNil(s) || is_int_nil(off) || is_int_nil(len)) {
3645 1 : *res = GDKstrdup(str_nil);
3646 : } else {
3647 3 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
3648 :
3649 3 : *res = NULL;
3650 3 : if (!(buf = GDKmalloc(buflen)))
3651 0 : throw(MAL, "str.substring", SQLSTATE(HY013) MAL_MALLOC_FAIL);
3652 3 : if ((msg = str_Sub_String(&buf, &buflen, s, off, len)) != MAL_SUCCEED) {
3653 0 : GDKfree(buf);
3654 0 : return msg;
3655 : }
3656 3 : *res = GDKstrdup(buf);
3657 : }
3658 :
3659 4 : GDKfree(buf);
3660 4 : if (!*res)
3661 0 : msg = createException(MAL, "str.substring",
3662 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
3663 : return msg;
3664 : }
3665 :
/* Write code point c, followed by a terminating NUL, into *buf.
 * CHECK_STR_BUFFER_LENGTH is asked for 5 bytes first.
 * Returns MAL_SUCCEED, or an "Illegal Unicode code point" exception when
 * control reaches the illegal label (NOTE(review): the jump is not
 * visible here; presumably UTF8_PUTCHAR performs it for values it cannot
 * encode -- confirm). */
str
str_from_wchr(str *buf, size_t *buflen, int c)
{
	CHECK_STR_BUFFER_LENGTH(buf, buflen, 5, "str.unicode");
	str s = *buf;
	/* s is expected to be left just past the written bytes, so that the
	 * next statement terminates the string */
	UTF8_PUTCHAR(c, s);
	*s = 0;
	return MAL_SUCCEED;
  illegal:
	throw(MAL, "str.unicode", SQLSTATE(42000) "Illegal Unicode code point");
}
3677 :
3678 : static str
3679 2 : STRFromWChr(str *res, const int *c)
3680 : {
3681 2 : str buf = NULL, msg = MAL_SUCCEED;
3682 2 : int cc = *c;
3683 :
3684 2 : if (is_int_nil(cc)) {
3685 0 : *res = GDKstrdup(str_nil);
3686 : } else {
3687 2 : size_t buflen = MAX(strlen(str_nil) + 1, 8);
3688 :
3689 2 : *res = NULL;
3690 2 : if (!(buf = GDKmalloc(buflen)))
3691 0 : throw(MAL, "str.unicode", SQLSTATE(HY013) MAL_MALLOC_FAIL);
3692 2 : if ((msg = str_from_wchr(&buf, &buflen, cc)) != MAL_SUCCEED) {
3693 0 : GDKfree(buf);
3694 0 : return msg;
3695 : }
3696 2 : *res = GDKstrdup(buf);
3697 : }
3698 :
3699 2 : GDKfree(buf);
3700 2 : if (!*res)
3701 0 : msg = createException(MAL, "str.unicode",
3702 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
3703 : return msg;
3704 : }
3705 :
/* return the Unicode code point of arg1 at position at
 *
 * *res is set to int_nil when s is nil, at is nil or negative, or when
 * UTF8_strtail(s, at) yields NULL or points at the terminating NUL
 * (i.e. there is no character at that position).
 * Returns MAL_SUCCEED, or an "Illegal Unicode code point" exception when
 * control reaches the illegal label (NOTE(review): presumably jumped to
 * from inside UTF8_GETCHAR on malformed input -- confirm). */
str
str_wchr_at(int *res, const char *s, int at)
{
	/* 64bit: should have lng arg */
	if (strNil(s) || is_int_nil(at) || at < 0) {
		*res = int_nil;
		return MAL_SUCCEED;
	}
	s = UTF8_strtail(s, at);
	if (s == NULL || *s == 0) {
		*res = int_nil;
		return MAL_SUCCEED;
	}
	/* decode the code point that starts at s directly into *res */
	UTF8_GETCHAR(*res, s);
	return MAL_SUCCEED;
  illegal:
	throw(MAL, "str.unicodeAt", SQLSTATE(42000) "Illegal Unicode code point");
}
3725 :
3726 : static str
3727 0 : STRWChrAt(int *res, const str *arg1, const int *at)
3728 : {
3729 0 : return str_wchr_at(res, *arg1, *at);
3730 : }
3731 :
3732 : str
3733 79694 : str_lower(str *buf, size_t *buflen, const char *s)
3734 : {
3735 79694 : return convertCase(UTF8_toLowerFrom, UTF8_toLowerTo, buf, buflen, s,
3736 : "str.lower");
3737 : }
3738 :
3739 : static inline str
3740 2040 : STRlower(str *res, const str *arg1)
3741 : {
3742 2040 : str buf = NULL, msg = MAL_SUCCEED;
3743 2040 : const char *s = *arg1;
3744 :
3745 2040 : if (strNil(s)) {
3746 312 : *res = GDKstrdup(str_nil);
3747 : } else {
3748 1728 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
3749 :
3750 1728 : *res = NULL;
3751 1728 : if (!(buf = GDKmalloc(buflen)))
3752 0 : throw(MAL, "str.lower", SQLSTATE(HY013) MAL_MALLOC_FAIL);
3753 1726 : if ((msg = str_case_hash_lock(false))) {
3754 0 : GDKfree(buf);
3755 0 : return msg;
3756 : }
3757 1730 : msg = str_lower(&buf, &buflen, s);
3758 1723 : str_case_hash_unlock(false);
3759 1729 : if (msg != MAL_SUCCEED) {
3760 0 : GDKfree(buf);
3761 0 : return msg;
3762 : }
3763 1729 : *res = GDKstrdup(buf);
3764 : }
3765 :
3766 2041 : GDKfree(buf);
3767 2042 : if (!*res)
3768 0 : msg = createException(MAL, "str.lower",
3769 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
3770 : return msg;
3771 : }
3772 :
3773 : str
3774 198623 : str_upper(str *buf, size_t *buflen, const char *s)
3775 : {
3776 198623 : return convertCase(UTF8_toUpperFrom, UTF8_toUpperTo, buf, buflen, s,
3777 : "str.upper");
3778 : }
3779 :
3780 : static str
3781 101918 : STRupper(str *res, const str *arg1)
3782 : {
3783 101918 : str buf = NULL, msg = MAL_SUCCEED;
3784 101918 : const char *s = *arg1;
3785 :
3786 101918 : if (strNil(s)) {
3787 302 : *res = GDKstrdup(str_nil);
3788 : } else {
3789 101616 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
3790 :
3791 101616 : *res = NULL;
3792 101616 : if (!(buf = GDKmalloc(buflen)))
3793 0 : throw(MAL, "str.upper", SQLSTATE(HY013) MAL_MALLOC_FAIL);
3794 101616 : if ((msg = str_case_hash_lock(true))) {
3795 0 : GDKfree(buf);
3796 0 : return msg;
3797 : }
3798 101616 : msg = str_upper(&buf, &buflen, s);
3799 101616 : str_case_hash_unlock(true);
3800 101616 : if (msg != MAL_SUCCEED) {
3801 0 : GDKfree(buf);
3802 0 : return msg;
3803 : }
3804 101616 : *res = GDKstrdup(buf);
3805 : }
3806 :
3807 101918 : GDKfree(buf);
3808 101918 : if (!*res)
3809 0 : msg = createException(MAL, "str.upper",
3810 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
3811 : return msg;
3812 : }
3813 :
/* Prefix test in strncmp style: compares the first plen bytes of s with
 * prefix and returns the strncmp result, i.e. 0 when s starts with
 * prefix and a non-zero value otherwise. */
int
str_is_prefix(const char *s, const char *prefix, int plen)
{
	const int cmp = strncmp(s, prefix, (size_t) plen);

	return cmp;
}
3820 :
/* Case-insensitive counterpart of str_is_prefix: returns the result of
 * utf8ncasecmp(s, prefix, plen) unchanged. */
int
str_is_iprefix(const char *s, const char *prefix, int plen)
{
	const int cmp = utf8ncasecmp(s, prefix, plen);

	return cmp;
}
3826 :
/* Suffix test in strcmp style: returns -1 when s is shorter than sul
 * bytes, otherwise the strcmp result of the last sul bytes of s against
 * suffix (0 means s ends with suffix). */
int
str_is_suffix(const char *s, const char *suffix, int sul)
{
	const int sl = str_strlen(s);

	return sl < sul ? -1 : strcmp(s + sl - sul, suffix);
}
3837 :
/* case insensitive endswith check
 *
 * Returns 0 when s ends with suffix (compared with utf8casecmp) and 1
 * otherwise.  The byte length sul is deliberately ignored: the tail of
 * s is located by counting characters instead of bytes. */
int
str_is_isuffix(const char *s, const char *suffix, int sul)
{
	const char *e = s + strlen(s);
	const char *sf;

	(void) sul;
	/* note that the uppercase and lowercase forms of a character aren't
	 * necessarily the same length in their UTF-8 encodings */
	/* for every byte of suffix that is not a UTF-8 continuation byte
	 * (i.e. for every character), move e back over one character of s;
	 * stop early when the start of s is reached */
	for (sf = suffix; *sf && e > s; sf++) {
		if ((*sf & 0xC0) != 0x80) {
			while ((*--e & 0xC0) == 0x80)
				;
		}
	}
	/* skip the continuation bytes of the last character looked at */
	while ((*sf & 0xC0) == 0x80)
		sf++;
	/* unconsumed suffix means s has fewer characters than suffix;
	 * otherwise compare the located tail of s with the whole suffix */
	return *sf != 0 || utf8casecmp(e, suffix) != 0;
}
3858 :
/* Substring test: returns 0 when n occurs in h (per strstr) and 1 when
 * it does not.  nlen is accepted for interface symmetry but unused. */
int
str_contains(const char *h, const char *n, int nlen)
{
	const char *hit = strstr(h, n);

	(void) nlen;
	return hit == NULL ? 1 : 0;
}
3865 :
/* Case-insensitive substring test: returns 0 when utf8casestr finds n
 * in h and 1 when it does not.  nlen is unused. */
int
str_icontains(const char *h, const char *n, int nlen)
{
	const char *hit = utf8casestr(h, n);

	(void) nlen;
	return hit == NULL ? 1 : 0;
}
3872 :
/* Fetch the arguments of a (str, str [, bit]) -> bit MAL pattern:
 * R receives the reference to result slot 0, S1 and S2 the two string
 * arguments, and ICASE is true only when a fourth argument is present
 * and set.  The flag is stored in the variable passed as ICASE (it used
 * to be written to a variable that had to be called icase, regardless
 * of the macro argument). */
#define STR_MAPARGS(STK, PCI, R, S1, S2, ICASE)							\
	do {																\
		R = getArgReference(STK, PCI, 0);								\
		S1 = *getArgReference_str(STK, PCI, 1);							\
		S2 = *getArgReference_str(STK, PCI, 2);							\
		ICASE = (PCI)->argc == 4 && *getArgReference_bit(STK, PCI, 3);	\
	} while (0)
3880 :
3881 : static str
3882 2 : STRstartswith(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
3883 : {
3884 2 : (void) cntxt;
3885 2 : (void) mb;
3886 :
3887 2 : str s1, s2;
3888 2 : bit *r, icase;
3889 :
3890 2 : STR_MAPARGS(stk, pci, r, s1, s2, icase);
3891 :
3892 4 : if (strNil(s1) || strNil(s2)) {
3893 0 : *r = bit_nil;
3894 : } else {
3895 2 : int s2_len = str_strlen(s2);
3896 4 : *r = icase ?
3897 0 : str_is_iprefix(s1, s2, s2_len) == 0 :
3898 2 : str_is_prefix(s1, s2, s2_len) == 0;
3899 : }
3900 2 : return MAL_SUCCEED;
3901 : }
3902 :
3903 : static str
3904 0 : STRendswith(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
3905 : {
3906 0 : (void) cntxt;
3907 0 : (void) mb;
3908 :
3909 0 : str s1, s2;
3910 0 : bit *r, icase;
3911 :
3912 0 : STR_MAPARGS(stk, pci, r, s1, s2, icase);
3913 :
3914 0 : if (strNil(s1) || strNil(s2)) {
3915 0 : *r = bit_nil;
3916 : } else {
3917 0 : int s2_len = str_strlen(s2);
3918 0 : *r = icase ?
3919 0 : str_is_isuffix(s1, s2, s2_len) == 0 :
3920 0 : str_is_suffix(s1, s2, s2_len) == 0;
3921 : }
3922 0 : return MAL_SUCCEED;
3923 : }
3924 :
3925 : /* returns whether haystack contains needle */
3926 : static str
3927 0 : STRcontains(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
3928 : {
3929 0 : (void) cntxt;
3930 0 : (void) mb;
3931 :
3932 0 : str s1, s2;
3933 0 : bit *r, icase;
3934 :
3935 0 : STR_MAPARGS(stk, pci, r, s1, s2, icase);
3936 :
3937 0 : if (strNil(s1) || strNil(s2)) {
3938 0 : *r = bit_nil;
3939 : } else {
3940 0 : int s2_len = str_strlen(s2);
3941 0 : *r = icase ?
3942 0 : str_icontains(s1, s2, s2_len) == 0 :
3943 0 : str_contains(s1, s2, s2_len) == 0;
3944 : }
3945 0 : return MAL_SUCCEED;
3946 : }
3947 :
/* Locate the first occurrence of s2 in s (per strstr) and return its
 * position as computed by UTF8_strpos, or -1 when s2 does not occur.
 * slen is unused. */
int
str_search(const char *s, const char *s2, int slen)
{
	/* 64bit: should return lng */
	const char *hit = strstr(s, s2);

	(void) slen;
	if (hit == NULL)
		return -1;
	return UTF8_strpos(s, hit);
}
3958 :
/* Case-insensitive variant of str_search: locate s2 in s with
 * utf8casestr and return its position as computed by UTF8_strpos, or -1
 * when s2 does not occur.  slen is unused. */
int
str_isearch(const char *s, const char *s2, int slen)
{
	/* 64bit: should return lng */
	const char *hit = utf8casestr(s, s2);

	(void) slen;
	if (hit == NULL)
		return -1;
	return UTF8_strpos(s, hit);
}
3969 :
3970 : /* find first occurrence of needle in haystack */
3971 : static str
3972 0 : STRstr_search(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
3973 : {
3974 0 : (void) cntxt;
3975 0 : (void) mb;
3976 0 : bit *res = getArgReference(stk, pci, 0);
3977 0 : const str *haystack = getArgReference(stk, pci, 1),
3978 0 : *needle = getArgReference(stk, pci, 2);
3979 0 : bit icase = pci->argc == 4 && *getArgReference_bit(stk, pci, 3);
3980 0 : str s = *haystack, h = *needle, msg = MAL_SUCCEED;
3981 0 : if (strNil(s) || strNil(h)) {
3982 0 : *res = bit_nil;
3983 : } else {
3984 0 : int needle_len = str_strlen(h);
3985 :
3986 0 : *res = icase ?
3987 0 : str_isearch(s, h, needle_len) :
3988 0 : str_search(s, h, needle_len);
3989 : }
3990 0 : return msg;
3991 : }
3992 :
/* Find the last occurrence of s2 (slen bytes long) in s by trying every
 * start position from the rightmost possible one down to the start of
 * s.  Returns the position computed by UTF8_strpos, or -1 when s2 does
 * not occur (or is longer than s). */
int
str_reverse_str_search(const char *s, const char *s2, int slen)
{
	/* 64bit: should return lng */
	int len = str_strlen(s);
	int res = -1;				/* changed if found */

	if (len >= slen) {
		const char *p = s + len - slen;

		for (;;) {
			if (strncmp(p, s2, slen) == 0) {
				res = UTF8_strpos(s, p);
				break;
			}
			/* stop at the first byte: never step p to before s, which
			 * would form an out-of-bounds pointer */
			if (p == s)
				break;
			p--;
		}
	}
	return res;
}
4011 :
/* Case-insensitive variant of str_reverse_str_search: find the last
 * position in s at which utf8ncasecmp reports s2 (slen bytes long) as
 * equal, trying start positions from the right.  Returns the position
 * computed by UTF8_strpos, or -1 when there is no match. */
int
str_reverse_str_isearch(const char *s, const char *s2, int slen)
{
	/* 64bit: should return lng */
	int len = str_strlen(s);
	int res = -1;				/* changed if found */

	if (len >= slen) {
		const char *p = s + len - slen;

		for (;;) {
			if (utf8ncasecmp(p, s2, slen) == 0) {
				res = UTF8_strpos(s, p);
				break;
			}
			/* stop at the first byte: never step p to before s, which
			 * would form an out-of-bounds pointer */
			if (p == s)
				break;
			p--;
		}
	}
	return res;
}
4030 :
4031 : /* find last occurrence of arg2 in arg1 */
4032 : static str
4033 0 : STRrevstr_search(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
4034 : {
4035 0 : (void) cntxt;
4036 0 : (void) mb;
4037 0 : bit *res = getArgReference(stk, pci, 0);
4038 0 : const str *haystack = getArgReference(stk, pci, 1);
4039 0 : const str *needle = getArgReference(stk, pci, 2);
4040 0 : bit icase = pci->argc == 4 && *getArgReference_bit(stk, pci, 3);
4041 0 : str s = *haystack, h = *needle, msg = MAL_SUCCEED;
4042 0 : if (strNil(s) || strNil(h)) {
4043 0 : *res = bit_nil;
4044 : } else {
4045 0 : int needle_len = str_strlen(h);
4046 :
4047 0 : *res = icase ?
4048 0 : str_reverse_str_isearch(s, h, needle_len) :
4049 0 : str_reverse_str_search(s, h, needle_len);
4050 : }
4051 0 : return msg;
4052 : }
4053 :
4054 : str
4055 17 : str_splitpart(str *buf, size_t *buflen, const char *s, const char *s2, int f)
4056 : {
4057 17 : size_t len;
4058 17 : char *p = NULL;
4059 :
4060 17 : if (f <= 0)
4061 2 : throw(MAL, "str.splitpart",
4062 : SQLSTATE(42000) "field position must be greater than zero");
4063 :
4064 15 : len = strlen(s2);
4065 15 : if (len) {
4066 17 : while ((p = strstr(s, s2)) != NULL && f > 1) {
4067 6 : s = p + len;
4068 6 : f--;
4069 : }
4070 : }
4071 :
4072 15 : if (f != 1) {
4073 8 : strcpy(*buf, "");
4074 8 : return MAL_SUCCEED;
4075 : }
4076 :
4077 7 : if (p == NULL) {
4078 4 : len = strlen(s);
4079 : } else {
4080 3 : len = (size_t) (p - s);
4081 : }
4082 :
4083 7 : len++;
4084 7 : CHECK_STR_BUFFER_LENGTH(buf, buflen, len, "str.splitpart");
4085 7 : strcpy_len(*buf, s, len);
4086 7 : return MAL_SUCCEED;
4087 : }
4088 :
4089 : static str
4090 17 : STRsplitpart(str *res, str *haystack, str *needle, int *field)
4091 : {
4092 17 : str buf = NULL, msg = MAL_SUCCEED;
4093 17 : const char *s = *haystack, *s2 = *needle;
4094 17 : int f = *field;
4095 :
4096 51 : if (strNil(s) || strNil(s2) || is_int_nil(f)) {
4097 0 : *res = GDKstrdup(str_nil);
4098 : } else {
4099 17 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
4100 :
4101 17 : *res = NULL;
4102 17 : if (!(buf = GDKmalloc(buflen)))
4103 2 : throw(MAL, "str.splitpart", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4104 17 : if ((msg = str_splitpart(&buf, &buflen, s, s2, f)) != MAL_SUCCEED) {
4105 2 : GDKfree(buf);
4106 2 : return msg;
4107 : }
4108 15 : *res = GDKstrdup(buf);
4109 : }
4110 :
4111 15 : GDKfree(buf);
4112 15 : if (!*res)
4113 0 : msg = createException(MAL, "str.splitpart",
4114 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4115 : return msg;
4116 : }
4117 :
/* Count the bytes at the start of s (len bytes long) that make up code
 * points listed in rm[0..nrm-1]; scanning stops at the first code point
 * that is not in rm.  The returned count is the number of bytes to skip
 * in order to strip those code points from the left. */
static size_t
lstrip(const char *s, size_t len, const int *rm, size_t nrm)
{
	int cp;
	size_t idx, width, stripped = 0;

	while (len > 0) {
		UTF8_NEXTCHAR(cp, width, s);
		assert(width > 0 && width <= len);
		/* look the code point up in the removal list */
		idx = 0;
		while (idx < nrm && rm[idx] != cp)
			idx++;
		if (idx == nrm)
			break;				/* not listed: stripping ends here */
		s += width;
		stripped += width;
		len -= width;
	}
	return stripped;
}
4141 :
/* Return the length in bytes that remains of s (len bytes long) after
 * dropping, from the right, every trailing code point that is listed in
 * rm[0..nrm-1]. */
static size_t
rstrip(const char *s, size_t len, const int *rm, size_t nrm)
{
	int cp;
	size_t idx, width;

	while (len > 0) {
		UTF8_LASTCHAR(cp, width, s, len);
		assert(width > 0 && width <= len);
		/* look the code point up in the removal list */
		idx = 0;
		while (idx < nrm && rm[idx] != cp)
			idx++;
		if (idx == nrm)
			break;				/* not listed: stripping ends here */
		len -= width;
	}
	return len;
}
4164 :
/* Code points treated as whitespace by str_strip, str_ltrim and
 * str_rtrim: the ASCII blank and control characters listed first,
 * followed by the Unicode space separators. */
const int whitespace[] = {
	' ',						/* space */
	'\t',						/* tab (character tabulation) */
	'\n',						/* line feed */
	'\r',						/* carriage return */
	'\f',						/* form feed */
	'\v',						/* vertical tab (line tabulation) */
	/* below the code points that have the Unicode Zs (space separator) property */
	0x00A0,						/* no-break space */
	0x1680,						/* ogham space mark */
	0x2000,						/* en quad */
	0x2001,						/* em quad */
	0x2002,						/* en space */
	0x2003,						/* em space */
	0x2004,						/* three-per-em space */
	0x2005,						/* four-per-em space */
	0x2006,						/* six-per-em space */
	0x2007,						/* figure space */
	0x2008,						/* punctuation space */
	0x2009,						/* thin space */
	0x200A,						/* hair space */
	0x202F,						/* narrow no-break space */
	0x205F,						/* medium mathematical space */
	0x3000,						/* ideographic space */
};

/* number of entries in the whitespace table */
#define NSPACES		(sizeof(whitespace) / sizeof(whitespace[0]))
4192 :
4193 : str
4194 275 : str_strip(str *buf, size_t *buflen, const char *s)
4195 : {
4196 275 : size_t len = strlen(s);
4197 275 : size_t n = lstrip(s, len, whitespace, NSPACES);
4198 274 : s += n;
4199 274 : len -= n;
4200 274 : n = rstrip(s, len, whitespace, NSPACES);
4201 :
4202 274 : n++;
4203 274 : CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.strip");
4204 274 : strcpy_len(*buf, s, n);
4205 274 : return MAL_SUCCEED;
4206 : }
4207 :
4208 : /* remove all whitespace from either side of arg1 */
4209 : static str
4210 4 : STRStrip(str *res, const str *arg1)
4211 : {
4212 4 : str buf = NULL, msg = MAL_SUCCEED;
4213 4 : const char *s = *arg1;
4214 :
4215 4 : if (strNil(s)) {
4216 0 : *res = GDKstrdup(str_nil);
4217 : } else {
4218 4 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
4219 :
4220 4 : *res = NULL;
4221 4 : if (!(buf = GDKmalloc(buflen)))
4222 0 : throw(MAL, "str.strip", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4223 4 : if ((msg = str_strip(&buf, &buflen, s)) != MAL_SUCCEED) {
4224 0 : GDKfree(buf);
4225 0 : return msg;
4226 : }
4227 4 : *res = GDKstrdup(buf);
4228 : }
4229 :
4230 4 : GDKfree(buf);
4231 4 : if (!*res)
4232 0 : msg = createException(MAL, "str.strip",
4233 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4234 : return msg;
4235 : }
4236 :
4237 : str
4238 18 : str_ltrim(str *buf, size_t *buflen, const char *s)
4239 : {
4240 18 : size_t len = strlen(s);
4241 18 : size_t n = lstrip(s, len, whitespace, NSPACES);
4242 18 : size_t nallocate = len - n + 1;
4243 :
4244 18 : CHECK_STR_BUFFER_LENGTH(buf, buflen, nallocate, "str.ltrim");
4245 18 : strcpy_len(*buf, s + n, nallocate);
4246 18 : return MAL_SUCCEED;
4247 : }
4248 :
4249 : /* remove all whitespace from the start (left) of arg1 */
4250 : static str
4251 10 : STRLtrim(str *res, const str *arg1)
4252 : {
4253 10 : str buf = NULL, msg = MAL_SUCCEED;
4254 10 : const char *s = *arg1;
4255 :
4256 10 : if (strNil(s)) {
4257 0 : *res = GDKstrdup(str_nil);
4258 : } else {
4259 10 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
4260 :
4261 10 : *res = NULL;
4262 10 : if (!(buf = GDKmalloc(buflen)))
4263 0 : throw(MAL, "str.ltrim", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4264 10 : if ((msg = str_ltrim(&buf, &buflen, s)) != MAL_SUCCEED) {
4265 0 : GDKfree(buf);
4266 0 : return msg;
4267 : }
4268 10 : *res = GDKstrdup(buf);
4269 : }
4270 :
4271 10 : GDKfree(buf);
4272 10 : if (!*res)
4273 0 : msg = createException(MAL, "str.ltrim",
4274 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4275 : return msg;
4276 : }
4277 :
4278 : str
4279 96 : str_rtrim(str *buf, size_t *buflen, const char *s)
4280 : {
4281 96 : size_t len = strlen(s);
4282 96 : size_t n = rstrip(s, len, whitespace, NSPACES);
4283 :
4284 96 : n++;
4285 96 : CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.rtrim");
4286 96 : strcpy_len(*buf, s, n);
4287 96 : return MAL_SUCCEED;
4288 : }
4289 :
4290 : /* remove all whitespace from the end (right) of arg1 */
4291 : static str
4292 6 : STRRtrim(str *res, const str *arg1)
4293 : {
4294 6 : str buf = NULL, msg = MAL_SUCCEED;
4295 6 : const char *s = *arg1;
4296 :
4297 6 : if (strNil(s)) {
4298 0 : *res = GDKstrdup(str_nil);
4299 : } else {
4300 6 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
4301 :
4302 6 : *res = NULL;
4303 6 : if (!(buf = GDKmalloc(buflen)))
4304 0 : throw(MAL, "str.rtrim", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4305 6 : if ((msg = str_rtrim(&buf, &buflen, s)) != MAL_SUCCEED) {
4306 0 : GDKfree(buf);
4307 0 : return msg;
4308 : }
4309 6 : *res = GDKstrdup(buf);
4310 : }
4311 :
4312 6 : GDKfree(buf);
4313 6 : if (!*res)
4314 0 : msg = createException(MAL, "str.rtrim",
4315 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4316 : return msg;
4317 : }
4318 :
/* return a list of codepoints in s
 *
 * The code points are stored as an array of int in *buf (the buffer is
 * reused as int storage), and their number is returned in *n.
 * len_s is the length of s in bytes; CHECK_STR_BUFFER_LENGTH is asked
 * for len_s * sizeof(int) bytes, i.e. room for one int per input byte.
 * malfunc is the function name used in exception messages.
 * Returns MAL_SUCCEED, or an "Illegal Unicode code point" exception when
 * control reaches the illegal label (NOTE(review): presumably jumped to
 * from inside UTF8_GETCHAR on malformed input -- confirm). */
static str
trimchars(str *buf, size_t *buflen, size_t *n, const char *s, size_t len_s,
		  const char *malfunc)
{
	size_t len = 0, nlen = len_s * sizeof(int);
	int c, *cbuf;

	assert(s);
	CHECK_STR_BUFFER_LENGTH(buf, buflen, nlen, malfunc);
	cbuf = *(int **) buf;

	/* decode s one code point at a time; the loop relies on
	 * UTF8_GETCHAR advancing s */
	while (*s) {
		UTF8_GETCHAR(c, s);
		assert(!is_int_nil(c));
		cbuf[len++] = c;
	}
	*n = len;
	return MAL_SUCCEED;
  illegal:
	throw(MAL, malfunc, SQLSTATE(42000) "Illegal Unicode code point");
}
4341 :
4342 : str
4343 22 : str_strip2(str *buf, size_t *buflen, const char *s, const char *s2)
4344 : {
4345 22 : str msg = MAL_SUCCEED;
4346 22 : size_t len, n, n2, n3;
4347 :
4348 22 : if ((n2 = strlen(s2)) == 0) {
4349 1 : len = strlen(s) + 1;
4350 1 : CHECK_STR_BUFFER_LENGTH(buf, buflen, len, "str.strip2");
4351 1 : strcpy(*buf, s);
4352 1 : return MAL_SUCCEED;
4353 : } else {
4354 21 : if ((msg = trimchars(buf, buflen, &n3, s2, n2, "str.strip2")) != MAL_SUCCEED)
4355 : return msg;
4356 21 : len = strlen(s);
4357 21 : n = lstrip(s, len, *(int **) buf, n3);
4358 21 : s += n;
4359 21 : len -= n;
4360 21 : n = rstrip(s, len, *(int **) buf, n3);
4361 :
4362 21 : n++;
4363 21 : CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.strip2");
4364 21 : strcpy_len(*buf, s, n);
4365 21 : return MAL_SUCCEED;
4366 : }
4367 : }
4368 :
4369 : /* remove the longest string containing only characters from arg2 from
4370 : * either side of arg1 */
4371 : static str
4372 19 : STRStrip2(str *res, const str *arg1, const str *arg2)
4373 : {
4374 19 : str buf = NULL, msg = MAL_SUCCEED;
4375 19 : const char *s = *arg1, *s2 = *arg2;
4376 :
4377 36 : if (strNil(s) || strNil(s2)) {
4378 3 : *res = GDKstrdup(str_nil);
4379 : } else {
4380 16 : size_t buflen = INITIAL_STR_BUFFER_LENGTH * sizeof(int);
4381 :
4382 16 : *res = NULL;
4383 16 : if (!(buf = GDKmalloc(buflen)))
4384 0 : throw(MAL, "str.strip2", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4385 16 : if ((msg = str_strip2(&buf, &buflen, s, s2)) != MAL_SUCCEED) {
4386 0 : GDKfree(buf);
4387 0 : return msg;
4388 : }
4389 16 : *res = GDKstrdup(buf);
4390 : }
4391 :
4392 19 : GDKfree(buf);
4393 19 : if (!*res)
4394 0 : msg = createException(MAL, "str.strip2",
4395 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4396 : return msg;
4397 : }
4398 :
4399 : str
4400 14 : str_ltrim2(str *buf, size_t *buflen, const char *s, const char *s2)
4401 : {
4402 14 : str msg = MAL_SUCCEED;
4403 14 : size_t len, n, n2, n3, nallocate;
4404 :
4405 14 : if ((n2 = strlen(s2)) == 0) {
4406 1 : len = strlen(s) + 1;
4407 1 : CHECK_STR_BUFFER_LENGTH(buf, buflen, len, "str.ltrim2");
4408 1 : strcpy(*buf, s);
4409 1 : return MAL_SUCCEED;
4410 : } else {
4411 13 : if ((msg = trimchars(buf, buflen, &n3, s2, n2, "str.ltrim2")) != MAL_SUCCEED)
4412 : return msg;
4413 13 : len = strlen(s);
4414 13 : n = lstrip(s, len, *(int **) buf, n3);
4415 13 : nallocate = len - n + 1;
4416 :
4417 13 : CHECK_STR_BUFFER_LENGTH(buf, buflen, nallocate, "str.ltrim2");
4418 13 : strcpy_len(*buf, s + n, nallocate);
4419 13 : return MAL_SUCCEED;
4420 : }
4421 : }
4422 :
4423 : /* remove the longest string containing only characters from arg2 from
4424 : * the start (left) of arg1 */
4425 : static str
4426 8 : STRLtrim2(str *res, const str *arg1, const str *arg2)
4427 : {
4428 8 : str buf = NULL, msg = MAL_SUCCEED;
4429 8 : const char *s = *arg1, *s2 = *arg2;
4430 :
4431 16 : if (strNil(s) || strNil(s2)) {
4432 0 : *res = GDKstrdup(str_nil);
4433 : } else {
4434 8 : size_t buflen = INITIAL_STR_BUFFER_LENGTH * sizeof(int);
4435 :
4436 8 : *res = NULL;
4437 8 : if (!(buf = GDKmalloc(buflen)))
4438 0 : throw(MAL, "str.ltrim2", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4439 8 : if ((msg = str_ltrim2(&buf, &buflen, s, s2)) != MAL_SUCCEED) {
4440 0 : GDKfree(buf);
4441 0 : return msg;
4442 : }
4443 8 : *res = GDKstrdup(buf);
4444 : }
4445 :
4446 8 : GDKfree(buf);
4447 8 : if (!*res)
4448 0 : msg = createException(MAL, "str.ltrim2",
4449 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4450 : return msg;
4451 : }
4452 :
4453 : str
4454 13 : str_rtrim2(str *buf, size_t *buflen, const char *s, const char *s2)
4455 : {
4456 13 : str msg = MAL_SUCCEED;
4457 13 : size_t len, n, n2, n3;
4458 :
4459 13 : if ((n2 = strlen(s2)) == 0) {
4460 2 : len = strlen(s) + 1;
4461 2 : CHECK_STR_BUFFER_LENGTH(buf, buflen, len, "str.rtrim2");
4462 2 : strcpy(*buf, s);
4463 2 : return MAL_SUCCEED;
4464 : } else {
4465 11 : if ((msg = trimchars(buf, buflen, &n3, s2, n2, "str.ltrim2")) != MAL_SUCCEED)
4466 : return msg;
4467 11 : len = strlen(s);
4468 11 : n = rstrip(s, len, *(int **) buf, n3);
4469 11 : n++;
4470 :
4471 11 : CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.rtrim2");
4472 11 : strcpy_len(*buf, s, n);
4473 11 : return MAL_SUCCEED;
4474 : }
4475 : }
4476 :
4477 : /* remove the longest string containing only characters from arg2 from
4478 : * the end (right) of arg1 */
4479 : static str
4480 7 : STRRtrim2(str *res, const str *arg1, const str *arg2)
4481 : {
4482 7 : str buf = NULL, msg = MAL_SUCCEED;
4483 7 : const char *s = *arg1, *s2 = *arg2;
4484 :
4485 14 : if (strNil(s) || strNil(s2)) {
4486 0 : *res = GDKstrdup(str_nil);
4487 : } else {
4488 7 : size_t buflen = INITIAL_STR_BUFFER_LENGTH * sizeof(int);
4489 :
4490 7 : *res = NULL;
4491 7 : if (!(buf = GDKmalloc(buflen)))
4492 0 : throw(MAL, "str.rtrim2", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4493 7 : if ((msg = str_rtrim2(&buf, &buflen, s, s2)) != MAL_SUCCEED) {
4494 0 : GDKfree(buf);
4495 0 : return msg;
4496 : }
4497 7 : *res = GDKstrdup(buf);
4498 : }
4499 :
4500 7 : GDKfree(buf);
4501 7 : if (!*res)
4502 0 : msg = createException(MAL, "str.rtrim2",
4503 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4504 : return msg;
4505 : }
4506 :
/* Pad or truncate s to a length of len characters and store the result
 * in *buf.
 *
 * s:       input string
 * pad:     string whose characters are repeated to fill up the result
 * len:     requested length, counted with UTF8_strlen/UTF8_strtail (i.e.
 *          not in bytes); negative values are treated as 0
 * left:    non-zero to put the padding in front of s, zero to append it
 * malfunc: function name used in exception messages
 *
 * When s is longer than len it is truncated on the right; when it
 * already has the requested length, or pad is empty, it is copied
 * unchanged. */
static str
pad(str *buf, size_t *buflen, const char *s, const char *pad, int len, int left,
	const char *malfunc)
{
	size_t slen, padlen, repeats, residual, i, nlen;
	char *res;

	if (len < 0)
		len = 0;

	slen = (size_t) UTF8_strlen(s);
	if (slen > (size_t) len) {
		/* truncate */
		/* pad is reused here as a pointer to the cut-off position in s */
		pad = UTF8_strtail(s, len);
		slen = pad - s + 1;		/* bytes to keep, plus the NUL */

		CHECK_STR_BUFFER_LENGTH(buf, buflen, slen, malfunc);
		strcpy_len(*buf, s, slen);
		return MAL_SUCCEED;
	}

	padlen = (size_t) UTF8_strlen(pad);
	if (slen == (size_t) len || padlen == 0) {
		/* nothing to do (no padding if there is no pad string) */
		slen = strlen(s) + 1;
		CHECK_STR_BUFFER_LENGTH(buf, buflen, slen, malfunc);
		strcpy(*buf, s);
		return MAL_SUCCEED;
	}

	/* number of complete copies of pad, and the number of characters of
	 * one extra, partial copy */
	repeats = ((size_t) len - slen) / padlen;
	residual = ((size_t) len - slen) % padlen;
	if (residual > 0)
		/* convert the partial copy from a character count to bytes */
		residual = (size_t) (UTF8_strtail(pad, (int) residual) - pad);
	/* from here on padlen and slen are byte lengths */
	padlen = strlen(pad);
	slen = strlen(s);

	nlen = slen + repeats * padlen + residual + 1;
	CHECK_STR_BUFFER_LENGTH(buf, buflen, nlen, malfunc);
	res = *buf;
	if (left) {
		/* layout: full pad copies, partial pad copy, then s */
		for (i = 0; i < repeats; i++)
			memcpy(res + i * padlen, pad, padlen);
		if (residual > 0)
			memcpy(res + repeats * padlen, pad, residual);
		if (slen > 0)
			memcpy(res + repeats * padlen + residual, s, slen);
	} else {
		/* layout: s, full pad copies, then the partial pad copy */
		if (slen > 0)
			memcpy(res, s, slen);
		for (i = 0; i < repeats; i++)
			memcpy(res + slen + i * padlen, pad, padlen);
		if (residual > 0)
			memcpy(res + slen + repeats * padlen, pad, residual);
	}
	res[repeats * padlen + residual + slen] = 0;
	return MAL_SUCCEED;
}
4565 :
4566 : str
4567 8 : str_lpad(str *buf, size_t *buflen, const char *s, int len)
4568 : {
4569 4 : return pad(buf, buflen, s, " ", len, 1, "str.lpad");
4570 : }
4571 :
4572 : /* Fill up 'arg1' to length 'len' by prepending whitespaces.
4573 : * If 'arg1' is already longer than 'len', then it's truncated on the right
4574 : * (NB: this is the PostgreSQL definition).
4575 : *
4576 : * Example: lpad('hi', 5)
4577 : * Result: ' hi'
4578 : */
4579 : static str
4580 4 : STRLpad(str *res, const str *arg1, const int *len)
4581 : {
4582 4 : str buf = NULL, msg = MAL_SUCCEED;
4583 4 : const char *s = *arg1;
4584 4 : int l = *len;
4585 :
4586 8 : if (strNil(s) || is_int_nil(l)) {
4587 0 : *res = GDKstrdup(str_nil);
4588 : } else {
4589 4 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
4590 :
4591 4 : *res = NULL;
4592 4 : if (!(buf = GDKmalloc(buflen)))
4593 0 : throw(MAL, "str.lpad", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4594 4 : if ((msg = str_lpad(&buf, &buflen, s, l)) != MAL_SUCCEED) {
4595 0 : GDKfree(buf);
4596 0 : return msg;
4597 : }
4598 4 : *res = GDKstrdup(buf);
4599 : }
4600 :
4601 4 : GDKfree(buf);
4602 4 : if (!*res)
4603 0 : msg = createException(MAL, "str.lpad", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4604 : return msg;
4605 : }
4606 :
4607 : str
4608 8 : str_rpad(str *buf, size_t *buflen, const char *s, int len)
4609 : {
4610 4 : return pad(buf, buflen, s, " ", len, 0, "str.lpad");
4611 : }
4612 :
4613 : /* Fill up 'arg1' to length 'len' by appending whitespaces.
4614 : * If 'arg1' is already longer than 'len', then it's truncated (on the right)
4615 : * (NB: this is the PostgreSQL definition).
4616 : *
4617 : * Example: rpad('hi', 5)
4618 : * Result: 'hi '
4619 : */
4620 : static str
4621 4 : STRRpad(str *res, const str *arg1, const int *len)
4622 : {
4623 4 : str buf = NULL, msg = MAL_SUCCEED;
4624 4 : const char *s = *arg1;
4625 4 : int l = *len;
4626 :
4627 8 : if (strNil(s) || is_int_nil(l)) {
4628 0 : *res = GDKstrdup(str_nil);
4629 : } else {
4630 4 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
4631 :
4632 4 : *res = NULL;
4633 4 : if (!(buf = GDKmalloc(buflen)))
4634 0 : throw(MAL, "str.rpad", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4635 4 : if ((msg = str_rpad(&buf, &buflen, s, l)) != MAL_SUCCEED) {
4636 0 : GDKfree(buf);
4637 0 : return msg;
4638 : }
4639 4 : *res = GDKstrdup(buf);
4640 : }
4641 :
4642 4 : GDKfree(buf);
4643 4 : if (!*res)
4644 0 : msg = createException(MAL, "str.rpad", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4645 : return msg;
4646 : }
4647 :
4648 : str
4649 22 : str_lpad3(str *buf, size_t *buflen, const char *s, int len, const char *s2)
4650 : {
4651 16 : return pad(buf, buflen, s, s2, len, 1, "str.lpad2");
4652 : }
4653 :
4654 : /* Fill up 'arg1' to length 'len' by prepending characters from 'arg2'
4655 : * If 'arg1' is already longer than 'len', then it's truncated on the right
4656 : * (NB: this is the PostgreSQL definition).
4657 : *
4658 : * Example: lpad('hi', 5, 'xy')
4659 : * Result: xyxhi
4660 : */
4661 : static str
4662 6 : STRLpad3(str *res, const str *arg1, const int *len, const str *arg2)
4663 : {
4664 6 : str buf = NULL, msg = MAL_SUCCEED;
4665 6 : const char *s = *arg1, *s2 = *arg2;
4666 6 : int l = *len;
4667 :
4668 18 : if (strNil(s) || strNil(s2) || is_int_nil(l)) {
4669 0 : *res = GDKstrdup(str_nil);
4670 : } else {
4671 6 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
4672 :
4673 6 : *res = NULL;
4674 6 : if (!(buf = GDKmalloc(buflen)))
4675 0 : throw(MAL, "str.lpad2", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4676 6 : if ((msg = str_lpad3(&buf, &buflen, s, l, s2)) != MAL_SUCCEED) {
4677 0 : GDKfree(buf);
4678 0 : return msg;
4679 : }
4680 6 : *res = GDKstrdup(buf);
4681 : }
4682 :
4683 6 : GDKfree(buf);
4684 6 : if (!*res)
4685 0 : msg = createException(MAL, "str.lpad2",
4686 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4687 : return msg;
4688 : }
4689 :
4690 : str
4691 22 : str_rpad3(str *buf, size_t *buflen, const char *s, int len, const char *s2)
4692 : {
4693 16 : return pad(buf, buflen, s, s2, len, 0, "str.rpad2");
4694 : }
4695 :
4696 : /* Fill up 'arg1' to length 'len' by appending characters from 'arg2'
4697 : * If 'arg1' is already longer than 'len', then it's truncated (on the right)
4698 : * (NB: this is the PostgreSQL definition).
4699 : *
4700 : * Example: rpad('hi', 5, 'xy')
4701 : * Result: hixyx
4702 : */
4703 : static str
4704 6 : STRRpad3(str *res, const str *arg1, const int *len, const str *arg2)
4705 : {
4706 6 : str buf = NULL, msg = MAL_SUCCEED;
4707 6 : const char *s = *arg1, *s2 = *arg2;
4708 6 : int l = *len;
4709 :
4710 18 : if (strNil(s) || strNil(s2) || is_int_nil(l)) {
4711 0 : *res = GDKstrdup(str_nil);
4712 : } else {
4713 6 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
4714 :
4715 6 : *res = NULL;
4716 6 : if (!(buf = GDKmalloc(buflen)))
4717 0 : throw(MAL, "str.rpad2", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4718 6 : if ((msg = str_rpad3(&buf, &buflen, s, l, s2)) != MAL_SUCCEED) {
4719 0 : GDKfree(buf);
4720 0 : return msg;
4721 : }
4722 6 : *res = GDKstrdup(buf);
4723 : }
4724 :
4725 6 : GDKfree(buf);
4726 6 : if (!*res)
4727 0 : msg = createException(MAL, "str.rpad2",
4728 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4729 : return msg;
4730 : }
4731 :
4732 : str
4733 94941 : str_substitute(str *buf, size_t *buflen, const char *s, const char *src,
4734 : const char *dst, bit repeat)
4735 : {
4736 94941 : size_t lsrc = strlen(src), ldst = strlen(dst), n, l = strlen(s);
4737 94941 : char *b, *fnd;
4738 94941 : const char *pfnd;
4739 :
4740 94941 : if (!lsrc || !l) { /* s/src is an empty string, there's nothing to substitute */
4741 7 : l++;
4742 7 : CHECK_STR_BUFFER_LENGTH(buf, buflen, l, "str.substitute");
4743 7 : strcpy(*buf, s);
4744 7 : return MAL_SUCCEED;
4745 : }
4746 :
4747 94934 : n = l + ldst;
4748 94934 : if (repeat && ldst > lsrc)
4749 73071 : n = (ldst * l) / lsrc; /* max length */
4750 :
4751 94934 : n++;
4752 94934 : CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.substitute");
4753 94934 : b = *buf;
4754 94934 : pfnd = s;
4755 100426 : do {
4756 100426 : fnd = strstr(pfnd, src);
4757 100426 : if (fnd == NULL)
4758 : break;
4759 5508 : n = fnd - pfnd;
4760 5508 : if (n > 0) {
4761 4646 : strcpy_len(b, pfnd, n + 1);
4762 4646 : b += n;
4763 : }
4764 5508 : if (ldst > 0) {
4765 401 : strcpy_len(b, dst, ldst + 1);
4766 385 : b += ldst;
4767 : }
4768 5492 : if (*fnd == 0)
4769 : break;
4770 5492 : pfnd = fnd + lsrc;
4771 5492 : } while (repeat);
4772 94918 : strcpy(b, pfnd);
4773 94918 : return MAL_SUCCEED;
4774 : }
4775 :
4776 : static str
4777 197 : STRSubstitute(str *res, const str *arg1, const str *arg2, const str *arg3,
4778 : const bit *g)
4779 : {
4780 197 : str buf = NULL, msg = MAL_SUCCEED;
4781 197 : const char *s = *arg1, *s2 = *arg2, *s3 = *arg3;
4782 :
4783 590 : if (strNil(s) || strNil(s2) || strNil(s3)) {
4784 2 : *res = GDKstrdup(str_nil);
4785 : } else {
4786 195 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
4787 :
4788 195 : *res = NULL;
4789 195 : if (!(buf = GDKmalloc(buflen)))
4790 0 : throw(MAL, "str.substitute", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4791 195 : if ((msg = str_substitute(&buf, &buflen, s, s2, s3, *g)) != MAL_SUCCEED) {
4792 0 : GDKfree(buf);
4793 0 : return msg;
4794 : }
4795 195 : *res = GDKstrdup(buf);
4796 : }
4797 :
4798 197 : GDKfree(buf);
4799 197 : if (!*res)
4800 0 : msg = createException(MAL, "str.substitute",
4801 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4802 : return msg;
4803 : }
4804 :
4805 : static str
4806 9 : STRascii(int *ret, const str *s)
4807 : {
4808 9 : return str_wchr_at(ret, *s, 0);
4809 : }
4810 :
4811 : str
4812 3811 : str_substring_tail(str *buf, size_t *buflen, const char *s, int start)
4813 : {
4814 3811 : if (start < 1)
4815 : start = 1;
4816 3811 : start--;
4817 3803 : return str_tail(buf, buflen, s, start);
4818 : }
4819 :
4820 : static str
4821 8 : STRsubstringTail(str *res, const str *arg1, const int *start)
4822 : {
4823 8 : str buf = NULL, msg = MAL_SUCCEED;
4824 8 : const char *s = *arg1;
4825 8 : int st = *start;
4826 :
4827 16 : if (strNil(s) || is_int_nil(st)) {
4828 0 : *res = GDKstrdup(str_nil);
4829 : } else {
4830 8 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
4831 :
4832 8 : *res = NULL;
4833 8 : if (!(buf = GDKmalloc(buflen)))
4834 0 : throw(MAL, "str.substringTail", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4835 8 : if ((msg = str_substring_tail(&buf, &buflen, s, st)) != MAL_SUCCEED) {
4836 0 : GDKfree(buf);
4837 0 : return msg;
4838 : }
4839 8 : *res = GDKstrdup(buf);
4840 : }
4841 :
4842 8 : GDKfree(buf);
4843 8 : if (!*res)
4844 0 : msg = createException(MAL, "str.substringTail",
4845 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4846 : return msg;
4847 : }
4848 :
4849 : str
4850 3662257 : str_sub_string(str *buf, size_t *buflen, const char *s, int start, int l)
4851 : {
4852 3662257 : if (start < 1)
4853 : start = 1;
4854 3662257 : start--;
4855 3662237 : return str_Sub_String(buf, buflen, s, start, l);
4856 : }
4857 :
4858 : static str
4859 23 : STRsubstring(str *res, const str *arg1, const int *start, const int *ll)
4860 : {
4861 23 : str buf = NULL, msg = MAL_SUCCEED;
4862 23 : const char *s = *arg1;
4863 23 : int st = *start, l = *ll;
4864 :
4865 46 : if (strNil(s) || is_int_nil(st) || is_int_nil(l)) {
4866 3 : *res = GDKstrdup(str_nil);
4867 : } else {
4868 20 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
4869 :
4870 20 : *res = NULL;
4871 20 : if (!(buf = GDKmalloc(buflen)))
4872 0 : throw(MAL, "str.substring", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4873 20 : if ((msg = str_sub_string(&buf, &buflen, s, st, l)) != MAL_SUCCEED) {
4874 0 : GDKfree(buf);
4875 0 : return msg;
4876 : }
4877 20 : *res = GDKstrdup(buf);
4878 : }
4879 :
4880 23 : GDKfree(buf);
4881 23 : if (!*res)
4882 0 : msg = createException(MAL, "str.substring",
4883 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4884 : return msg;
4885 : }
4886 :
4887 : static str
4888 18 : STRprefix(str *res, const str *arg1, const int *ll)
4889 : {
4890 18 : str buf = NULL, msg = MAL_SUCCEED;
4891 18 : const char *s = *arg1;
4892 18 : int l = *ll;
4893 :
4894 36 : if (strNil(s) || is_int_nil(l)) {
4895 0 : *res = GDKstrdup(str_nil);
4896 : } else {
4897 18 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
4898 :
4899 18 : *res = NULL;
4900 18 : if (!(buf = GDKmalloc(buflen)))
4901 0 : throw(MAL, "str.prefix", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4902 18 : if ((msg = str_Sub_String(&buf, &buflen, s, 0, l)) != MAL_SUCCEED) {
4903 0 : GDKfree(buf);
4904 0 : return msg;
4905 : }
4906 18 : *res = GDKstrdup(buf);
4907 : }
4908 :
4909 18 : GDKfree(buf);
4910 18 : if (!*res)
4911 0 : msg = createException(MAL, "str.prefix",
4912 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4913 : return msg;
4914 : }
4915 :
4916 : str
4917 9 : str_suffix(str *buf, size_t *buflen, const char *s, int l)
4918 : {
4919 9 : int start = (int) (strlen(s) - l);
4920 9 : return str_Sub_String(buf, buflen, s, start, l);
4921 : }
4922 :
4923 : static str
4924 5 : STRsuffix(str *res, const str *arg1, const int *ll)
4925 : {
4926 5 : str buf = NULL, msg = MAL_SUCCEED;
4927 5 : const char *s = *arg1;
4928 5 : int l = *ll;
4929 :
4930 10 : if (strNil(s) || is_int_nil(l)) {
4931 0 : *res = GDKstrdup(str_nil);
4932 : } else {
4933 5 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
4934 :
4935 5 : *res = NULL;
4936 5 : if (!(buf = GDKmalloc(buflen)))
4937 0 : throw(MAL, "str.suffix", SQLSTATE(HY013) MAL_MALLOC_FAIL);
4938 5 : if ((msg = str_suffix(&buf, &buflen, s, l)) != MAL_SUCCEED) {
4939 0 : GDKfree(buf);
4940 0 : return msg;
4941 : }
4942 5 : *res = GDKstrdup(buf);
4943 : }
4944 :
4945 5 : GDKfree(buf);
4946 5 : if (!*res)
4947 0 : msg = createException(MAL, "str.suffix",
4948 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
4949 : return msg;
4950 : }
4951 :
/* Search 'needle' in 'haystack', starting at the 1-based position 'start'
 * (values <= 0 are treated as 1).  The search itself is done by str_search()
 * on the tail of 'haystack'; a non-negative result is shifted by the start
 * position, a negative result is reported as 0.
 */
int
str_locate2(const char *needle, const char *haystack, int start)
{
	const int first = start > 0 ? start : 1;
	char *from = UTF8_strtail(haystack, first - 1);
	const int hit = str_search(from, needle, str_strlen(needle));

	if (hit < 0)
		return 0;
	return hit + first;
}
4963 :
4964 : static str
4965 28357 : STRlocate3(int *ret, const str *needle, const str *haystack, const int *start)
4966 : {
4967 28357 : const char *s = *needle, *s2 = *haystack;
4968 28357 : int st = *start;
4969 :
4970 56782 : *ret = (strNil(s) || strNil(s2) || is_int_nil(st)) ?
4971 28357 : int_nil :
4972 68 : str_locate2(s, s2, st);
4973 28357 : return MAL_SUCCEED;
4974 : }
4975 :
4976 : static str
4977 16 : STRlocate(int *ret, const str *needle, const str *haystack)
4978 : {
4979 16 : const char *s = *needle, *s2 = *haystack;
4980 :
4981 45 : *ret = (strNil(s) || strNil(s2)) ? int_nil : str_locate2(s, s2, 1);
4982 16 : return MAL_SUCCEED;
4983 : }
4984 :
4985 : str
4986 223 : str_insert(str *buf, size_t *buflen, const char *s, int strt, int l,
4987 : const char *s2)
4988 : {
4989 223 : str v;
4990 223 : int l1 = UTF8_strlen(s);
4991 223 : size_t nextlen;
4992 :
4993 223 : if (l < 0)
4994 0 : throw(MAL, "str.insert",
4995 : SQLSTATE(42000)
4996 : "The number of characters for insert function must be non negative");
4997 223 : if (strt < 0) {
4998 0 : if (-strt <= l1)
4999 0 : strt = l1 + strt;
5000 : else
5001 : strt = 0;
5002 : }
5003 223 : if (strt > l1)
5004 : strt = l1;
5005 :
5006 223 : nextlen = strlen(s) + strlen(s2) + 1;
5007 223 : CHECK_STR_BUFFER_LENGTH(buf, buflen, nextlen, "str.insert");
5008 223 : v = *buf;
5009 223 : if (strt > 0)
5010 216 : v = UTF8_strncpy(v, s, strt);
5011 223 : strcpy(v, s2);
5012 223 : if (strt + l < l1)
5013 10 : strcat(v, UTF8_strtail((char *) s, strt + l));
5014 : return MAL_SUCCEED;
5015 : }
5016 :
5017 : static str
5018 225 : STRinsert(str *res, const str *input, const int *start, const int *nchars,
5019 : const str *input2)
5020 : {
5021 225 : str buf = NULL, msg = MAL_SUCCEED;
5022 225 : const char *s = *input, *s2 = *input2;
5023 225 : int st = *start, n = *nchars;
5024 :
5025 449 : if (strNil(s) || is_int_nil(st) || is_int_nil(n) || strNil(s2)) {
5026 2 : *res = GDKstrdup(str_nil);
5027 : } else {
5028 223 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
5029 :
5030 223 : *res = NULL;
5031 223 : if (!(buf = GDKmalloc(buflen)))
5032 0 : throw(MAL, "str.insert", SQLSTATE(HY013) MAL_MALLOC_FAIL);
5033 223 : if ((msg = str_insert(&buf, &buflen, s, st, n, s2)) != MAL_SUCCEED) {
5034 0 : GDKfree(buf);
5035 0 : return msg;
5036 : }
5037 223 : *res = GDKstrdup(buf);
5038 : }
5039 :
5040 225 : GDKfree(buf);
5041 225 : if (!*res)
5042 0 : msg = createException(MAL, "str.insert",
5043 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
5044 : return msg;
5045 : }
5046 :
5047 : static str
5048 197 : STRreplace(str *ret, const str *s1, const str *s2, const str *s3)
5049 : {
5050 197 : bit flag = TRUE;
5051 197 : return STRSubstitute(ret, s1, s2, s3, &flag);
5052 : }
5053 :
5054 : str
5055 15 : str_repeat(str *buf, size_t *buflen, const char *s, int c)
5056 : {
5057 15 : size_t l = strlen(s), nextlen;
5058 :
5059 15 : if (l >= INT_MAX)
5060 0 : throw(MAL, "str.repeat", SQLSTATE(HY013) MAL_MALLOC_FAIL);
5061 15 : nextlen = (size_t) c *l + 1;
5062 :
5063 15 : CHECK_STR_BUFFER_LENGTH(buf, buflen, nextlen, "str.repeat");
5064 15 : str t = *buf;
5065 15 : *t = 0;
5066 160043 : for (int i = c; i > 0; i--, t += l)
5067 160028 : strcpy(t, s);
5068 : return MAL_SUCCEED;
5069 : }
5070 :
5071 : static str
5072 11 : STRrepeat(str *res, const str *arg1, const int *c)
5073 : {
5074 11 : str buf = NULL, msg = MAL_SUCCEED;
5075 11 : const char *s = *arg1;
5076 11 : int cc = *c;
5077 :
5078 21 : if (strNil(s) || is_int_nil(cc) || cc < 0) {
5079 1 : *res = GDKstrdup(str_nil);
5080 : } else {
5081 10 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
5082 :
5083 10 : *res = NULL;
5084 10 : if (!(buf = GDKmalloc(buflen)))
5085 0 : throw(MAL, "str.repeat", SQLSTATE(HY013) MAL_MALLOC_FAIL);
5086 10 : if ((msg = str_repeat(&buf, &buflen, s, cc)) != MAL_SUCCEED) {
5087 0 : GDKfree(buf);
5088 0 : return msg;
5089 : }
5090 10 : *res = GDKstrdup(buf);
5091 : }
5092 :
5093 11 : GDKfree(buf);
5094 11 : if (!*res)
5095 0 : msg = createException(MAL, "str.repeat",
5096 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
5097 : return msg;
5098 : }
5099 :
5100 : static str
5101 1 : STRspace(str *res, const int *ll)
5102 : {
5103 1 : str buf = NULL, msg = MAL_SUCCEED;
5104 1 : int l = *ll;
5105 :
5106 1 : if (is_int_nil(l) || l < 0) {
5107 0 : *res = GDKstrdup(str_nil);
5108 : } else {
5109 1 : const char space[] = " ", *s = space;
5110 1 : size_t buflen = INITIAL_STR_BUFFER_LENGTH;
5111 :
5112 1 : *res = NULL;
5113 1 : if (!(buf = GDKmalloc(buflen)))
5114 0 : throw(MAL, "str.space", SQLSTATE(HY013) MAL_MALLOC_FAIL);
5115 1 : if ((msg = str_repeat(&buf, &buflen, s, l)) != MAL_SUCCEED) {
5116 0 : GDKfree(buf);
5117 0 : return msg;
5118 : }
5119 1 : *res = GDKstrdup(buf);
5120 : }
5121 :
5122 1 : GDKfree(buf);
5123 1 : if (!*res)
5124 0 : msg = createException(MAL, "str.space",
5125 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
5126 : return msg;
5127 : }
5128 :
5129 : static str
5130 4 : STRasciify(str *r, const str *s)
5131 : {
5132 : #ifdef HAVE_ICONV
5133 :
5134 4 : if (strNil(*s)) {
5135 0 : if ((*r = GDKstrdup(str_nil)) == NULL)
5136 0 : throw(MAL, "str.asciify", SQLSTATE(HY013) MAL_MALLOC_FAIL);
5137 : else
5138 : return MAL_SUCCEED;
5139 : }
5140 :
5141 4 : iconv_t cd;
5142 4 : const str f = "UTF-8", t = "ASCII//TRANSLIT";
5143 4 : str in = *s, out;
5144 4 : size_t in_len = strlen(in), out_len = in_len * 4; /* oversized as a single utf8 char could change into multiple ascii char */
5145 :
5146 4 : if ((cd = iconv_open(t, f)) == (iconv_t) (-1))
5147 0 : throw(MAL, "str.asciify", "ICONV: cannot convert from (%s) to (%s).", f, t);
5148 :
5149 4 : if ((*r = out = GDKmalloc(out_len)) == NULL) {
5150 0 : iconv_close(cd);
5151 0 : throw(MAL, "str.asciify", SQLSTATE(HY013) MAL_MALLOC_FAIL);
5152 : }
5153 :
5154 4 : str o = out;
5155 :
5156 4 : if (iconv(cd, &in, &in_len, &o, &out_len) == (size_t) -1) {
5157 0 : GDKfree(out);
5158 0 : *r = NULL;
5159 0 : iconv_close(cd);
5160 0 : throw(MAL, "str.asciify", "Conversion failed, possibly due to system locale %s.", setlocale(0, NULL));
5161 : }
5162 :
5163 4 : *o = '\0';
5164 4 : iconv_close(cd);
5165 4 : return MAL_SUCCEED;
5166 :
5167 : #else
5168 : throw(MAL, "str.asciify", "ICONV library not available.");
5169 : #endif
5170 : }
5171 :
5172 : static inline void
5173 306 : BBPnreclaim(int nargs, ...)
5174 : {
5175 306 : va_list valist;
5176 306 : va_start(valist, nargs);
5177 1346 : for (int i = 0; i < nargs; i++) {
5178 1030 : BAT *b = va_arg(valist, BAT *);
5179 1628 : BBPreclaim(b);
5180 : }
5181 316 : va_end(valist);
5182 316 : }
5183 :
/* scan select loop with or without candidates
 *
 * Expands to a statement that appends to vals[] (advancing cnt) the oid of
 * every scanned value for which TEST holds, or -- when KEEP_NULLS is true --
 * whose first byte is '\200'.
 *
 * The macro does not take its working set as arguments; it uses these names
 * from the expansion site: b, s, anti (only for the trace message), bi, off,
 * p, q, ncands, ci, vals, cnt, qry_ctx, counter and the label 'bailout'
 * (jumped to by the timeout check).  TEST may refer to 'v', the current
 * string value, which the macro declares in each loop body.
 *
 * Two shapes:
 *  - no candidate list, or a dense one: oids p .. q-1 are scanned directly;
 *  - otherwise: p is used as a plain counter up to ncands and the oid comes
 *    from canditer_next(ci).
 */
#define scanloop(TEST, KEEP_NULLS) \
	do { \
		TRC_DEBUG(ALGO, \
				  "scanselect(b=%s#"BUNFMT",anti=%d): " \
				  "scanselect %s\n", BATgetId(b), BATcount(b), \
				  anti, #TEST); \
		if (!s || BATtdense(s)) { \
			for (; p < q; p++) { \
				GDK_CHECK_TIMEOUT(qry_ctx, counter, \
								  GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); \
				const char *restrict v = BUNtvar(bi, p - off); \
				if ((TEST) || ((KEEP_NULLS) && *v == '\200')) \
					vals[cnt++] = p; \
			} \
		} else { \
			for (; p < ncands; p++) { \
				GDK_CHECK_TIMEOUT(qry_ctx, counter, \
								  GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); \
				oid o = canditer_next(ci); \
				const char *restrict v = BUNtvar(bi, o - off); \
				if ((TEST) || ((KEEP_NULLS) && *v == '\200')) \
					vals[cnt++] = o; \
			} \
		} \
	} while (0)
5210 :
/* scan select loop with or without candidates (anti variant)
 *
 * The expansion is the same as that of scanloop; the "anti" semantics come
 * entirely from the TEST expression supplied by the caller.
 *
 * Appends to vals[] (advancing cnt) the oid of every scanned value for which
 * TEST holds, or -- when KEEP_NULLS is true -- whose first byte is '\200'.
 *
 * Names used from the expansion site: b, s, anti (only for the trace
 * message), bi, off, p, q, ncands, ci, vals, cnt, qry_ctx, counter and the
 * label 'bailout' (jumped to by the timeout check).  TEST may refer to 'v',
 * the current string value, which the macro declares in each loop body.
 */
#define scanloop_anti(TEST, KEEP_NULLS) \
	do { \
		TRC_DEBUG(ALGO, \
				  "scanselect(b=%s#"BUNFMT",anti=%d): " \
				  "scanselect %s\n", BATgetId(b), BATcount(b), \
				  anti, #TEST); \
		if (!s || BATtdense(s)) { \
			for (; p < q; p++) { \
				GDK_CHECK_TIMEOUT(qry_ctx, counter, \
								  GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); \
				const char *restrict v = BUNtvar(bi, p - off); \
				if ((TEST) || ((KEEP_NULLS) && *v == '\200')) \
					vals[cnt++] = p; \
			} \
		} else { \
			for (; p < ncands; p++) { \
				GDK_CHECK_TIMEOUT(qry_ctx, counter, \
								  GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); \
				oid o = canditer_next(ci); \
				const char *restrict v = BUNtvar(bi, o - off); \
				if ((TEST) || ((KEEP_NULLS) && *v == '\200')) \
					vals[cnt++] = o; \
			} \
		} \
	} while (0)
5237 :
/* Scan 'b' and write into the tail of 'bn' the oids of the values selected
 * by 'str_cmp' against 'key'; the number of oids written is stored in *rcnt.
 *
 * bn          result BAT; its tail is written as an array of oids
 * b           BAT holding the string values to test
 * s, ci       candidate list and the iterator over it (s may be NULL)
 * p, q        oid range scanned when there is no candidate list or it is
 *             dense
 * key         string to compare against; when nil, nothing is done (note
 *             that *rcnt is not written in that case)
 * anti        select values for which str_cmp() is non-zero instead of zero
 * str_cmp     comparison function, called as str_cmp(value, key, klen);
 *             a return value of 0 counts as a match
 * keep_nulls  passed to the scan macros as KEEP_NULLS
 *
 * Nil values in 'b' never satisfy the test itself in either mode.
 *
 * The local names below (bi, cnt, ncands, off, vals, counter, qry_ctx) and
 * the label 'bailout' are used by the scanloop macros and must keep these
 * names.
 */
static str
str_select(BAT *bn, BAT *b, BAT *s, struct canditer *ci, BUN p, BUN q,
		   BUN *rcnt, const char *key, bool anti,
		   int (*str_cmp)(const char *, const char *, int),
		   bool keep_nulls)
{
	if (strNil(key))
		return MAL_SUCCEED;

	BATiter bi = bat_iterator(b);
	BUN cnt = 0, ncands = ci->ncand;
	oid off = b->hseqbase, *restrict vals = Tloc(bn, 0);
	str msg = MAL_SUCCEED;
	/* computed once; handed to every str_cmp() call */
	int klen = str_strlen(key);

	/* state for the periodic timeout check inside the scan macros */
	size_t counter = 0;
	QryCtx *qry_ctx = MT_thread_get_qry_ctx();

	if (anti)					/* keep nulls ? (use false for now) */
		scanloop_anti(!strNil(v) && str_cmp(v, key, klen) != 0, keep_nulls);
	else
		scanloop(!strNil(v) && str_cmp(v, key, klen) == 0, keep_nulls);

	/* reached both on normal completion and from the timeout check */
  bailout:
	bat_iterator_end(&bi);
	*rcnt = cnt;
	return msg;
}
5266 :
5267 : static str
5268 54 : STRselect(bat *r_id, const bat *b_id, const bat *cb_id, const char *key,
5269 : const bit anti, int (*str_cmp)(const char *, const char *, int),
5270 : const str fname)
5271 : {
5272 54 : str msg = MAL_SUCCEED;
5273 :
5274 54 : BAT *b, *cb = NULL, *r = NULL, *old_s = NULL;;
5275 54 : BUN p = 0, q = 0, rcnt = 0;
5276 54 : struct canditer ci;
5277 54 : bool with_strimps = false,
5278 54 : with_strimps_anti = false;
5279 :
5280 54 : if (!(b = BATdescriptor(*b_id)))
5281 0 : throw(MAL, fname, RUNTIME_OBJECT_MISSING);
5282 :
5283 54 : if (cb_id && !is_bat_nil(*cb_id) && !(cb = BATdescriptor(*cb_id))) {
5284 0 : BBPreclaim(b);
5285 0 : throw(MAL, fname, RUNTIME_OBJECT_MISSING);
5286 : }
5287 :
5288 54 : assert(ATOMstorage(b->ttype) == TYPE_str);
5289 :
5290 54 : if (BAThasstrimps(b)) {
5291 0 : if (STRMPcreate(b, NULL) == GDK_SUCCEED) {
5292 0 : BAT *tmp_s = STRMPfilter(b, cb, key, anti);
5293 0 : if (tmp_s) {
5294 0 : old_s = cb;
5295 0 : cb = tmp_s;
5296 0 : if (!anti)
5297 : with_strimps = true;
5298 : else
5299 0 : with_strimps_anti = true;
5300 : }
5301 : } else {
5302 0 : GDKclrerr();
5303 : }
5304 : }
5305 :
5306 108 : MT_thread_setalgorithm(with_strimps ?
5307 54 : "string_select: strcmp function using strimps" :
5308 : (with_strimps_anti ?
5309 : "string_select: strcmp function using strimps anti"
5310 : : "string_select: strcmp function with no accelerator"));
5311 :
5312 54 : canditer_init(&ci, b, cb);
5313 54 : if (!(r = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT))) {
5314 0 : BBPnreclaim(2, b, cb);
5315 0 : throw(MAL, fname, SQLSTATE(HY013) MAL_MALLOC_FAIL);
5316 : }
5317 :
5318 54 : if (!cb || BATtdense(cb)) {
5319 32 : if (cb) {
5320 22 : assert(BATtdense(cb));
5321 22 : p = (BUN) cb->tseqbase;
5322 22 : q = p + BATcount(cb);
5323 22 : if ((oid) p < b->hseqbase)
5324 : p = b->hseqbase;
5325 22 : if ((oid) q > b->hseqbase + BATcount(b))
5326 : q = b->hseqbase + BATcount(b);
5327 : } else {
5328 32 : p = b->hseqbase;
5329 32 : q = BATcount(b) + b->hseqbase;
5330 : }
5331 : }
5332 :
5333 108 : msg = str_select(r, b, cb, &ci, p, q, &rcnt, key, anti
5334 54 : && !with_strimps_anti, str_cmp, with_strimps_anti);
5335 :
5336 54 : if (!msg) {
5337 54 : BATsetcount(r, rcnt);
5338 54 : r->tsorted = true;
5339 54 : r->trevsorted = r->batCount <= 1;
5340 54 : r->tkey = true;
5341 54 : r->tnil = false;
5342 54 : r->tnonil = true;
5343 108 : r->tseqbase = rcnt == 0 ?
5344 54 : 0 : rcnt == 1 ?
5345 11 : *(const oid *) Tloc(r, 0) : rcnt == b->batCount ? b->hseqbase : oid_nil;
5346 :
5347 54 : if (with_strimps_anti) {
5348 0 : BAT *rev;
5349 0 : if (old_s) {
5350 0 : rev = BATdiffcand(old_s, r);
5351 : #ifndef NDEBUG
5352 0 : BAT *is = BATintersectcand(old_s, r);
5353 0 : if (is) {
5354 0 : assert(is->batCount == r->batCount);
5355 0 : BBPreclaim(is);
5356 : }
5357 0 : assert(rev->batCount == old_s->batCount - r->batCount);
5358 : #endif
5359 : } else
5360 0 : rev = BATnegcands(b->batCount, r);
5361 :
5362 0 : BBPreclaim(r);
5363 0 : r = rev;
5364 0 : if (r == NULL)
5365 0 : msg = createException(MAL, fname, SQLSTATE(HY013) MAL_MALLOC_FAIL);
5366 : }
5367 : }
5368 :
5369 54 : if (r && !msg) {
5370 54 : *r_id = r->batCacheid;
5371 54 : BBPkeepref(r);
5372 : } else {
5373 0 : BBPreclaim(r);
5374 : }
5375 :
5376 54 : BBPnreclaim(3, b, cb, old_s);
5377 54 : return msg;
5378 : }
5379 :
/* Unpack the MAL arguments of a string select operator into local variables.
 *
 * Argument 0 is the result, 1 the input BAT, 2 the candidate list and 3 the
 * key.  With 5 arguments, argument 4 is the anti flag and ICASE is false;
 * with any other argument count ICASE is true and the anti flag is taken
 * from argument 5.
 *
 * NOTE(review): ICASE is derived from the argument count only; the value of
 * argument 4 is not read in the non-5-argument form -- confirm that this is
 * intended.
 */
#define STRSELECT_MAPARGS(STK, PCI, R_ID, B_ID, CB_ID, KEY, ICASE, ANTI) \
	do { \
		R_ID = getArgReference(STK, PCI, 0); \
		B_ID = getArgReference(STK, PCI, 1); \
		CB_ID = getArgReference(STK, PCI, 2); \
		KEY = *getArgReference_str(STK, PCI, 3); \
		ICASE = PCI->argc != 5; \
		ANTI = PCI->argc == 5 ? *getArgReference_bit(STK, PCI, 4) : \
			*getArgReference_bit(STK, PCI, 5); \
	} while (0)
5390 :
5391 : /**
5392 : * @r_id: result oid
5393 : * @b_id: input bat oid
5394 : * @cb_id: input bat candidates oid
5395 : * @key: input string
5396 : * @icase: ignore case
5397 : * @anti: anti join
5398 : */
5399 : static str
5400 15 : STRstartswithselect(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
5401 : {
5402 15 : (void) cntxt;
5403 15 : (void) mb;
5404 :
5405 15 : bat *r_id = NULL, *b_id = NULL, *cb_id = NULL;
5406 15 : char *key = NULL;
5407 15 : bit icase = 0, anti = 0;
5408 :
5409 15 : STRSELECT_MAPARGS(stk, pci, r_id, b_id, cb_id, key, icase, anti);
5410 24 : return STRselect(r_id, b_id, cb_id, key, anti,
5411 : icase ? str_is_iprefix : str_is_prefix, "str.startswithselect");
5412 : }
5413 :
5414 : /**
5415 : * @r_id: result oid
5416 : * @b_id: input bat oid
5417 : * @cb_id: input bat candidates oid
5418 : * @key: input string
5419 : * @icase: ignore case
5420 : * @anti: anti join
5421 : */
5422 : static str
5423 15 : STRendswithselect(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
5424 : {
5425 15 : (void) cntxt;
5426 15 : (void) mb;
5427 :
5428 15 : bat *r_id = NULL, *b_id = NULL, *cb_id = NULL;
5429 15 : char *key = NULL;
5430 15 : bit icase = 0, anti = 0;
5431 :
5432 15 : STRSELECT_MAPARGS(stk, pci, r_id, b_id, cb_id, key, icase, anti);
5433 24 : return STRselect(r_id, b_id, cb_id, key, anti,
5434 : icase ? str_is_isuffix : str_is_suffix, "str.endswithselect");
5435 : }
5436 :
5437 : /**
5438 : * @r_id: result oid
5439 : * @b_id: input bat oid
5440 : * @cb_id: input bat candidates oid
5441 : * @key: input string
5442 : * @icase: ignore case
5443 : * @anti: anti join
5444 : */
5445 : static str
5446 24 : STRcontainsselect(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
5447 : {
5448 24 : (void) cntxt;
5449 24 : (void) mb;
5450 :
5451 24 : bat *r_id = NULL, *b_id = NULL, *cb_id = NULL;
5452 24 : char *key = NULL;
5453 24 : bit icase = 0, anti = 0;
5454 :
5455 24 : STRSELECT_MAPARGS(stk, pci, r_id, b_id, cb_id, key, icase, anti);
5456 33 : return STRselect(r_id, b_id, cb_id, key, anti,
5457 : icase ? str_icontains : str_contains, "str.containsselect");
5458 : }
5459 :
/* APPEND(b, o): store oid 'o' at index b->batCount of b's heap (viewed as an
 * array of oids) and increment b->batCount.  No capacity check is done
 * here. */
#define APPEND(b, o) (((oid *) b->theap->base)[b->batCount++] = (o))
/* VALUE(s, x): the string at position 'x' of side 's'; 's' is pasted into
 * the names <s>vars, <s>vals and <s>i, which must exist at the expansion
 * site. */
#define VALUE(s, x) (s##vars + VarHeapVal(s##vals, (x), s##i.width))
5462 :
/* Set the properties of BAT 'B' that the join code uses for an empty
 * result: no nils, key, sorted in both directions, and tseqbase 0. */
#define set_empty_bat_props(B) \
	do { \
		B->tnil = false; \
		B->tnonil = true; \
		B->tkey = true; \
		B->tsorted = true; \
		B->trevsorted = true; \
		B->tseqbase = 0; \
	} while (0)
5472 :
/* Nested-loop join body: for every non-nil right value, visit the left
 * candidates and append the pair (lo, ro) to rl / rr for every non-nil left
 * value that matches.
 *
 * STR_CMP  expression evaluated per pair; a pair is a match when it
 *          evaluates to ZERO (a non-zero value skips the pair)
 * STR_LEN  expression assigned to vr_len once per right value
 *
 * When with_strimps is set, STRMPfilter() is asked for a reduced left
 * candidate list per right value; if it returns one it is used instead of
 * cl and released after the inner loop.  It is also released before jumping
 * to 'exit' when growing the result fails, so that error path does not leak
 * it.
 *
 * The result BATs are grown with BATextend() when full, and the sortedness,
 * key and tseqbase properties of rl / rr are maintained as pairs are added.
 *
 * Names used from the expansion site: l, r, cl, cr, lci, rci, lo, ro, vl,
 * vr, vr_len, lbase, rbase, rl, rr (may be NULL), lastl, matches, rskipped,
 * newcap, msg, fname, anti, with_strimps, qry_ctx, counter and the label
 * 'exit'.
 */
#define CONTAINS_JOIN_LOOP(STR_CMP, STR_LEN) \
	do { \
		canditer_init(&rci, r, cr); \
		for (BUN ridx = 0; ridx < rci.ncand; ridx++) { \
			BAT *filtered_sl = NULL; \
			GDK_CHECK_TIMEOUT(qry_ctx, counter, GOTO_LABEL_TIMEOUT_HANDLER(exit, qry_ctx)); \
			ro = canditer_next(&rci); \
			vr = VALUE(r, ro - rbase); \
			matches = 0; \
			if (!strNil(vr)) { \
				vr_len = STR_LEN; \
				if (with_strimps) \
					filtered_sl = STRMPfilter(l, cl, vr, anti); \
				if (filtered_sl) \
					canditer_init(&lci, l, filtered_sl); \
				else \
					canditer_init(&lci, l, cl); \
				for (BUN lidx = 0; lidx < lci.ncand; lidx++) { \
					lo = canditer_next(&lci); \
					vl = VALUE(l, lo - lbase); \
					if (strNil(vl)) \
						continue; \
					if (STR_CMP) \
						continue; \
					if (BATcount(rl) == BATcapacity(rl)) { \
						newcap = BATgrows(rl); \
						BATsetcount(rl, BATcount(rl)); \
						if (rr) \
							BATsetcount(rr, BATcount(rr)); \
						if (BATextend(rl, newcap) != GDK_SUCCEED || \
							(rr && BATextend(rr, newcap) != GDK_SUCCEED)) { \
							msg = createException(MAL, fname, SQLSTATE(HY013) MAL_MALLOC_FAIL); \
							/* not reachable from 'exit': release it here */ \
							BBPreclaim(filtered_sl); \
							goto exit; \
						} \
						assert(!rr || BATcapacity(rl) == BATcapacity(rr)); \
					} \
					if (BATcount(rl) > 0) { \
						if (lastl + 1 != lo) \
							rl->tseqbase = oid_nil; \
						if (matches == 0) { \
							if (rr) \
								rr->trevsorted = false; \
							if (lastl > lo) { \
								rl->tsorted = false; \
								rl->tkey = false; \
							} else if (lastl < lo) { \
								rl->trevsorted = false; \
							} else { \
								rl->tkey = false; \
							} \
						} \
					} \
					APPEND(rl, lo); \
					if (rr) \
						APPEND(rr, ro); \
					lastl = lo; \
					matches++; \
				} \
				BBPreclaim(filtered_sl); \
			} \
			if (rr) { \
				if (matches > 1) { \
					rr->tkey = false; \
					rr->tseqbase = oid_nil; \
					rl->trevsorted = false; \
				} else if (matches == 0) { \
					rskipped = BATcount(rr) > 0; \
				} else if (rskipped) { \
					rr->tseqbase = oid_nil; \
				} \
			} else if (matches > 1) { \
				rl->trevsorted = false; \
			} \
		} \
	} while (0)
5548 :
/* Nested-loop join body: for every non-nil right value, visit all left
 * candidates and append the pair (lo, ro) to rl / rr for every non-nil left
 * value that matches.
 *
 * STR_CMP  expression evaluated per pair; a pair is a match when it
 *          evaluates to NON-ZERO (note: the opposite of CONTAINS_JOIN_LOOP)
 * STR_LEN  expression assigned to vr_len once per right value
 * FNAME    MAL function name used in the allocation-failure exception
 *
 * The result BATs are grown with BATextend() when full; on failure msg is
 * set and control jumps to the label 'exit'.  The sortedness, key and
 * tseqbase properties of rl / rr are maintained as pairs are added.
 *
 * Names used from the expansion site: l, r, cl, cr, lci, rci, lo, ro, vl,
 * vr, vr_len, lbase, rbase, rl, rr (may be NULL), last_lo, matches,
 * rskipped, newcap, msg, qry_ctx, counter and the label 'exit'.
 */
#define STR_JOIN_NESTED_LOOP(STR_CMP, STR_LEN, FNAME) \
	do { \
		canditer_init(&rci, r, cr); \
		for (BUN ridx = 0; ridx < rci.ncand; ridx++) { \
			GDK_CHECK_TIMEOUT(qry_ctx, counter, GOTO_LABEL_TIMEOUT_HANDLER(exit, qry_ctx)); \
			ro = canditer_next(&rci); \
			vr = VALUE(r, ro - rbase); \
			matches = 0; \
			if (!strNil(vr)) { \
				vr_len = STR_LEN; \
				canditer_init(&lci, l, cl); \
				for (BUN lidx = 0; lidx < lci.ncand; lidx++) { \
					lo = canditer_next(&lci); \
					vl = VALUE(l, lo - lbase); \
					if (strNil(vl)) \
						continue; \
					if (!(STR_CMP)) \
						continue; \
					if (BATcount(rl) == BATcapacity(rl)) { \
						newcap = BATgrows(rl); \
						BATsetcount(rl, BATcount(rl)); \
						if (rr) \
							BATsetcount(rr, BATcount(rr)); \
						if (BATextend(rl, newcap) != GDK_SUCCEED || \
							(rr && BATextend(rr, newcap) != GDK_SUCCEED)) { \
							msg = createException(MAL, FNAME, SQLSTATE(HY013) MAL_MALLOC_FAIL); \
							goto exit; \
						} \
						assert(!rr || BATcapacity(rl) == BATcapacity(rr)); \
					} \
					if (BATcount(rl) > 0) { \
						if (last_lo + 1 != lo) \
							rl->tseqbase = oid_nil; \
						if (matches == 0) { \
							if (rr) \
								rr->trevsorted = false; \
							if (last_lo > lo) { \
								rl->tsorted = false; \
								rl->tkey = false; \
							} else if (last_lo < lo) { \
								rl->trevsorted = false; \
							} else { \
								rl->tkey = false; \
							} \
						} \
					} \
					APPEND(rl, lo); \
					if (rr) \
						APPEND(rr, ro); \
					last_lo = lo; \
					matches++; \
				} \
			} \
			if (rr) { \
				if (matches > 1) { \
					rr->tkey = false; \
					rr->tseqbase = oid_nil; \
					rl->trevsorted = false; \
				} else if (matches == 0) { \
					rskipped = BATcount(rr) > 0; \
				} else if (rskipped) { \
					rr->tseqbase = oid_nil; \
				} \
			} else if (matches > 1) { \
				rl->trevsorted = false; \
			} \
		} \
	} while (0)
5617 :
/*
 * STARTSWITH_SORTED_LOOP(STR_CMP, STR_LEN, FNAME)
 *
 * Join body for inputs that were sorted beforehand.  Unlike the nested-loop
 * variant, STR_CMP is used as a three-way result stored in cmp: 0 emits a
 * result pair, a negative value skips the left value, a positive value ends
 * the scan of the left side for the current right value.
 *
 * The macro is not self-contained.  It uses these names of the expanding
 * function: sorted_l, sorted_cl, sorted_r, sorted_cr, lci, rci, lbase, rbase,
 * lo, ro, vl, vr, vr_len, cmp, matches, newcap, n, lx, rx, last_lo, rskipped,
 * rl, rr, msg, qry_ctx, counter, and the label "exit".
 *
 * Steps:
 *  1. Both candidate iterators are initialised over the sorted inputs.
 *  2. lx is advanced to the index of the first non-nil left value.
 *  3. rx is advanced to the first non-nil right value and rci is rewound to
 *     that index so the main loop reads that value again.
 *  4. For each remaining right value the left iterator is repositioned at lx
 *     and scanned forward.  A left value that compares negative also bumps
 *     lx, so it is not visited again for later right values; this
 *     presumably relies on both sides being in ascending order with nils
 *     first -- confirm against BATsort.
 *  5. Matches are appended and the result properties are maintained with the
 *     same bookkeeping as in the nested-loop macro; a failed BATextend sets
 *     msg to an HY013 exception tagged with FNAME and jumps to "exit".
 *
 * NOTE(review): the left side is always read with canditer_next_dense while
 * every other read uses canditer_next; this looks like it expects a dense
 * left candidate iterator -- confirm that holds for sorted_cl.
 */
5618 : #define STARTSWITH_SORTED_LOOP(STR_CMP, STR_LEN, FNAME) \
5619 : do { \
5620 : canditer_init(&rci, sorted_r, sorted_cr); \
5621 : canditer_init(&lci, sorted_l, sorted_cl); \
5622 : for (lx = 0; lx < lci.ncand; lx++) { \
5623 : lo = canditer_next(&lci); \
5624 : vl = VALUE(l, lo - lbase); \
5625 : if (!strNil(vl)) \
5626 : break; \
5627 : } \
5628 : for (rx = 0; rx < rci.ncand; rx++) { \
5629 : ro = canditer_next(&rci); \
5630 : vr = VALUE(r, ro - rbase); \
5631 : if (!strNil(vr)) { \
5632 : canditer_setidx(&rci, rx); \
5633 : break; \
5634 : } \
5635 : } \
5636 : for (; rx < rci.ncand; rx++) { \
5637 : GDK_CHECK_TIMEOUT(qry_ctx, counter, GOTO_LABEL_TIMEOUT_HANDLER(exit, qry_ctx)); \
5638 : ro = canditer_next(&rci); \
5639 : vr = VALUE(r, ro - rbase); \
5640 : vr_len = STR_LEN; \
5641 : matches = 0; \
5642 : for (canditer_setidx(&lci, lx), n = lx; n < lci.ncand; n++) { \
5643 : lo = canditer_next_dense(&lci); \
5644 : vl = VALUE(l, lo - lbase); \
5645 : cmp = STR_CMP; \
5646 : if (cmp < 0) { \
5647 : lx++; \
5648 : continue; \
5649 : } \
5650 : else if (cmp > 0) \
5651 : break; \
5652 : if (BATcount(rl) == BATcapacity(rl)) { \
5653 : newcap = BATgrows(rl); \
5654 : BATsetcount(rl, BATcount(rl)); \
5655 : if (rr) \
5656 : BATsetcount(rr, BATcount(rr)); \
5657 : if (BATextend(rl, newcap) != GDK_SUCCEED || \
5658 : (rr && BATextend(rr, newcap) != GDK_SUCCEED)) { \
5659 : msg = createException(MAL, FNAME, SQLSTATE(HY013) MAL_MALLOC_FAIL); \
5660 : goto exit; \
5661 : } \
5662 : assert(!rr || BATcapacity(rl) == BATcapacity(rr)); \
5663 : } \
5664 : if (BATcount(rl) > 0) { \
5665 : if (last_lo + 1 != lo) \
5666 : rl->tseqbase = oid_nil; \
5667 : if (matches == 0) { \
5668 : if (rr) \
5669 : rr->trevsorted = false; \
5670 : if (last_lo > lo) { \
5671 : rl->tsorted = false; \
5672 : rl->tkey = false; \
5673 : } else if (last_lo < lo) { \
5674 : rl->trevsorted = false; \
5675 : } else { \
5676 : rl->tkey = false; \
5677 : } \
5678 : } \
5679 : } \
5680 : APPEND(rl, lo); \
5681 : if (rr) \
5682 : APPEND(rr, ro); \
5683 : last_lo = lo; \
5684 : matches++; \
5685 : } \
5686 : if (rr) { \
5687 : if (matches > 1) { \
5688 : rr->tkey = false; \
5689 : rr->tseqbase = oid_nil; \
5690 : rl->trevsorted = false; \
5691 : } else if (matches == 0) { \
5692 : rskipped = BATcount(rr) > 0; \
5693 : } else if (rskipped) { \
5694 : rr->tseqbase = oid_nil; \
5695 : } \
5696 : } else if (matches > 1) { \
5697 : rl->trevsorted = false; \
5698 : } \
5699 : } \
5700 : } while (0)
5701 :
5702 : static void
5703 951 : do_strrev(char *dst, const char *src, size_t len)
5704 : {
5705 951 : dst[len] = 0;
5706 951 : if (strNil(src)) {
5707 8 : assert(len == strlen(str_nil));
5708 8 : strcpy(dst, str_nil);
5709 8 : return;
5710 : }
5711 7177 : while (*src) {
5712 6234 : if ((*src & 0xF8) == 0xF0) {
5713 0 : assert(len >= 4);
5714 0 : dst[len - 4] = *src++;
5715 0 : assert((*src & 0xC0) == 0x80);
5716 0 : dst[len - 3] = *src++;
5717 0 : assert((*src & 0xC0) == 0x80);
5718 0 : dst[len - 2] = *src++;
5719 0 : assert((*src & 0xC0) == 0x80);
5720 0 : dst[len - 1] = *src++;
5721 0 : len -= 4;
5722 6234 : } else if ((*src & 0xF0) == 0xE0) {
5723 0 : assert(len >= 3);
5724 0 : dst[len - 3] = *src++;
5725 0 : assert((*src & 0xC0) == 0x80);
5726 0 : dst[len - 2] = *src++;
5727 0 : assert((*src & 0xC0) == 0x80);
5728 0 : dst[len - 1] = *src++;
5729 0 : len -= 3;
5730 6234 : } else if ((*src & 0xE0) == 0xC0) {
5731 72 : assert(len >= 2);
5732 72 : dst[len - 2] = *src++;
5733 72 : assert((*src & 0xC0) == 0x80);
5734 72 : dst[len - 1] = *src++;
5735 72 : len -= 2;
5736 : } else {
5737 6162 : assert(len >= 1);
5738 6162 : assert((*src & 0x80) == 0);
5739 6162 : dst[--len] = *src++;
5740 : }
5741 : }
5742 943 : assert(len == 0);
5743 : }
5744 :
5745 : static BAT *
5746 50 : batstr_strrev(BAT *b)
5747 : {
5748 50 : BAT *bn = NULL;
5749 50 : BATiter bi;
5750 50 : BUN p, q;
5751 50 : const char *src;
5752 50 : size_t len;
5753 50 : char *dst;
5754 50 : size_t dstlen;
5755 :
5756 50 : dstlen = 1024;
5757 50 : dst = GDKmalloc(dstlen);
5758 51 : if (dst == NULL)
5759 : return NULL;
5760 :
5761 51 : assert(b->ttype == TYPE_str);
5762 :
5763 51 : bn = COLnew(b->hseqbase, TYPE_str, BATcount(b), TRANSIENT);
5764 50 : if (bn == NULL) {
5765 0 : GDKfree(dst);
5766 0 : return NULL;
5767 : }
5768 :
5769 50 : bi = bat_iterator(b);
5770 979 : BATloop(b, p, q) {
5771 928 : src = (const char *) BUNtail(bi, p);
5772 932 : len = strlen(src);
5773 932 : if (len >= dstlen) {
5774 0 : char *ndst;
5775 0 : dstlen = len + 1024;
5776 0 : ndst = GDKrealloc(dst, dstlen);
5777 0 : if (ndst == NULL) {
5778 0 : bat_iterator_end(&bi);
5779 0 : BBPreclaim(bn);
5780 0 : GDKfree(dst);
5781 0 : return NULL;
5782 : }
5783 : dst = ndst;
5784 : }
5785 932 : do_strrev(dst, src, len);
5786 938 : if (BUNappend(bn, dst, false) != GDK_SUCCEED) {
5787 0 : bat_iterator_end(&bi);
5788 0 : BBPreclaim(bn);
5789 0 : GDKfree(dst);
5790 0 : return NULL;
5791 : }
5792 : }
5793 :
5794 51 : bat_iterator_end(&bi);
5795 51 : GDKfree(dst);
5796 51 : return bn;
5797 : }
5798 :
5799 : static BAT *
5800 33 : batstr_strlower(BAT *b)
5801 : {
5802 33 : BAT *bn = NULL;
5803 33 : BATiter bi;
5804 33 : BUN p, q;
5805 :
5806 33 : assert(b->ttype == TYPE_str);
5807 :
5808 33 : bn = COLnew(b->hseqbase, TYPE_str, BATcount(b), TRANSIENT);
5809 36 : if (bn == NULL)
5810 : return NULL;
5811 :
5812 36 : bi = bat_iterator(b);
5813 153 : BATloop(b, p, q) {
5814 118 : str vb = BUNtail(bi, p), vb_low = NULL;
5815 117 : if (STRlower(&vb_low, &vb)) {
5816 0 : bat_iterator_end(&bi);
5817 0 : BBPreclaim(bn);
5818 0 : return NULL;
5819 : }
5820 117 : if (BUNappend(bn, vb_low, false) != GDK_SUCCEED) {
5821 0 : GDKfree(vb_low);
5822 0 : bat_iterator_end(&bi);
5823 0 : BBPreclaim(bn);
5824 0 : return NULL;
5825 : }
5826 112 : GDKfree(vb_low);
5827 : }
5828 35 : bat_iterator_end(&bi);
5829 35 : return bn;
5830 : }
5831 :
5832 : static str
5833 28 : str_join_nested(BAT *rl, BAT *rr, BAT *l, BAT *r, BAT *cl, BAT *cr,
5834 : bit anti, int (*str_cmp)(const char *, const char *, int), str fname)
5835 : {
5836 28 : str msg = MAL_SUCCEED;
5837 :
5838 28 : size_t counter = 0;
5839 28 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
5840 :
5841 28 : TRC_DEBUG(ALGO,
5842 : "(%s, %s, l=%s#" BUNFMT "[%s]%s%s,"
5843 : "r=%s#" BUNFMT "[%s]%s%s,sl=%s#" BUNFMT "%s%s,"
5844 : "sr=%s#" BUNFMT "%s%s)\n",
5845 : fname, "nested loop",
5846 : BATgetId(l), BATcount(l), ATOMname(l->ttype),
5847 : l->tsorted ? "-sorted" : "",
5848 : l->trevsorted ? "-revsorted" : "",
5849 : BATgetId(r), BATcount(r), ATOMname(r->ttype),
5850 : r->tsorted ? "-sorted" : "",
5851 : r->trevsorted ? "-revsorted" : "",
5852 : cl ? BATgetId(cl) : "NULL", cl ? BATcount(cl) : 0,
5853 : cl && cl->tsorted ? "-sorted" : "",
5854 : cl && cl->trevsorted ? "-revsorted" : "",
5855 : cr ? BATgetId(cr) : "NULL", cr ? BATcount(cr) : 0,
5856 : cr && cr->tsorted ? "-sorted" : "",
5857 : cr && cr->trevsorted ? "-revsorted" : "");
5858 :
5859 84 : assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
5860 28 : assert(ATOMtype(l->ttype) == TYPE_str);
5861 :
5862 28 : BATiter li = bat_iterator(l);
5863 28 : BATiter ri = bat_iterator(r);
5864 28 : assert(ri.vh && r->ttype);
5865 :
5866 28 : struct canditer lci, rci;
5867 28 : oid lbase = l->hseqbase,
5868 28 : rbase = r->hseqbase,
5869 28 : lo, ro, last_lo = 0;
5870 28 : const char *lvals = (const char *) li.base,
5871 28 : *rvals = (const char *) ri.base,
5872 28 : *lvars = li.vh->base,
5873 28 : *rvars = ri.vh->base,
5874 : *vl, *vr;
5875 28 : BUN matches, newcap;
5876 28 : int rskipped = 0, vr_len = 0;
5877 :
5878 28 : if (anti)
5879 0 : STR_JOIN_NESTED_LOOP((str_cmp(vl, vr, vr_len) != 0), str_strlen(vr), fname);
5880 : else
5881 615 : STR_JOIN_NESTED_LOOP((str_cmp(vl, vr, vr_len) == 0), str_strlen(vr), fname);
5882 :
5883 28 : assert(!rr || BATcount(rl) == BATcount(rr));
5884 28 : BATsetcount(rl, BATcount(rl));
5885 28 : if (rr)
5886 28 : BATsetcount(rr, BATcount(rr));
5887 :
5888 27 : if (BATcount(rl) > 0) {
5889 15 : if (BATtdense(rl))
5890 11 : rl->tseqbase = ((oid *) rl->theap->base)[0];
5891 15 : if (rr && BATtdense(rr))
5892 12 : rr->tseqbase = ((oid *) rr->theap->base)[0];
5893 : } else {
5894 12 : rl->tseqbase = 0;
5895 12 : if (rr)
5896 12 : rr->tseqbase = 0;
5897 : }
5898 :
5899 27 : TRC_DEBUG(ALGO,
5900 : "(%s, l=%s,r=%s)=(%s#" BUNFMT "%s%s,%s#" BUNFMT "%s%s\n",
5901 : fname,
5902 : BATgetId(l), BATgetId(r), BATgetId(rl), BATcount(rl),
5903 : rl->tsorted ? "-sorted" : "",
5904 : rl->trevsorted ? "-revsorted" : "",
5905 : rr ? BATgetId(rr) : NULL, rr ? BATcount(rr) : 0,
5906 : rr && rr->tsorted ? "-sorted" : "",
5907 : rr && rr->trevsorted ? "-revsorted" : "");
5908 :
5909 27 : exit:
5910 27 : bat_iterator_end(&li);
5911 28 : bat_iterator_end(&ri);
5912 26 : return msg;
5913 : }
5914 :
5915 : static str
5916 58 : contains_join(BAT *rl, BAT *rr, BAT *l, BAT *r, BAT *cl, BAT *cr, bit anti,
5917 : int (*str_cmp)(const char *, const char *, int), const str fname)
5918 : {
5919 58 : str msg = MAL_SUCCEED;
5920 :
5921 58 : size_t counter = 0;
5922 58 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
5923 :
5924 59 : TRC_DEBUG(ALGO,
5925 : "(%s, l=%s#" BUNFMT "[%s]%s%s,"
5926 : "r=%s#" BUNFMT "[%s]%s%s,sl=%s#" BUNFMT "%s%s,"
5927 : "sr=%s#" BUNFMT "%s%s)\n",
5928 : fname,
5929 : BATgetId(l), BATcount(l), ATOMname(l->ttype),
5930 : l->tsorted ? "-sorted" : "",
5931 : l->trevsorted ? "-revsorted" : "",
5932 : BATgetId(r), BATcount(r), ATOMname(r->ttype),
5933 : r->tsorted ? "-sorted" : "",
5934 : r->trevsorted ? "-revsorted" : "",
5935 : cl ? BATgetId(cl) : "NULL", cl ? BATcount(cl) : 0,
5936 : cl && cl->tsorted ? "-sorted" : "",
5937 : cl && cl->trevsorted ? "-revsorted" : "",
5938 : cr ? BATgetId(cr) : "NULL", cr ? BATcount(cr) : 0,
5939 : cr && cr->tsorted ? "-sorted" : "",
5940 : cr && cr->trevsorted ? "-revsorted" : "");
5941 :
5942 59 : bool with_strimps = false;
5943 :
5944 59 : if (BAThasstrimps(l)) {
5945 16 : with_strimps = true;
5946 16 : if (STRMPcreate(l, NULL) != GDK_SUCCEED) {
5947 0 : GDKclrerr();
5948 0 : with_strimps = false;
5949 : }
5950 : }
5951 :
5952 192 : assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
5953 64 : assert(ATOMtype(l->ttype) == TYPE_str);
5954 :
5955 64 : BATiter li = bat_iterator(l);
5956 64 : BATiter ri = bat_iterator(r);
5957 64 : assert(ri.vh && r->ttype);
5958 :
5959 64 : struct canditer lci, rci;
5960 64 : oid lbase = l->hseqbase,
5961 64 : rbase = r->hseqbase,
5962 64 : lo, ro, lastl = 0;
5963 64 : const char *lvals = (const char *) li.base,
5964 64 : *rvals = (const char *) ri.base,
5965 64 : *lvars = li.vh->base,
5966 64 : *rvars = ri.vh->base,
5967 : *vl, *vr;
5968 64 : int rskipped = 0, vr_len = 0;
5969 64 : BUN matches, newcap;
5970 :
5971 64 : if (anti)
5972 0 : CONTAINS_JOIN_LOOP(str_cmp(vl, vr, vr_len) == 0, str_strlen(vr));
5973 : else
5974 19571 : CONTAINS_JOIN_LOOP(str_cmp(vl, vr, vr_len) != 0, str_strlen(vr));
5975 :
5976 62 : assert(!rr || BATcount(rl) == BATcount(rr));
5977 62 : BATsetcount(rl, BATcount(rl));
5978 63 : if (rr)
5979 63 : BATsetcount(rr, BATcount(rr));
5980 61 : if (BATcount(rl) > 0) {
5981 49 : if (BATtdense(rl))
5982 15 : rl->tseqbase = ((oid *) rl->theap->base)[0];
5983 49 : if (rr && BATtdense(rr))
5984 15 : rr->tseqbase = ((oid *) rr->theap->base)[0];
5985 : } else {
5986 12 : rl->tseqbase = 0;
5987 12 : if (rr)
5988 12 : rr->tseqbase = 0;
5989 : }
5990 :
5991 61 : TRC_DEBUG(ALGO,
5992 : "(%s, l=%s,r=%s)=(%s#" BUNFMT "%s%s,%s#" BUNFMT "%s%s\n",
5993 : fname,
5994 : BATgetId(l), BATgetId(r), BATgetId(rl), BATcount(rl),
5995 : rl->tsorted ? "-sorted" : "",
5996 : rl->trevsorted ? "-revsorted" : "",
5997 : rr ? BATgetId(rr) : NULL, rr ? BATcount(rr) : 0,
5998 : rr && rr->tsorted ? "-sorted" : "",
5999 : rr && rr->trevsorted ? "-revsorted" : "");
6000 61 : exit:
6001 61 : bat_iterator_end(&li);
6002 63 : bat_iterator_end(&ri);
6003 64 : return msg;
6004 : }
6005 :
6006 : static str
6007 52 : startswith_join(BAT **rl_ptr, BAT **rr_ptr, BAT *l, BAT *r, BAT *cl, BAT *cr,
6008 : bit anti, int (*str_cmp)(const char *, const char *, int), str fname)
6009 : {
6010 52 : str msg = MAL_SUCCEED;
6011 52 : gdk_return rc;
6012 :
6013 52 : size_t counter = 0;
6014 52 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
6015 :
6016 52 : assert(*rl_ptr && *rr_ptr);
6017 :
6018 52 : BAT *sorted_l = NULL, *sorted_r = NULL,
6019 52 : *sorted_cl = NULL, *sorted_cr = NULL,
6020 52 : *ord_sorted_l = NULL, *ord_sorted_r = NULL,
6021 52 : *proj_rl = NULL, *proj_rr = NULL,
6022 52 : *rl = *rl_ptr, *rr = *rr_ptr;
6023 :
6024 52 : TRC_DEBUG(ALGO,
6025 : "(%s, %s, l=%s#" BUNFMT "[%s]%s%s,"
6026 : "r=%s#" BUNFMT "[%s]%s%s,sl=%s#" BUNFMT "%s%s,"
6027 : "sr=%s#" BUNFMT "%s%s)\n",
6028 : fname, "sorted inputs",
6029 : BATgetId(l), BATcount(l), ATOMname(l->ttype),
6030 : l->tsorted ? "-sorted" : "",
6031 : l->trevsorted ? "-revsorted" : "",
6032 : BATgetId(r), BATcount(r), ATOMname(r->ttype),
6033 : r->tsorted ? "-sorted" : "",
6034 : r->trevsorted ? "-revsorted" : "",
6035 : cl ? BATgetId(cl) : "NULL", cl ? BATcount(cl) : 0,
6036 : cl && cl->tsorted ? "-sorted" : "",
6037 : cl && cl->trevsorted ? "-revsorted" : "",
6038 : cr ? BATgetId(cr) : "NULL", cr ? BATcount(cr) : 0,
6039 : cr && cr->tsorted ? "-sorted" : "",
6040 : cr && cr->trevsorted ? "-revsorted" : "");
6041 :
6042 52 : bool l_sorted = BATordered(l);
6043 52 : bool r_sorted = BATordered(r);
6044 :
6045 52 : if (l_sorted == FALSE) {
6046 38 : rc = BATsort(&sorted_l, &ord_sorted_l, NULL,
6047 : l, NULL, NULL, false, false, false);
6048 36 : if (rc != GDK_SUCCEED) {
6049 0 : throw(MAL, fname, "Sorting left input failed");
6050 : } else {
6051 36 : if (cl) {
6052 0 : rc = BATsort(&sorted_cl, NULL, NULL,
6053 : cl, ord_sorted_l, NULL, false, false, false);
6054 0 : if (rc != GDK_SUCCEED) {
6055 0 : BBPnreclaim(2, sorted_l, ord_sorted_l);
6056 0 : throw(MAL, fname, "Sorting left candidates input failed");
6057 : }
6058 : }
6059 : }
6060 : } else {
6061 14 : sorted_l = l;
6062 14 : sorted_cl = cl;
6063 : }
6064 :
6065 50 : if (r_sorted == FALSE) {
6066 34 : rc = BATsort(&sorted_r, &ord_sorted_r, NULL,
6067 : r, NULL, NULL, false, false, false);
6068 36 : if (rc != GDK_SUCCEED) {
6069 0 : BBPnreclaim(3, sorted_l, ord_sorted_l, sorted_cl);
6070 0 : throw(MAL, fname, "Sorting right input failed");
6071 : } else {
6072 36 : if (cr) {
6073 0 : rc = BATsort(&sorted_cr, NULL, NULL,
6074 : cr, ord_sorted_r, NULL, false, false, false);
6075 0 : if (rc != GDK_SUCCEED) {
6076 0 : BBPnreclaim(5, sorted_l, ord_sorted_l, sorted_cl, sorted_r, ord_sorted_r);
6077 0 : throw(MAL, fname, "Sorting right candidates input failed");
6078 : }
6079 : }
6080 : }
6081 : } else {
6082 16 : sorted_r = r;
6083 16 : sorted_cr = cr;
6084 : }
6085 :
6086 52 : assert(BATordered(sorted_l) && BATordered(sorted_r));
6087 :
6088 52 : BATiter li = bat_iterator(sorted_l);
6089 51 : BATiter ri = bat_iterator(sorted_r);
6090 51 : assert(ri.vh && r->ttype);
6091 :
6092 51 : struct canditer lci, rci;
6093 51 : oid lbase = sorted_l->hseqbase,
6094 51 : rbase = sorted_r->hseqbase,
6095 51 : lo, ro, last_lo = 0;
6096 51 : const char *lvals = (const char *) li.base,
6097 51 : *rvals = (const char *) ri.base,
6098 51 : *lvars = li.vh->base,
6099 51 : *rvars = ri.vh->base,
6100 : *vl, *vr;
6101 51 : BUN matches, newcap, n = 0, rx = 0, lx = 0;
6102 51 : int rskipped = 0, vr_len = 0, cmp = 0;
6103 :
6104 51 : if (anti)
6105 0 : STR_JOIN_NESTED_LOOP(str_cmp(vl, vr, vr_len) != 0, str_strlen(vr), fname);
6106 : else
6107 1690 : STARTSWITH_SORTED_LOOP(str_cmp(vl, vr, vr_len), str_strlen(vr), fname);
6108 :
6109 52 : assert(!rr || BATcount(rl) == BATcount(rr));
6110 52 : BATsetcount(rl, BATcount(rl));
6111 52 : if (rr)
6112 52 : BATsetcount(rr, BATcount(rr));
6113 :
6114 52 : if (BATcount(rl) > 0) {
6115 28 : if (BATtdense(rl))
6116 16 : rl->tseqbase = ((oid *) rl->theap->base)[0];
6117 28 : if (rr && BATtdense(rr))
6118 14 : rr->tseqbase = ((oid *) rr->theap->base)[0];
6119 : } else {
6120 24 : rl->tseqbase = 0;
6121 24 : if (rr)
6122 24 : rr->tseqbase = 0;
6123 : }
6124 :
6125 52 : if (l_sorted == FALSE) {
6126 38 : proj_rl = BATproject(rl, ord_sorted_l);
6127 36 : if (!proj_rl) {
6128 0 : msg = createException(MAL, fname, "Project left pre-sort order failed");
6129 0 : goto exit;
6130 : } else {
6131 36 : BBPreclaim(rl);
6132 37 : *rl_ptr = proj_rl;
6133 : }
6134 : }
6135 :
6136 51 : if (rr && r_sorted == FALSE) {
6137 35 : proj_rr = BATproject(rr, ord_sorted_r);
6138 34 : if (!proj_rr) {
6139 0 : BBPreclaim(proj_rl);
6140 0 : msg = createException(MAL, fname, "Project right pre-sort order failed");
6141 0 : goto exit;
6142 : } else {
6143 34 : BBPreclaim(rr);
6144 36 : *rr_ptr = proj_rr;
6145 : }
6146 : }
6147 :
6148 52 : TRC_DEBUG(ALGO,
6149 : "(%s, l=%s,r=%s)=(%s#" BUNFMT "%s%s,%s#" BUNFMT "%s%s\n",
6150 : fname,
6151 : BATgetId(l), BATgetId(r), BATgetId(rl), BATcount(rl),
6152 : rl->tsorted ? "-sorted" : "",
6153 : rl->trevsorted ? "-revsorted" : "",
6154 : rr ? BATgetId(rr) : NULL, rr ? BATcount(rr) : 0,
6155 : rr && rr->tsorted ? "-sorted" : "",
6156 : rr && rr->trevsorted ? "-revsorted" : "");
6157 :
6158 52 : exit:
6159 52 : if (l_sorted == FALSE)
6160 37 : BBPnreclaim(3, sorted_l, ord_sorted_l, sorted_cl);
6161 :
6162 53 : if (r_sorted == FALSE)
6163 36 : BBPnreclaim(3, sorted_r, ord_sorted_r, sorted_cr);
6164 :
6165 52 : bat_iterator_end(&li);
6166 51 : bat_iterator_end(&ri);
6167 51 : return msg;
6168 : }
6169 :
6170 : static str
6171 135 : STRjoin(bat *rl_id, bat *rr_id, const bat l_id, const bat r_id,
6172 : const bat cl_id, const bat cr_id, const bit anti, bool icase,
6173 : int (*str_cmp)(const char *, const char *, int), const str fname)
6174 : {
6175 135 : str msg = MAL_SUCCEED;
6176 :
6177 135 : BAT *rl = NULL, *rr = NULL, *l = NULL, *r = NULL, *cl = NULL, *cr = NULL;
6178 :
6179 135 : if (!(l = BATdescriptor(l_id)) || !(r = BATdescriptor(r_id))) {
6180 0 : BBPnreclaim(2, l, r);
6181 0 : throw(MAL, fname, RUNTIME_OBJECT_MISSING);
6182 : }
6183 :
6184 144 : if ((cl_id && !is_bat_nil(cl_id) && (cl = BATdescriptor(cl_id)) == NULL) ||
6185 144 : (cr_id && !is_bat_nil(cr_id) && (cr = BATdescriptor(cr_id)) == NULL)) {
6186 0 : BBPnreclaim(4, l, r, cl, cr);
6187 0 : throw(MAL, fname, RUNTIME_OBJECT_MISSING);
6188 : }
6189 :
6190 144 : rl = COLnew(0, TYPE_oid, BATcount(l), TRANSIENT);
6191 134 : if (rr_id)
6192 140 : rr = COLnew(0, TYPE_oid, BATcount(l), TRANSIENT);
6193 :
6194 137 : if (!rl || (rr_id && !rr)) {
6195 0 : BBPnreclaim(6, l, r, cl, cr, rl, rr);
6196 0 : throw(MAL, fname, SQLSTATE(HY013) MAL_MALLOC_FAIL);
6197 : }
6198 :
6199 137 : set_empty_bat_props(rl);
6200 137 : if (rr_id)
6201 137 : set_empty_bat_props(rr);
6202 :
6203 413 : assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
6204 137 : assert(ATOMtype(l->ttype) == TYPE_str);
6205 :
6206 137 : BAT *nl = l, *nr = r;
6207 :
6208 137 : if (strcmp(fname, "str.containsjoin") == 0) {
6209 59 : msg = contains_join(rl, rr, l, r, cl, cr, anti, str_cmp, fname);
6210 63 : if (msg) {
6211 0 : BBPnreclaim(6, rl, rr, l, r, cl, cr);
6212 0 : return msg;
6213 : }
6214 : } else {
6215 78 : struct canditer lci, rci;
6216 78 : canditer_init(&lci, l, cl);
6217 78 : canditer_init(&rci, r, cr);
6218 78 : BUN lcnt = lci.ncand, rcnt = rci.ncand;
6219 78 : BUN nl_cost = lci.ncand * rci.ncand,
6220 78 : sorted_cost =
6221 78 : (BUN) floor(0.8 * (lcnt*log2((double)lcnt)
6222 78 : + rcnt*log2((double)rcnt)));
6223 :
6224 78 : if (nl_cost < sorted_cost) {
6225 28 : msg = str_join_nested(rl, rr, nl, nr, cl, cr, anti, str_cmp, fname);
6226 : } else {
6227 50 : BAT *l_low = NULL, *r_low = NULL, *l_rev = NULL, *r_rev = NULL;
6228 50 : if (icase) {
6229 16 : l_low = batstr_strlower(nl);
6230 18 : if (l_low == NULL) {
6231 0 : BBPnreclaim(6, rl, rr, nl, nr, cl, cr);
6232 0 : throw(MAL, fname, "Failed lowering strings of left input");
6233 : }
6234 18 : r_low = batstr_strlower(nr);
6235 18 : if (r_low == NULL) {
6236 0 : BBPnreclaim(7, rl, rr, nl, nr, cl, cr, l_low);
6237 0 : throw(MAL, fname, "Failed lowering strings of right input");
6238 : }
6239 18 : BBPnreclaim(2, nl, nr);
6240 18 : nl = l_low;
6241 18 : nr = r_low;
6242 : }
6243 52 : if (strcmp(fname, "str.endswithjoin") == 0) {
6244 26 : l_rev = batstr_strrev(nl);
6245 25 : if (l_rev == NULL) {
6246 0 : BBPnreclaim(6, rl, rr, nl, nr, cl, cr);
6247 0 : throw(MAL, fname, "Failed reversing strings of left input");
6248 : }
6249 25 : r_rev = batstr_strrev(nr);
6250 26 : if (r_rev == NULL) {
6251 0 : BBPnreclaim(7, rl, rr, nl, nr, cl, cr, l_rev);
6252 0 : throw(MAL, fname, "Failed reversing strings of right input");
6253 : }
6254 26 : BBPnreclaim(2, nl, nr);
6255 26 : nl = l_rev;
6256 26 : nr = r_rev;
6257 : }
6258 52 : msg = startswith_join(&rl, &rr, nl, nr, cl, cr, anti, str_is_prefix, fname);
6259 : }
6260 : }
6261 :
6262 141 : if (!msg) {
6263 141 : *rl_id = rl->batCacheid;
6264 141 : BBPkeepref(rl);
6265 142 : if (rr_id) {
6266 142 : *rr_id = rr->batCacheid;
6267 142 : BBPkeepref(rr);
6268 : }
6269 : } else {
6270 0 : BBPnreclaim(2, rl, rr);
6271 : }
6272 :
6273 141 : BBPnreclaim(4, nl, nr, cl, cr);
6274 141 : return msg;
6275 : }
6276 :
/*
 * STRJOIN_MAPARGS: fetch the argument references of a string join pattern.
 *
 * Layout of the MAL arguments, in order:
 *   result left, [result right], left, right, [ignore case],
 *   left candidates, right candidates, ..., anti
 * The right result is present unless PCI->retc is 1 (RR_ID becomes 0
 * otherwise) and the ignore-case argument is present unless
 * PCI->argc - PCI->retc is 7 (IC_ID becomes NULL otherwise).
 *
 * ANTI is taken from the last argument.  For the two-result layouts that is
 * index 8 without and index 9 with the ignore-case argument, exactly the
 * fixed indices used before; for a single-result layout those fixed indices
 * would lie past the end of the argument list.
 *
 * Only the macro parameters STK and PCI are referenced, so the macro does
 * not depend on how the expanding function names its variables.
 */
#define STRJOIN_MAPARGS(STK, PCI, RL_ID, RR_ID, L_ID, R_ID, CL_ID, CR_ID, IC_ID, ANTI) \
	do {																\
		RL_ID = getArgReference(STK, PCI, 0);							\
		RR_ID = (PCI)->retc == 1 ? 0 : getArgReference(STK, PCI, 1);	\
		int strjoin_argidx_ = (PCI)->retc == 1 ? 1 : 2;					\
		L_ID = getArgReference(STK, PCI, strjoin_argidx_++);			\
		R_ID = getArgReference(STK, PCI, strjoin_argidx_++);			\
		IC_ID = (PCI)->argc - (PCI)->retc == 7 ?						\
			NULL : getArgReference(STK, PCI, strjoin_argidx_++);		\
		CL_ID = getArgReference(STK, PCI, strjoin_argidx_++);			\
		CR_ID = getArgReference(STK, PCI, strjoin_argidx_++);			\
		ANTI = getArgReference(STK, PCI, (PCI)->argc - 1);				\
	} while (0)
6291 :
6292 : static inline str
6293 81 : ignorecase(const bat *ic_id, bool *icase, str fname)
6294 : {
6295 81 : BAT *c = NULL;
6296 :
6297 81 : if ((c = BATdescriptor(*ic_id)) == NULL)
6298 0 : throw(MAL, fname, SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
6299 :
6300 88 : assert(BATcount(c) == 1);
6301 :
6302 88 : BATiter bi = bat_iterator(c);
6303 88 : *icase = *(bit *) BUNtloc(bi, 0);
6304 88 : bat_iterator_end(&bi);
6305 :
6306 88 : BBPreclaim(c);
6307 88 : return MAL_SUCCEED;
6308 : }
6309 :
6310 : /**
6311 : * @rl_id: result left oid
6312 : * @rr_id: result right oid
6313 : * @l_id: left oid
6314 : * @r_id: right oid
6315 : * @cl_id: candidates left oid
6316 : * @cr_id: candidates right oid
6317 : * @ic_id: ignore case oid
6318 : * @anti: anti join oid
6319 : */
6320 : static str
6321 35 : STRstartswithjoin(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
6322 : {
6323 35 : (void)cntxt;
6324 35 : (void)mb;
6325 :
6326 35 : str msg = MAL_SUCCEED;
6327 35 : bat *rl_id = NULL, *rr_id = NULL, *l_id = NULL, *r_id = NULL,
6328 35 : *cl_id = NULL, *cr_id = NULL, *ic_id = NULL;
6329 35 : bit *anti = NULL;
6330 35 : bool icase = false;
6331 :
6332 73 : STRJOIN_MAPARGS(stk, pci, rl_id, rr_id, l_id, r_id, cl_id, cr_id, ic_id, anti);
6333 :
6334 40 : if (pci->argc - pci->retc == 8)
6335 32 : msg = ignorecase(ic_id, &icase, "str.startswithjoin");
6336 :
6337 72 : return msg ? msg : STRjoin(rl_id, rr_id, *l_id, *r_id,
6338 : cl_id ? *cl_id : 0,
6339 : cr_id ? *cr_id : 0,
6340 64 : *anti, icase, icase ? str_is_iprefix : str_is_prefix,
6341 : "str.startswithjoin");
6342 : }
6343 :
6344 : /**
6345 : * @rl_id: result left oid
6346 : * @rr_id: result right oid
6347 : * @l_id: left oid
6348 : * @r_id: right oid
6349 : * @cl_id: candidates left oid
6350 : * @cr_id: candidates right oid
6351 : * @ic_id: ignore case oid
6352 : * @anti: anti join oid
6353 : */
6354 : static str
6355 37 : STRendswithjoin(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
6356 : {
6357 37 : (void) cntxt;
6358 37 : (void) mb;
6359 :
6360 37 : str msg = MAL_SUCCEED;
6361 37 : bat *rl_id = NULL, *rr_id = NULL, *l_id = NULL, *r_id = NULL,
6362 37 : *cl_id = NULL, *cr_id = NULL, *ic_id = NULL;
6363 37 : bit *anti = NULL;
6364 37 : bool icase = false;
6365 :
6366 76 : STRJOIN_MAPARGS(stk, pci, rl_id, rr_id, l_id, r_id, cl_id, cr_id, ic_id, anti);
6367 :
6368 40 : if (pci->argc - pci->retc == 8)
6369 32 : msg = ignorecase(ic_id, &icase, "str.endswithjoin");
6370 :
6371 72 : return msg ? msg : STRjoin(rl_id, rr_id, *l_id, *r_id,
6372 : cl_id ? *cl_id : 0, cr_id ? *cr_id : 0,
6373 64 : *anti, icase, icase ? str_is_isuffix : str_is_suffix,
6374 : "str.endswithjoin");
6375 : }
6376 :
6377 : /**
6378 : * @rl_id: result left oid
6379 : * @rr_id: result right oid
6380 : * @l_id: left oid
6381 : * @r_id: right oid
6382 : * @cl_id: candidates left oid
6383 : * @cr_id: candidates right oid
6384 : * @ic_id: ignore case oid
6385 : * @anti: anti join oid
6386 : */
6387 : static str
6388 56 : STRcontainsjoin(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
6389 : {
6390 56 : (void) cntxt;
6391 56 : (void) mb;
6392 :
6393 56 : str msg = MAL_SUCCEED;
6394 56 : bat *rl_id = NULL, *rr_id = NULL, *l_id = NULL, *r_id = NULL,
6395 56 : *cl_id = NULL, *cr_id = NULL, *ic_id = NULL;
6396 56 : bit *anti = NULL;
6397 56 : bool icase = false;
6398 :
6399 119 : STRJOIN_MAPARGS(stk, pci, rl_id, rr_id, l_id, r_id, cl_id, cr_id, ic_id, anti);
6400 :
6401 63 : if (pci->argc - pci->retc == 8)
6402 24 : msg = ignorecase(ic_id, &icase, "str.containsjoin");
6403 :
6404 87 : return msg ? msg : STRjoin(rl_id, rr_id, *l_id, *r_id,
6405 : cl_id ? *cl_id : 0, cr_id ? *cr_id : 0,
6406 109 : *anti, icase, icase ? str_icontains : str_contains,
6407 : "str.containsjoin");
6408 : }
6409 :
6410 : #include "mel.h"
6411 : mel_func str_init_funcs[] = {
6412 : command("str", "str", STRtostr, false, "Noop routine.", args(1,2, arg("",str),arg("s",str))),
6413 : command("str", "string", STRTail, false, "Return the tail s[offset..n]\nof a string s[0..n].", args(1,3, arg("",str),arg("s",str),arg("offset",int))),
6414 : command("str", "string3", STRSubString, false, "Return substring s[offset..offset+count] of a string s[0..n]", args(1,4, arg("",str),arg("s",str),arg("offset",int),arg("count",int))),
6415 : command("str", "length", STRLength, false, "Return the length of a string.", args(1,2, arg("",int),arg("s",str))),
6416 : command("str", "nbytes", STRBytes, false, "Return the string length in bytes.", args(1,2, arg("",int),arg("s",str))),
6417 : command("str", "unicodeAt", STRWChrAt, false, "get a unicode character\n(as an int) from a string position.", args(1,3, arg("",int),arg("s",str),arg("index",int))),
6418 : command("str", "unicode", STRFromWChr, false, "convert a unicode to a character.", args(1,2, arg("",str),arg("wchar",int))),
6419 : pattern("str", "startswith", STRstartswith, false, "Check if string starts with substring.", args(1,3, arg("",bit),arg("s",str),arg("prefix",str))),
6420 : pattern("str", "startswith", STRstartswith, false, "Check if string starts with substring, icase flag.", args(1,4, arg("",bit),arg("s",str),arg("prefix",str),arg("icase",bit))),
6421 : pattern("str", "endswith", STRendswith, false, "Check if string ends with substring.", args(1,3, arg("",bit),arg("s",str),arg("suffix",str))),
6422 : pattern("str", "endswith", STRendswith, false, "Check if string ends with substring, icase flag.", args(1,4, arg("",bit),arg("s",str),arg("suffix",str),arg("icase",bit))),
6423 : pattern("str", "contains", STRcontains, false, "Check if string haystack contains string needle.", args(1,3, arg("",bit),arg("haystack",str),arg("needle",str))),
6424 : pattern("str", "contains", STRcontains, false, "Check if string haystack contains string needle, icase flag.", args(1,4, arg("",bit),arg("haystack",str),arg("needle",str),arg("icase",bit))),
6425 : command("str", "toLower", STRlower, false, "Convert a string to lower case.", args(1,2, arg("",str),arg("s",str))),
6426 : command("str", "toUpper", STRupper, false, "Convert a string to upper case.", args(1,2, arg("",str),arg("s",str))),
6427 : pattern("str", "search", STRstr_search, false, "Search for a substring. Returns\nposition, -1 if not found.", args(1,3, arg("",int),arg("s",str),arg("c",str))),
6428 : pattern("str", "search", STRstr_search, false, "Search for a substring, icase flag. Returns\nposition, -1 if not found.", args(1,4, arg("",int),arg("s",str),arg("c",str),arg("icase",bit))),
6429 : pattern("str", "r_search", STRrevstr_search, false, "Reverse search for a substring. Returns\nposition, -1 if not found.", args(1,3, arg("",int),arg("s",str),arg("c",str))),
6430 : pattern("str", "r_search", STRrevstr_search, false, "Reverse search for a substring, icase flag. Returns\nposition, -1 if not found.", args(1,4, arg("",int),arg("s",str),arg("c",str),arg("icase",bit))),
6431 : command("str", "splitpart", STRsplitpart, false, "Split string on delimiter. Returns\ngiven field (counting from one.)", args(1,4, arg("",str),arg("s",str),arg("needle",str),arg("field",int))),
6432 : command("str", "trim", STRStrip, false, "Strip whitespaces around a string.", args(1,2, arg("",str),arg("s",str))),
6433 : command("str", "ltrim", STRLtrim, false, "Strip whitespaces from start of a string.", args(1,2, arg("",str),arg("s",str))),
6434 : command("str", "rtrim", STRRtrim, false, "Strip whitespaces from end of a string.", args(1,2, arg("",str),arg("s",str))),
6435 : command("str", "trim2", STRStrip2, false, "Remove the longest string containing only characters from the second string around the first string.", args(1,3, arg("",str),arg("s",str),arg("s2",str))),
6436 : command("str", "ltrim2", STRLtrim2, false, "Remove the longest string containing only characters from the second string from the start of the first string.", args(1,3, arg("",str),arg("s",str),arg("s2",str))),
6437 : command("str", "rtrim2", STRRtrim2, false, "Remove the longest string containing only characters from the second string from the end of the first string.", args(1,3, arg("",str),arg("s",str),arg("s2",str))),
6438 : command("str", "lpad", STRLpad, false, "Fill up a string to the given length prepending the whitespace character.", args(1,3, arg("",str),arg("s",str),arg("len",int))),
6439 : command("str", "rpad", STRRpad, false, "Fill up a string to the given length appending the whitespace character.", args(1,3, arg("",str),arg("s",str),arg("len",int))),
6440 : command("str", "lpad3", STRLpad3, false, "Fill up the first string to the given length prepending characters of the second string.", args(1,4, arg("",str),arg("s",str),arg("len",int),arg("s2",str))),
6441 : command("str", "rpad3", STRRpad3, false, "Fill up the first string to the given length appending characters of the second string.", args(1,4, arg("",str),arg("s",str),arg("len",int),arg("s2",str))),
6442 : command("str", "substitute", STRSubstitute, false, "Substitute first occurrence of 'src' by\n'dst'. Iff repeated = true this is\nrepeated while 'src' can be found in the\nresult string. In order to prevent\nrecursion and result strings of unlimited\nsize, repeating is only done iff src is\nnot a substring of dst.", args(1,5, arg("",str),arg("s",str),arg("src",str),arg("dst",str),arg("rep",bit))),
6443 : command("str", "like", STRlikewrap, false, "SQL pattern match function", args(1,3, arg("",bit),arg("s",str),arg("pat",str))),
6444 : command("str", "like3", STRlikewrap3, false, "SQL pattern match function", args(1,4, arg("",bit),arg("s",str),arg("pat",str),arg("esc",str))),
6445 : command("str", "ascii", STRascii, false, "Return unicode of head of string", args(1,2, arg("",int),arg("s",str))),
6446 : command("str", "substring", STRsubstringTail, false, "Extract the tail of a string", args(1,3, arg("",str),arg("s",str),arg("start",int))),
6447 : command("str", "substring3", STRsubstring, false, "Extract a substring from str starting at start, for length len", args(1,4, arg("",str),arg("s",str),arg("start",int),arg("len",int))),
6448 : command("str", "prefix", STRprefix, false, "Extract the prefix of a given length", args(1,3, arg("",str),arg("s",str),arg("l",int))),
6449 : command("str", "suffix", STRsuffix, false, "Extract the suffix of a given length", args(1,3, arg("",str),arg("s",str),arg("l",int))),
6450 : command("str", "stringleft", STRprefix, false, "", args(1,3, arg("",str),arg("s",str),arg("l",int))),
6451 : command("str", "stringright", STRsuffix, false, "", args(1,3, arg("",str),arg("s",str),arg("l",int))),
6452 : command("str", "locate", STRlocate, false, "Locate the start position of a string", args(1,3, arg("",int),arg("s1",str),arg("s2",str))),
6453 : command("str", "locate3", STRlocate3, false, "Locate the start position of a string", args(1,4, arg("",int),arg("s1",str),arg("s2",str),arg("start",int))),
6454 : command("str", "insert", STRinsert, false, "Insert a string into another", args(1,5, arg("",str),arg("s",str),arg("start",int),arg("l",int),arg("s2",str))),
6455 : command("str", "replace", STRreplace, false, "Insert a string into another", args(1,4, arg("",str),arg("s",str),arg("pat",str),arg("s2",str))),
6456 : command("str", "repeat", STRrepeat, false, "", args(1,3, arg("",str),arg("s2",str),arg("c",int))),
6457 : command("str", "space", STRspace, false, "", args(1,2, arg("",str),arg("l",int))),
6458 : command("str", "epilogue", STRepilogue, false, "", args(1,1, arg("",void))),
6459 : command("str", "asciify", STRasciify, false, "Transform string from UTF8 to ASCII", args(1, 2, arg("out",str), arg("in",str))),
6460 : pattern("str", "startswithselect", STRstartswithselect, false, "Select all head values of the first input BAT for which the\ntail value starts with the given prefix.", args(1,5, batarg("",oid),batarg("b",str),batarg("s",oid),arg("prefix",str),arg("anti",bit))),
6461 : pattern("str", "startswithselect", STRstartswithselect, false, "Select all head values of the first input BAT for which the\ntail value starts with the given prefix + icase.", args(1,6, batarg("",oid),batarg("b",str),batarg("s",oid),arg("prefix",str),arg("caseignore",bit),arg("anti",bit))),
6462 : pattern("str", "endswithselect", STRendswithselect, false, "Select all head values of the first input BAT for which the\ntail value end with the given suffix.", args(1,5, batarg("",oid),batarg("b",str),batarg("s",oid),arg("suffix",str),arg("anti",bit))),
6463 : pattern("str", "endswithselect", STRendswithselect, false, "Select all head values of the first input BAT for which the\ntail value end with the given suffix + icase.", args(1,6, batarg("",oid),batarg("b",str),batarg("s",oid),arg("suffix",str),arg("caseignore",bit),arg("anti",bit))),
6464 : pattern("str", "containsselect", STRcontainsselect, false, "Select all head values of the first input BAT for which the\ntail value contains the given needle.", args(1,5, batarg("",oid),batarg("b",str),batarg("s",oid),arg("needle",str),arg("anti",bit))),
6465 : pattern("str", "containsselect", STRcontainsselect, false, "Select all head values of the first input BAT for which the\ntail value contains the given needle + icase.", args(1,6, batarg("",oid),batarg("b",str),batarg("s",oid),arg("needle",str),arg("caseignore",bit),arg("anti",bit))),
6466 : pattern("str", "startswithjoin", STRstartswithjoin, false, "Join the string bat L with the prefix bat R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows.", args(2,9, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
6467 : pattern("str", "startswithjoin", STRstartswithjoin, false, "Join the string bat L with the prefix bat R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows + icase.", args(2,10, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
6468 : pattern("str", "startswithjoin", STRstartswithjoin, false, "The same as STRstartswithjoin, but only produce one output.", args(1,8,batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))),
6469 : pattern("str", "startswithjoin", STRstartswithjoin, false, "The same as STRstartswithjoin, but only produce one output + icase.", args(1,9,batarg("",oid),batarg("l",str),batarg("r",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))),
6470 : pattern("str", "endswithjoin", STRendswithjoin, false, "Join the string bat L with the suffix bat R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows.", args(2,9, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
6471 : pattern("str", "endswithjoin", STRendswithjoin, false, "Join the string bat L with the suffix bat R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows + icase.", args(2,10, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
6472 : pattern("str", "endswithjoin", STRendswithjoin, false, "The same as STRendswithjoin, but only produce one output.", args(1,8,batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))),
6473 : pattern("str", "endswithjoin", STRendswithjoin, false, "The same as STRendswithjoin, but only produce one output + icase.", args(1,9,batarg("",oid),batarg("l",str),batarg("r",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))),
6474 : pattern("str", "containsjoin", STRcontainsjoin, false, "Join the string bat L with the bat R if L contains the string of R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows.", args(2,9, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
6475 : pattern("str", "containsjoin", STRcontainsjoin, false, "Join the string bat L with the bat R if L contains the string of R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows + icase.", args(2,10, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
6476 : pattern("str", "containsjoin", STRcontainsjoin, false, "The same as STRcontainsjoin, but only produce one output.", args(1,8,batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))),
6477 : pattern("str", "containsjoin", STRcontainsjoin, false, "The same as STRcontainsjoin, but only produce one output + icase.", args(1,9,batarg("",oid),batarg("l",str),batarg("r",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))),
6478 : { .imp=NULL }
6479 : };
6480 : #include "mal_import.h"
/*
 * MSVC-only preparation for the startup hook below.  Any macro named
 * `read` is removed first so that the bare word can be used as the
 * attribute keyword of the section pragma, which then declares the
 * ".CRT$XCU" section with the read attribute.
 * NOTE(review): LIB_STARTUP_FUNC is defined outside this chunk;
 * presumably on MSVC it stores a pointer to the function in that
 * section so it is run when the library is loaded -- confirm.
 */
6481 : #ifdef _MSC_VER
6482 : #undef read
6483 : #pragma section(".CRT$XCU",read)
6484 : #endif
/*
 * init_str_mal -- library start-up hook for the "str" module.
 *
 * Its only action is a single call to mal_module2(), passing the module
 * name "str", the str_init_funcs table and STRprelude.  The second and
 * fifth arguments are NULL; what those slots stand for is not visible
 * from this part of the file.
 * NOTE(review): str_init_funcs is presumably the command/pattern table
 * that ends with the { .imp=NULL } entry just above -- confirm.
 */
6485 334 : LIB_STARTUP_FUNC(init_str_mal)
6486 334 : { mal_module2("str", NULL, str_init_funcs, STRprelude, NULL); }
|