Strings

Str module

The str module contains string manipulation utilities such as upper/lower case conversion, trimming, computing substrings and string search.

MODULE str;

COMMAND str.ascii(X_0:str):int;
COMMENT "Return unicode of head of string";

COMMAND str.asciify(X_0:str):str;
COMMENT "Transform string from UTF8 to ASCII";

PATTERN str.contains(X_0:str, X_1:str):bit;
COMMENT "Check if string haystack contains string needle.";

PATTERN str.contains(X_0:str, X_1:str, X_2:bit):bit;
COMMENT "Check if string haystack contains string needle, icase flag.";

PATTERN str.containsjoin(X_0:bat[:str], X_1:bat[:str], X_2:bat[:bit], X_3:bat[:oid], X_4:bat[:oid], X_5:bit, X_6:lng, X_7:bit):bat[:oid];
COMMENT "The same as STRcontainsjoin, but only produce one output + icase.";

PATTERN str.containsjoin(X_0:bat[:str], X_1:bat[:str], X_2:bat[:oid], X_3:bat[:oid], X_4:bit, X_5:lng, X_6:bit):bat[:oid];
COMMENT "The same as STRcontainsjoin, but only produce one output.";

PATTERN str.containsjoin(X_0:bat[:str], X_1:bat[:str], X_2:bat[:bit], X_3:bat[:oid], X_4:bat[:oid], X_5:bit, X_6:lng, X_7:bit) (X_8:bat[:oid], X_9:bat[:oid]);
COMMENT "Join the string bat L with the bat R if L contains the string of R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows + icase.";

PATTERN str.containsjoin(X_0:bat[:str], X_1:bat[:str], X_2:bat[:oid], X_3:bat[:oid], X_4:bit, X_5:lng, X_6:bit) (X_7:bat[:oid], X_8:bat[:oid]);
COMMENT "Join the string bat L with the bat R if L contains the string of R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows.";

PATTERN str.containsselect(X_0:bat[:str], X_1:bat[:oid], X_2:str, X_3:bit):bat[:oid];
COMMENT "Select all head values of the first input BAT for which the\ntail value contains the given needle.";

PATTERN str.containsselect(X_0:bat[:str], X_1:bat[:oid], X_2:str, X_3:bit, X_4:bit):bat[:oid];
COMMENT "Select all head values of the first input BAT for which the\ntail value contains the given needle + icase.";

PATTERN str.endswith(X_0:str, X_1:str):bit;
COMMENT "Check if string ends with substring.";

PATTERN str.endswith(X_0:str, X_1:str, X_2:bit):bit;
COMMENT "Check if string ends with substring, icase flag.";

PATTERN str.endswithjoin(X_0:bat[:str], X_1:bat[:str], X_2:bat[:bit], X_3:bat[:oid], X_4:bat[:oid], X_5:bit, X_6:lng, X_7:bit):bat[:oid];
COMMENT "The same as STRendswithjoin, but only produce one output + icase.";

PATTERN str.endswithjoin(X_0:bat[:str], X_1:bat[:str], X_2:bat[:oid], X_3:bat[:oid], X_4:bit, X_5:lng, X_6:bit):bat[:oid];
COMMENT "The same as STRendswithjoin, but only produce one output.";

PATTERN str.endswithjoin(X_0:bat[:str], X_1:bat[:str], X_2:bat[:bit], X_3:bat[:oid], X_4:bat[:oid], X_5:bit, X_6:lng, X_7:bit) (X_8:bat[:oid], X_9:bat[:oid]);
COMMENT "Join the string bat L with the suffix bat R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows + icase.";

PATTERN str.endswithjoin(X_0:bat[:str], X_1:bat[:str], X_2:bat[:oid], X_3:bat[:oid], X_4:bit, X_5:lng, X_6:bit) (X_7:bat[:oid], X_8:bat[:oid]);
COMMENT "Join the string bat L with the suffix bat R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows.";

PATTERN str.endswithselect(X_0:bat[:str], X_1:bat[:oid], X_2:str, X_3:bit):bat[:oid];
COMMENT "Select all head values of the first input BAT for which the\ntail value ends with the given suffix.";

PATTERN str.endswithselect(X_0:bat[:str], X_1:bat[:oid], X_2:str, X_3:bit, X_4:bit):bat[:oid];
COMMENT "Select all head values of the first input BAT for which the\ntail value ends with the given suffix + icase.";

COMMAND str.epilogue():void;
COMMENT "";

COMMAND str.insert(X_0:str, X_1:int, X_2:int, X_3:str):str;
COMMENT "Insert a string into another";

COMMAND str.length(X_0:str):int;
COMMENT "Return the length of a string.";

COMMAND str.like(X_0:str, X_1:str):bit;
COMMENT "SQL pattern match function";

COMMAND str.like3(X_0:str, X_1:str, X_2:str):bit;
COMMENT "SQL pattern match function";

COMMAND str.locate(X_0:str, X_1:str):int;
COMMENT "Locate the start position of a string";

COMMAND str.locate3(X_0:str, X_1:str, X_2:int):int;
COMMENT "Locate the start position of a string";

COMMAND str.lpad(X_0:str, X_1:int):str;
COMMENT "Fill up a string to the given length prepending the whitespace character.";

COMMAND str.lpad3(X_0:str, X_1:int, X_2:str):str;
COMMENT "Fill up the first string to the given length prepending characters of the second string.";

COMMAND str.ltrim(X_0:str):str;
COMMENT "Strip whitespaces from start of a string.";

COMMAND str.ltrim2(X_0:str, X_1:str):str;
COMMENT "Remove the longest string containing only characters from the second string from the start of the first string.";

COMMAND str.nbytes(X_0:str):int;
COMMENT "Return the string length in bytes.";

COMMAND str.prefix(X_0:str, X_1:int):str;
COMMENT "Extract the prefix of a given length";

PATTERN str.r_search(X_0:str, X_1:str):int;
COMMENT "Reverse search for a substring. Returns\nposition, -1 if not found.";

PATTERN str.r_search(X_0:str, X_1:str, X_2:bit):int;
COMMENT "Reverse search for a substring, icase flag. Returns\nposition, -1 if not found.";

COMMAND str.repeat(X_0:str, X_1:int):str;
COMMENT "";

COMMAND str.replace(X_0:str, X_1:str, X_2:str, X_3:str):str;
COMMENT "";

COMMAND str.replace(X_0:str, X_1:str, X_2:str):str;
COMMENT "Replace all occurrences of the second string in the first string with the third string.";

COMMAND str.rpad(X_0:str, X_1:int):str;
COMMENT "Fill up a string to the given length appending the whitespace character.";

COMMAND str.rpad3(X_0:str, X_1:int, X_2:str):str;
COMMENT "Fill up the first string to the given length appending characters of the second string.";

COMMAND str.rtrim(X_0:str):str;
COMMENT "Strip whitespaces from end of a string.";

COMMAND str.rtrim2(X_0:str, X_1:str):str;
COMMENT "Remove the longest string containing only characters from the second string from the end of the first string.";

PATTERN str.search(X_0:str, X_1:str):int;
COMMENT "Search for a substring. Returns\nposition, -1 if not found.";

PATTERN str.search(X_0:str, X_1:str, X_2:bit):int;
COMMENT "Search for a substring, icase flag. Returns\nposition, -1 if not found.";

COMMAND str.space(X_0:int):str;
COMMENT "";

COMMAND str.splitpart(X_0:str, X_1:str, X_2:int):str;
COMMENT "Split string on delimiter. Returns\ngiven field (counting from one.)";

PATTERN str.startswith(X_0:str, X_1:str):bit;
COMMENT "Check if string starts with substring.";

PATTERN str.startswith(X_0:str, X_1:str, X_2:bit):bit;
COMMENT "Check if string starts with substring, icase flag.";

PATTERN str.startswithjoin(X_0:bat[:str], X_1:bat[:str], X_2:bat[:bit], X_3:bat[:oid], X_4:bat[:oid], X_5:bit, X_6:lng, X_7:bit):bat[:oid];
COMMENT "The same as STRstartswithjoin, but only produce one output + icase.";

PATTERN str.startswithjoin(X_0:bat[:str], X_1:bat[:str], X_2:bat[:oid], X_3:bat[:oid], X_4:bit, X_5:lng, X_6:bit):bat[:oid];
COMMENT "The same as STRstartswithjoin, but only produce one output.";

PATTERN str.startswithjoin(X_0:bat[:str], X_1:bat[:str], X_2:bat[:bit], X_3:bat[:oid], X_4:bat[:oid], X_5:bit, X_6:lng, X_7:bit) (X_8:bat[:oid], X_9:bat[:oid]);
COMMENT "Join the string bat L with the prefix bat R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows + icase.";

PATTERN str.startswithjoin(X_0:bat[:str], X_1:bat[:str], X_2:bat[:oid], X_3:bat[:oid], X_4:bit, X_5:lng, X_6:bit) (X_7:bat[:oid], X_8:bat[:oid]);
COMMENT "Join the string bat L with the prefix bat R\nwith optional candidate lists SL and SR\nThe result is two aligned bats with oids of matching rows.";

PATTERN str.startswithselect(X_0:bat[:str], X_1:bat[:oid], X_2:str, X_3:bit):bat[:oid];
COMMENT "Select all head values of the first input BAT for which the\ntail value starts with the given prefix.";

PATTERN str.startswithselect(X_0:bat[:str], X_1:bat[:oid], X_2:str, X_3:bit, X_4:bit):bat[:oid];
COMMENT "Select all head values of the first input BAT for which the\ntail value starts with the given prefix + icase.";

COMMAND str.str(X_0:str):str;
COMMENT "Noop routine.";

COMMAND str.string(X_0:str, X_1:int):str;
COMMENT "Return the tail s[offset..n]\nof a string s[0..n].";

COMMAND str.string3(X_0:str, X_1:int, X_2:int):str;
COMMENT "Return substring s[offset..offset+count] of a string s[0..n]";

COMMAND str.stringleft(X_0:str, X_1:int):str;
COMMENT "";

COMMAND str.stringright(X_0:str, X_1:int):str;
COMMENT "";

COMMAND str.substitute(X_0:str, X_1:str, X_2:str, X_3:bit):str;
COMMENT "Substitute first occurrence of 'src' by\n'dst'.  Iff repeated = true this is\nrepeated while 'src' can be found in the\nresult string. In order to prevent\nrecursion and result strings of unlimited\nsize, repeating is only done iff src is\nnot a substring of dst.";

COMMAND str.substring(X_0:str, X_1:int):str;
COMMENT "Extract the tail of a string";

COMMAND str.substring3(X_0:str, X_1:int, X_2:int):str;
COMMENT "Extract a substring from str starting at start, for length len";

COMMAND str.suffix(X_0:str, X_1:int):str;
COMMENT "Extract the suffix of a given length";

COMMAND str.toLower(X_0:str):str;
COMMENT "Convert a string to lower case.";

COMMAND str.toUpper(X_0:str):str;
COMMENT "Convert a string to upper case.";

COMMAND str.trim(X_0:str):str;
COMMENT "Strip whitespaces around a string.";

COMMAND str.trim2(X_0:str, X_1:str):str;
COMMENT "Remove the longest string containing only characters from the second string around the first string.";

COMMAND str.unicode(X_0:int):str;
COMMENT "convert a unicode to a character.";

COMMAND str.unicodeAt(X_0:str, X_1:int):int;
COMMENT "get a unicode character\n(as an int) from a string position.";

Batstr module

MODULE batstr;

PATTERN batstr.ascii(X_0:bat[:str]):bat[:int];
COMMENT "Return unicode of head of string";

PATTERN batstr.ascii(X_0:bat[:str], X_1:bat[:oid]):bat[:int];
COMMENT "Return unicode of head of string";

PATTERN batstr.asciify(X_0:bat[:str]):bat[:str];
COMMENT "Transform BAT of strings from UTF8 to ASCII";

PATTERN batstr.asciify(X_0:bat[:str], X_1:bat[:oid]):bat[:str];
COMMENT "Transform BAT of strings from UTF8 to ASCII";

PATTERN batstr.contains(X_0:bat[:str], X_1:bat[:str]):bat[:bit];
COMMENT "Check if bat string haystack contains bat string needle.";

PATTERN batstr.contains(X_0:bat[:str], X_1:bat[:str], X_2:bat[:oid], X_3:bat[:oid]):bat[:bit];
COMMENT "Check if bat string haystack contains bat string needle (with CLs).";

PATTERN batstr.contains(X_0:bat[:str], X_1:bat[:str], X_2:bit):bat[:bit];
COMMENT "Check if bat string haystack contains bat string needle, icase flag.";

PATTERN batstr.contains(X_0:bat[:str], X_1:bat[:str], X_2:bit, X_3:bat[:oid], X_4:bat[:oid]):bat[:bit];
COMMENT "Check if bat string haystack contains bat string needle (with CLs) + icase flag.";

PATTERN batstr.contains(X_0:bat[:str], X_1:str):bat[:bit];
COMMENT "Check if bat string haystack contains string needle.";

PATTERN batstr.contains(X_0:bat[:str], X_1:str, X_2:bat[:oid]):bat[:bit];
COMMENT "Check if bat string haystack contains string needle (with CL).";

PATTERN batstr.contains(X_0:bat[:str], X_1:str, X_2:bit):bat[:bit];
COMMENT "Check if bat string haystack contains string needle, icase flag.";

PATTERN batstr.contains(X_0:bat[:str], X_1:str, X_2:bit, X_3:bat[:oid]):bat[:bit];
COMMENT "Check if bat string haystack contains string needle (with CL) + icase flag.";

PATTERN batstr.contains(X_0:str, X_1:bat[:str]):bat[:bit];
COMMENT "Check if string haystack contains bat string needle.";

PATTERN batstr.contains(X_0:str, X_1:bat[:str], X_2:bat[:oid]):bat[:bit];
COMMENT "Check if string haystack contains bat string needle (with CL).";

PATTERN batstr.contains(X_0:str, X_1:bat[:str], X_2:bit):bat[:bit];
COMMENT "Check if string haystack contains bat string needle + icase flag.";

PATTERN batstr.contains(X_0:str, X_1:bat[:str], X_2:bit, X_3:bat[:oid]):bat[:bit];
COMMENT "Check if string haystack contains bat string needle (with CL) + icase flag.";

PATTERN batstr.endswith(X_0:bat[:str], X_1:bat[:str]):bat[:bit];
COMMENT "Check if bat string ends with bat substring.";

PATTERN batstr.endswith(X_0:bat[:str], X_1:bat[:str], X_2:bat[:oid], X_3:bat[:oid]):bat[:bit];
COMMENT "Check if bat string ends with bat substring (with CLs).";

PATTERN batstr.endswith(X_0:bat[:str], X_1:bat[:str], X_2:bit):bat[:bit];
COMMENT "Check if bat string ends with bat substring, icase flag.";

PATTERN batstr.endswith(X_0:bat[:str], X_1:bat[:str], X_2:bit, X_3:bat[:oid], X_4:bat[:oid]):bat[:bit];
COMMENT "Check if bat string ends with bat substring (with CLs) + icase flag.";

PATTERN batstr.endswith(X_0:bat[:str], X_1:str):bat[:bit];
COMMENT "Check if bat string ends with substring.";

PATTERN batstr.endswith(X_0:bat[:str], X_1:str, X_2:bat[:oid]):bat[:bit];
COMMENT "Check if bat string(with CL) ends with substring.";

PATTERN batstr.endswith(X_0:bat[:str], X_1:str, X_2:bit):bat[:bit];
COMMENT "Check if bat string ends with substring, icase flag.";

PATTERN batstr.endswith(X_0:bat[:str], X_1:str, X_2:bit, X_3:bat[:oid]):bat[:bit];
COMMENT "Check if bat string(with CL) ends with substring + icase flag.";

PATTERN batstr.endswith(X_0:str, X_1:bat[:str]):bat[:bit];
COMMENT "Check if string ends with bat substring.";

PATTERN batstr.endswith(X_0:str, X_1:bat[:str], X_2:bat[:oid]):bat[:bit];
COMMENT "Check if string ends with bat substring(with CL).";

PATTERN batstr.endswith(X_0:str, X_1:bat[:str], X_2:bit):bat[:bit];
COMMENT "Check if string ends with bat substring + icase flag.";

PATTERN batstr.endswith(X_0:str, X_1:bat[:str], X_2:bit, X_3:bat[:oid]):bat[:bit];
COMMENT "Check if string ends with bat substring(with CL) + icase flag.";

PATTERN batstr.insert(X_0:bat[:str], X_1:bat[:int], X_2:bat[:int], X_3:bat[:str]):bat[:str];
COMMENT "Insert a string into another";

PATTERN batstr.insert(X_0:bat[:str], X_1:int, X_2:int, X_3:str):bat[:str];
COMMENT "Insert a string into another";

PATTERN batstr.length(X_0:bat[:str]):bat[:int];
COMMENT "Return the length of a string.";

PATTERN batstr.length(X_0:bat[:str], X_1:bat[:oid]):bat[:int];
COMMENT "Return the length of a string.";

PATTERN batstr.locate(X_0:bat[:str], X_1:bat[:str]):bat[:int];
COMMENT "Locate the start position of a string";

PATTERN batstr.locate(X_0:bat[:str], X_1:bat[:str], X_2:bat[:oid], X_3:bat[:oid]):bat[:int];
COMMENT "Locate the start position of a string";

PATTERN batstr.locate(X_0:str, X_1:bat[:str]):bat[:int];
COMMENT "Locate the start position of a string";

PATTERN batstr.locate(X_0:str, X_1:bat[:str], X_2:bat[:oid]):bat[:int];
COMMENT "Locate the start position of a string";

PATTERN batstr.locate(X_0:bat[:str], X_1:str):bat[:int];
COMMENT "Locate the start position of a string";

PATTERN batstr.locate(X_0:bat[:str], X_1:str, X_2:bat[:oid]):bat[:int];
COMMENT "Locate the start position of a string";

PATTERN batstr.locate3(X_0:bat[:str], X_1:bat[:str], X_2:bat[:int]):bat[:int];
COMMENT "Locate the start position of a string";

PATTERN batstr.locate3(X_0:bat[:str], X_1:str, X_2:int):bat[:int];
COMMENT "Locate the start position of a string";

PATTERN batstr.lpad(X_0:str, X_1:bat[:int]):bat[:str];
COMMENT "Prepend whitespaces to the strings to reach the given lengths. Truncate the strings on the right if their lengths are larger than the given lengths.";

PATTERN batstr.lpad(X_0:str, X_1:bat[:int], X_2:bat[:oid]):bat[:str];
COMMENT "Prepend whitespaces to the strings to reach the given lengths. Truncate the strings on the right if their lengths are larger than the given lengths.";

PATTERN batstr.lpad(X_0:bat[:str], X_1:bat[:int]):bat[:str];
COMMENT "Prepend whitespaces to the strings to reach the given lengths. Truncate the strings on the right if their lengths are larger than the given lengths.";

PATTERN batstr.lpad(X_0:bat[:str], X_1:bat[:int], X_2:bat[:oid], X_3:bat[:oid]):bat[:str];
COMMENT "Prepend whitespaces to the strings to reach the given lengths. Truncate the strings on the right if their lengths are larger than the given lengths.";

PATTERN batstr.lpad(X_0:bat[:str], X_1:int):bat[:str];
COMMENT "Prepend whitespaces to the strings to reach the given length. Truncate the strings on the right if their lengths are larger than the given length.";

PATTERN batstr.lpad(X_0:bat[:str], X_1:int, X_2:bat[:oid]):bat[:str];
COMMENT "Prepend whitespaces to the strings to reach the given length. Truncate the strings on the right if their lengths are larger than the given length.";

PATTERN batstr.lpad3(X_0:bat[:str], X_1:bat[:int], X_2:bat[:str]):bat[:str];
COMMENT "Prepend the second strings to the first strings to reach the given lengths. Truncate the first strings on the right if their lengths are larger than the given lengths.";

PATTERN batstr.lpad3(X_0:bat[:str], X_1:bat[:int], X_2:str):bat[:str];
COMMENT "Prepend the second string to the first strings to reach the given lengths. Truncate the first strings on the right if their lengths are larger than the given lengths.";

PATTERN batstr.lpad3(X_0:bat[:str], X_1:int, X_2:bat[:str]):bat[:str];
COMMENT "Prepend the second strings to the first strings to reach the given length. Truncate the first strings on the right if their lengths are larger than the given length.";

PATTERN batstr.lpad3(X_0:bat[:str], X_1:int, X_2:str):bat[:str];
COMMENT "Prepend the second string to the first strings to reach the given length. Truncate the first strings on the right if their lengths are larger than the given length.";

PATTERN batstr.ltrim(X_0:bat[:str]):bat[:str];
COMMENT "Strip whitespaces from start of a string.";

PATTERN batstr.ltrim(X_0:bat[:str], X_1:bat[:oid]):bat[:str];
COMMENT "Strip whitespaces from start of a string.";

PATTERN batstr.ltrim2(X_0:str, X_1:bat[:str]):bat[:str];
COMMENT "Strip characters in the second string from start of the first strings.";

PATTERN batstr.ltrim2(X_0:str, X_1:bat[:str], X_2:bat[:oid]):bat[:str];
COMMENT "Strip characters in the second string from start of the first strings.";

PATTERN batstr.ltrim2(X_0:bat[:str], X_1:bat[:str]):bat[:str];
COMMENT "Strip characters in the second strings from start of the first strings.";

PATTERN batstr.ltrim2(X_0:bat[:str], X_1:bat[:str], X_2:bat[:oid], X_3:bat[:oid]):bat[:str];
COMMENT "Strip characters in the second strings from start of the first strings.";

PATTERN batstr.ltrim2(X_0:bat[:str], X_1:str):bat[:str];
COMMENT "Strip characters in the second string from start of the first strings.";

PATTERN batstr.ltrim2(X_0:bat[:str], X_1:str, X_2:bat[:oid]):bat[:str];
COMMENT "Strip characters in the second string from start of the first strings.";

PATTERN batstr.nbytes(X_0:bat[:str]):bat[:int];
COMMENT "Return the string length in bytes.";

PATTERN batstr.nbytes(X_0:bat[:str], X_1:bat[:oid]):bat[:int];
COMMENT "Return the string length in bytes.";

PATTERN batstr.r_search(X_0:bat[:str], X_1:bat[:str]):bat[:int];
COMMENT "Reverse search for a substring. Returns position, -1 if not found.";

PATTERN batstr.r_search(X_0:bat[:str], X_1:bat[:str], X_2:bat[:oid], X_3:bat[:oid]):bat[:int];
COMMENT "Reverse search for a substring (with CLs). Returns position, -1 if not found.";

PATTERN batstr.r_search(X_0:bat[:str], X_1:bat[:str], X_2:bit):bat[:int];
COMMENT "Reverse search for a substring + icase flag. Returns position, -1 if not found.";

PATTERN batstr.r_search(X_0:bat[:str], X_1:bat[:str], X_2:bit, X_3:bat[:oid], X_4:bat[:oid]):bat[:int];
COMMENT "Reverse search for a substring (with CLs) + icase flag. Returns position, -1 if not found.";

PATTERN batstr.r_search(X_0:bat[:str], X_1:str):bat[:int];
COMMENT "Reverse search for a substring. Returns position, -1 if not found.";

PATTERN batstr.r_search(X_0:bat[:str], X_1:str, X_2:bat[:oid]):bat[:int];
COMMENT "Reverse search for a substring (with CL). Returns position, -1 if not found.";

PATTERN batstr.r_search(X_0:bat[:str], X_1:str, X_2:bit):bat[:int];
COMMENT "Reverse search for a substring + icase flag. Returns position, -1 if not found.";

PATTERN batstr.r_search(X_0:bat[:str], X_1:str, X_2:bit, X_3:bat[:oid]):bat[:int];
COMMENT "Reverse search for a substring (with CL) + icase flag. Returns position, -1 if not found.";

PATTERN batstr.r_search(X_0:str, X_1:bat[:str]):bat[:int];
COMMENT "Reverse search for a substring. Returns position, -1 if not found.";

PATTERN batstr.r_search(X_0:str, X_1:bat[:str], X_2:bat[:oid]):bat[:int];
COMMENT "Reverse search for a substring (with CL). Returns position, -1 if not found.";

PATTERN batstr.r_search(X_0:str, X_1:bat[:str], X_2:bit):bat[:int];
COMMENT "Reverse search for a substring + icase flag. Returns position, -1 if not found.";

PATTERN batstr.r_search(X_0:str, X_1:bat[:str], X_2:bit, X_3:bat[:oid]):bat[:int];
COMMENT "Reverse search for a substring (with CL) + icase flag. Returns position, -1 if not found.";

PATTERN batstr.repeat(X_0:bat[:str], X_1:bat[:int]):bat[:str];
COMMENT "";

PATTERN batstr.repeat(X_0:bat[:str], X_1:bat[:int], X_2:bat[:oid], X_3:bat[:oid]):bat[:str];
COMMENT "";

PATTERN batstr.repeat(X_0:str, X_1:bat[:int]):bat[:str];
COMMENT "";

PATTERN batstr.repeat(X_0:str, X_1:bat[:int], X_2:bat[:oid]):bat[:str];
COMMENT "";

PATTERN batstr.repeat(X_0:bat[:str], X_1:int):bat[:str];
COMMENT "";

PATTERN batstr.repeat(X_0:bat[:str], X_1:int, X_2:bat[:oid]):bat[:str];
COMMENT "";

PATTERN batstr.replace(X_0:bat[:str], X_1:bat[:str], X_2:bat[:str]):bat[:str];
COMMENT "Replace all occurrences of the second string in the first string with the third string.";

PATTERN batstr.replace(X_0:bat[:str], X_1:str, X_2:str):bat[:str];
COMMENT "Replace all occurrences of the second string in the first string with the third string.";

PATTERN batstr.rpad(X_0:str, X_1:bat[:int]):bat[:str];
COMMENT "Append whitespaces to the strings to reach the given lengths. Truncate the strings on the right if their lengths are larger than the given lengths.";

PATTERN batstr.rpad(X_0:str, X_1:bat[:int], X_2:bat[:oid]):bat[:str];
COMMENT "Append whitespaces to the strings to reach the given lengths. Truncate the strings on the right if their lengths are larger than the given lengths.";

PATTERN batstr.rpad(X_0:bat[:str], X_1:bat[:int]):bat[:str];
COMMENT "Append whitespaces to the strings to reach the given lengths. Truncate the strings on the right if their lengths are larger than the given lengths.";

PATTERN batstr.rpad(X_0:bat[:str], X_1:bat[:int], X_2:bat[:oid], X_3:bat[:oid]):bat[:str];
COMMENT "Append whitespaces to the strings to reach the given lengths. Truncate the strings on the right if their lengths are larger than the given lengths.";

PATTERN batstr.rpad(X_0:bat[:str], X_1:int):bat[:str];
COMMENT "Append whitespaces to the strings to reach the given length. Truncate the strings on the right if their lengths are larger than the given length.";

PATTERN batstr.rpad(X_0:bat[:str], X_1:int, X_2:bat[:oid]):bat[:str];
COMMENT "Append whitespaces to the strings to reach the given length. Truncate the strings on the right if their lengths are larger than the given length.";

PATTERN batstr.rpad3(X_0:bat[:str], X_1:bat[:int], X_2:bat[:str]):bat[:str];
COMMENT "Append the second strings to the first strings to reach the given lengths. Truncate the first strings on the right if their lengths are larger than the given lengths.";

PATTERN batstr.rpad3(X_0:bat[:str], X_1:bat[:int], X_2:str):bat[:str];
COMMENT "Append the second string to the first strings to reach the given lengths. Truncate the first strings on the right if their lengths are larger than the given lengths.";

PATTERN batstr.rpad3(X_0:bat[:str], X_1:int, X_2:bat[:str]):bat[:str];
COMMENT "Append the second strings to the first strings to reach the given length. Truncate the first strings on the right if their lengths are larger than the given length.";

PATTERN batstr.rpad3(X_0:bat[:str], X_1:int, X_2:str):bat[:str];
COMMENT "Append the second string to the first strings to reach the given length. Truncate the first strings on the right if their lengths are larger than the given length.";

PATTERN batstr.rtrim(X_0:bat[:str]):bat[:str];
COMMENT "Strip whitespaces from end of a string.";

PATTERN batstr.rtrim(X_0:bat[:str], X_1:bat[:oid]):bat[:str];
COMMENT "Strip whitespaces from end of a string.";

PATTERN batstr.rtrim2(X_0:str, X_1:bat[:str]):bat[:str];
COMMENT "Strip characters in the second string from end of the first strings.";

PATTERN batstr.rtrim2(X_0:str, X_1:bat[:str], X_2:bat[:oid]):bat[:str];
COMMENT "Strip characters in the second string from end of the first strings.";

PATTERN batstr.rtrim2(X_0:bat[:str], X_1:bat[:str]):bat[:str];
COMMENT "Strip characters in the second strings from end of the first strings.";

PATTERN batstr.rtrim2(X_0:bat[:str], X_1:bat[:str], X_2:bat[:oid], X_3:bat[:oid]):bat[:str];
COMMENT "Strip characters in the second strings from end of the first strings.";

PATTERN batstr.rtrim2(X_0:bat[:str], X_1:str):bat[:str];
COMMENT "Strip characters in the second string from end of the first strings.";

PATTERN batstr.rtrim2(X_0:bat[:str], X_1:str, X_2:bat[:oid]):bat[:str];
COMMENT "Strip characters in the second string from end of the first strings.";

PATTERN batstr.search(X_0:bat[:str], X_1:bat[:str]):bat[:int];
COMMENT "Search for a substring. Returns position, -1 if not found.";

PATTERN batstr.search(X_0:bat[:str], X_1:bat[:str], X_2:bat[:oid], X_3:bat[:oid]):bat[:int];
COMMENT "Search for a substring. Returns position, -1 if not found.";

PATTERN batstr.search(X_0:bat[:str], X_1:bat[:str], X_2:bit):bat[:int];
COMMENT "Search for a substring. Returns position, -1 if not found, icase flag.";

PATTERN batstr.search(X_0:bat[:str], X_1:bat[:str], X_2:bit, X_3:bat[:oid], X_4:bat[:oid]):bat[:int];
COMMENT "Search for a substring. Returns position, -1 if not found, icase flag.";

PATTERN batstr.search(X_0:bat[:str], X_1:str):bat[:int];
COMMENT "Search for a substring. Returns position, -1 if not found.";

PATTERN batstr.search(X_0:bat[:str], X_1:str, X_2:bat[:oid]):bat[:int];
COMMENT "Search for a substring. Returns position, -1 if not found.";

PATTERN batstr.search(X_0:bat[:str], X_1:str, X_2:bit):bat[:int];
COMMENT "Search for a substring. Returns position, -1 if not found, icase flag.";

PATTERN batstr.search(X_0:bat[:str], X_1:str, X_2:bit, X_3:bat[:oid]):bat[:int];
COMMENT "Search for a substring. Returns position, -1 if not found, icase flag.";

PATTERN batstr.search(X_0:str, X_1:bat[:str]):bat[:int];
COMMENT "Search for a substring. Returns position, -1 if not found.";

PATTERN batstr.search(X_0:str, X_1:bat[:str], X_2:bat[:oid]):bat[:int];
COMMENT "Search for a substring. Returns position, -1 if not found.";

PATTERN batstr.search(X_0:str, X_1:bat[:str], X_2:bit):bat[:int];
COMMENT "Search for a substring. Returns position, -1 if not found, icase flag.";

PATTERN batstr.search(X_0:str, X_1:bat[:str], X_2:bit, X_3:bat[:oid]):bat[:int];
COMMENT "Search for a substring. Returns position, -1 if not found, icase flag.";

PATTERN batstr.space(X_0:bat[:int]):bat[:str];
COMMENT "";

PATTERN batstr.space(X_0:bat[:int], X_1:bat[:oid]):bat[:str];
COMMENT "";

PATTERN batstr.splitpart(X_0:bat[:str], X_1:bat[:str], X_2:bat[:int]):bat[:str];
COMMENT "Split string on delimiter. Returns\ngiven field (counting from one.)";

PATTERN batstr.splitpart(X_0:bat[:str], X_1:bat[:str], X_2:int):bat[:str];
COMMENT "Split string on delimiter. Returns\ngiven field (counting from one.)";

PATTERN batstr.splitpart(X_0:bat[:str], X_1:str, X_2:bat[:int]):bat[:str];
COMMENT "Split string on delimiter. Returns\ngiven field (counting from one.)";

PATTERN batstr.splitpart(X_0:bat[:str], X_1:str, X_2:int):bat[:str];
COMMENT "Split string on delimiter. Returns\ngiven field (counting from one.)";

PATTERN batstr.startswith(X_0:bat[:str], X_1:bat[:str]):bat[:bit];
COMMENT "Check if bat string starts with bat substring.";

PATTERN batstr.startswith(X_0:bat[:str], X_1:bat[:str], X_2:bat[:oid], X_3:bat[:oid]):bat[:bit];
COMMENT "Check if bat string starts with bat substring (with CLs).";

PATTERN batstr.startswith(X_0:bat[:str], X_1:bat[:str], X_2:bit):bat[:bit];
COMMENT "Check if bat string starts with bat substring, icase flag.";

PATTERN batstr.startswith(X_0:bat[:str], X_1:bat[:str], X_2:bit, X_3:bat[:oid], X_4:bat[:oid]):bat[:bit];
COMMENT "Check if bat string starts with bat substring (with CLs) + icase flag.";

PATTERN batstr.startswith(X_0:bat[:str], X_1:str):bat[:bit];
COMMENT "Check if bat string starts with substring.";

PATTERN batstr.startswith(X_0:bat[:str], X_1:str, X_2:bat[:oid]):bat[:bit];
COMMENT "Check if bat string(with CL) starts with substring.";

PATTERN batstr.startswith(X_0:bat[:str], X_1:str, X_2:bit):bat[:bit];
COMMENT "Check if bat string starts with substring, icase flag.";

PATTERN batstr.startswith(X_0:bat[:str], X_1:str, X_2:bit, X_3:bat[:oid]):bat[:bit];
COMMENT "Check if bat string(with CL) starts with substring + icase flag.";

PATTERN batstr.startswith(X_0:str, X_1:bat[:str]):bat[:bit];
COMMENT "Check if string starts with bat substring.";

PATTERN batstr.startswith(X_0:str, X_1:bat[:str], X_2:bat[:oid]):bat[:bit];
COMMENT "Check if string starts with bat substring(with CL).";

PATTERN batstr.startswith(X_0:str, X_1:bat[:str], X_2:bit):bat[:bit];
COMMENT "Check if string starts with bat substring + icase flag.";

PATTERN batstr.startswith(X_0:str, X_1:bat[:str], X_2:bit, X_3:bat[:oid]):bat[:bit];
COMMENT "Check if string starts with bat substring(with CL) + icase flag.";

PATTERN batstr.string(X_0:bat[:str], X_1:bat[:int]):bat[:str];
COMMENT "Return the tail s[offset..n] of a string s[0..n].";

PATTERN batstr.string(X_0:bat[:str], X_1:bat[:int], X_2:bat[:oid], X_3:bat[:oid]):bat[:str];
COMMENT "Return the tail s[offset..n] of a string s[0..n].";

PATTERN batstr.string(X_0:str, X_1:bat[:int]):bat[:str];
COMMENT "Return the tail s[offset..n] of a string s[0..n].";

PATTERN batstr.string(X_0:str, X_1:bat[:int], X_2:bat[:oid]):bat[:str];
COMMENT "Return the tail s[offset..n] of a string s[0..n].";

PATTERN batstr.string(X_0:bat[:str], X_1:int):bat[:str];
COMMENT "Return the tail s[offset..n] of a string s[0..n].";

PATTERN batstr.string(X_0:bat[:str], X_1:int, X_2:bat[:oid]):bat[:str];
COMMENT "Return the tail s[offset..n] of a string s[0..n].";

PATTERN batstr.stringleft(X_0:bat[:str], X_1:bat[:int]):bat[:str];
COMMENT "";

PATTERN batstr.stringleft(X_0:bat[:str], X_1:bat[:int], X_2:bat[:oid], X_3:bat[:oid]):bat[:str];
COMMENT "";

PATTERN batstr.stringleft(X_0:str, X_1:bat[:int]):bat[:str];
COMMENT "";

PATTERN batstr.stringleft(X_0:str, X_1:bat[:int], X_2:bat[:oid]):bat[:str];
COMMENT "";

PATTERN batstr.stringleft(X_0:bat[:str], X_1:int):bat[:str];
COMMENT "";

PATTERN batstr.stringleft(X_0:bat[:str], X_1:int, X_2:bat[:oid]):bat[:str];
COMMENT "";

PATTERN batstr.stringright(X_0:bat[:str], X_1:bat[:int]):bat[:str];
COMMENT "";

PATTERN batstr.stringright(X_0:bat[:str], X_1:bat[:int], X_2:bat[:oid], X_3:bat[:oid]):bat[:str];
COMMENT "";

PATTERN batstr.stringright(X_0:str, X_1:bat[:int]):bat[:str];
COMMENT "";

PATTERN batstr.stringright(X_0:str, X_1:bat[:int], X_2:bat[:oid]):bat[:str];
COMMENT "";

PATTERN batstr.stringright(X_0:bat[:str], X_1:int):bat[:str];
COMMENT "";

PATTERN batstr.stringright(X_0:bat[:str], X_1:int, X_2:bat[:oid]):bat[:str];
COMMENT "";

PATTERN batstr.substitute(X_0:bat[:str], X_1:bat[:str], X_2:bat[:str], X_3:bat[:bit]):bat[:str];
COMMENT "Substitute first occurrence of 'src' by\n'dst'. Iff repeated = true this is\nrepeated while 'src' can be found in the\nresult string. In order to prevent\nrecursion and result strings of unlimited\nsize, repeating is only done iff src is\nnot a substring of dst.";

PATTERN batstr.substitute(X_0:bat[:str], X_1:str, X_2:str, X_3:bit):bat[:str];
COMMENT "Substitute first occurrence of 'src' by\n'dst'. Iff repeated = true this is\nrepeated while 'src' can be found in the\nresult string. In order to prevent\nrecursion and result strings of unlimited\nsize, repeating is only done iff src is\nnot a substring of dst.";

PATTERN batstr.substring(X_0:bat[:str], X_1:bat[:int]):bat[:str];
COMMENT "Extract the tail of a string";

PATTERN batstr.substring(X_0:bat[:str], X_1:bat[:int], X_2:bat[:oid], X_3:bat[:oid]):bat[:str];
COMMENT "Extract the tail of a string";

PATTERN batstr.substring(X_0:str, X_1:bat[:int]):bat[:str];
COMMENT "Extract the tail of a string";

PATTERN batstr.substring(X_0:str, X_1:bat[:int], X_2:bat[:oid]):bat[:str];
COMMENT "Extract the tail of a string";

PATTERN batstr.substring(X_0:bat[:str], X_1:int):bat[:str];
COMMENT "Extract the tail of a string";

PATTERN batstr.substring(X_0:bat[:str], X_1:int, X_2:bat[:oid]):bat[:str];
COMMENT "Extract the tail of a string";

PATTERN batstr.substring3(X_0:bat[:str], X_1:bat[:int], X_2:bat[:int]):bat[:str];
COMMENT "Substring extraction using [start,start+length]";

PATTERN batstr.substring3(X_0:bat[:str], X_1:bat[:int], X_2:bat[:int], X_3:bat[:oid], X_4:bat[:oid], X_5:bat[:oid]):bat[:str];
COMMENT "Substring extraction using [start,start+length]";

PATTERN batstr.substring3(X_0:str, X_1:int, X_2:bat[:int]):bat[:str];
COMMENT "Substring extraction using [start,start+length]";

PATTERN batstr.substring3(X_0:str, X_1:int, X_2:bat[:int], X_3:bat[:oid]):bat[:str];
COMMENT "Substring extraction using [start,start+length]";

PATTERN batstr.substring3(X_0:str, X_1:bat[:int], X_2:int):bat[:str];
COMMENT "Substring extraction using [start,start+length]";

PATTERN batstr.substring3(X_0:str, X_1:bat[:int], X_2:int, X_3:bat[:oid]):bat[:str];
COMMENT "Substring extraction using [start,start+length]";

PATTERN batstr.substring3(X_0:str, X_1:bat[:int], X_2:bat[:int]):bat[:str];
COMMENT "Substring extraction using [start,start+length]";

PATTERN batstr.substring3(X_0:str, X_1:bat[:int], X_2:bat[:int], X_3:bat[:oid], X_4:bat[:oid]):bat[:str];
COMMENT "Substring extraction using [start,start+length]";

PATTERN batstr.substring3(X_0:bat[:str], X_1:int, X_2:int):bat[:str];
COMMENT "Substring extraction using [start,start+length]";

PATTERN batstr.substring3(X_0:bat[:str], X_1:int, X_2:int, X_3:bat[:oid]):bat[:str];
COMMENT "Substring extraction using [start,start+length]";

PATTERN batstr.substring3(X_0:bat[:str], X_1:int, X_2:bat[:int]):bat[:str];
COMMENT "Substring extraction using [start,start+length]";

PATTERN batstr.substring3(X_0:bat[:str], X_1:int, X_2:bat[:int], X_3:bat[:oid], X_4:bat[:oid]):bat[:str];
COMMENT "Substring extraction using [start,start+length]";

PATTERN batstr.substring3(X_0:bat[:str], X_1:bat[:int], X_2:int):bat[:str];
COMMENT "Substring extraction using [start,start+length]";

PATTERN batstr.substring3(X_0:bat[:str], X_1:bat[:int], X_2:int, X_3:bat[:oid], X_4:bat[:oid]):bat[:str];
COMMENT "Substring extraction using [start,start+length]";

PATTERN batstr.toLower(X_0:bat[:str]):bat[:str];
COMMENT "Convert a string to lower case.";

PATTERN batstr.toLower(X_0:bat[:str], X_1:bat[:oid]):bat[:str];
COMMENT "Convert a string to lower case.";

PATTERN batstr.toUpper(X_0:bat[:str]):bat[:str];
COMMENT "Convert a string to upper case.";

PATTERN batstr.toUpper(X_0:bat[:str], X_1:bat[:oid]):bat[:str];
COMMENT "Convert a string to upper case.";

PATTERN batstr.trim(X_0:bat[:str]):bat[:str];
COMMENT "Strip whitespaces around a string.";

PATTERN batstr.trim(X_0:bat[:str], X_1:bat[:oid]):bat[:str];
COMMENT "Strip whitespaces around a string.";

PATTERN batstr.trim2(X_0:str, X_1:bat[:str]):bat[:str];
COMMENT "Strip characters in the second strings around the first string.";

PATTERN batstr.trim2(X_0:str, X_1:bat[:str], X_2:bat[:oid]):bat[:str];
COMMENT "Strip characters in the second strings around the first string.";

PATTERN batstr.trim2(X_0:bat[:str], X_1:bat[:str]):bat[:str];
COMMENT "Strip characters in the second strings around the first strings.";

PATTERN batstr.trim2(X_0:bat[:str], X_1:bat[:str], X_2:bat[:oid], X_3:bat[:oid]):bat[:str];
COMMENT "Strip characters in the second strings around the first strings.";

PATTERN batstr.trim2(X_0:bat[:str], X_1:str):bat[:str];
COMMENT "Strip characters in the second string around the first strings.";

PATTERN batstr.trim2(X_0:bat[:str], X_1:str, X_2:bat[:oid]):bat[:str];
COMMENT "Strip characters in the second string around the first strings.";

PATTERN batstr.unicode(X_0:bat[:int]):bat[:str];
COMMENT "convert a unicode to a character.";

PATTERN batstr.unicode(X_0:bat[:int], X_1:bat[:oid]):bat[:str];
COMMENT "convert a unicode to a character.";

PATTERN batstr.unicodeAt(X_0:bat[:str], X_1:bat[:int]):bat[:int];
COMMENT "get a unicode character (as an int) from a string position.";

PATTERN batstr.unicodeAt(X_0:bat[:str], X_1:bat[:int], X_2:bat[:oid], X_3:bat[:oid]):bat[:int];
COMMENT "get a unicode character (as an int) from a string position.";

PATTERN batstr.unicodeAt(X_0:str, X_1:bat[:int]):bat[:int];
COMMENT "get a unicode character (as an int) from a string position.";

PATTERN batstr.unicodeAt(X_0:str, X_1:bat[:int], X_2:bat[:oid]):bat[:int];
COMMENT "get a unicode character (as an int) from a string position.";

PATTERN batstr.unicodeAt(X_0:bat[:str], X_1:int):bat[:int];
COMMENT "get a unicode character (as an int) from a string position.";

PATTERN batstr.unicodeAt(X_0:bat[:str], X_1:int, X_2:bat[:oid]):bat[:int];
COMMENT "get a unicode character (as an int) from a string position.";

PCRE module

The PCRE module is a set of functions that implement regular expression pattern matching using the same syntax and semantics as Perl, with just a few differences. The current implementation of PCRE (release 4.x) corresponds approximately with Perl 5.8, including support for UTF-8 encoded strings. To use this module the server has to be compiled with the pcre library.

MODULE pcre;

COMMAND pcre.imatch(X_0:str, X_1:str):bit;
COMMENT "Caseless Perl Compatible Regular Expression pattern matching against a string";

COMMAND pcre.index(X_0:pcre, X_1:str):int;
COMMENT "match a pattern, return matched position (or 0 when not found)";

COMMAND pcre.match(X_0:str, X_1:str):bit;
COMMENT "Perl Compatible Regular Expression pattern matching against a string";

COMMAND pcre.patindex(X_0:str, X_1:str):int;
COMMENT "Location of the first POSIX pattern matching against a string";

COMMAND pcre.pcre_quote(X_0:str):str;
COMMENT "Return a PCRE pattern string that matches the argument exactly.";

COMMAND pcre.replace(X_0:str, X_1:str, X_2:str, X_3:str):str;
COMMENT "Replace _all_ matches of \"pattern\" in \"origin_str\" with \"replacement\".\nParameter \"flags\" accepts these flags: 'e', 'i', 'm', 's', and 'x'.\n'e': if present, an empty string is considered to be a valid match.\n'i': if present, the match operates in case-insensitive mode.\nOtherwise, in case-sensitive mode.\n'm': if present, the match operates in multi-line mode.\n's': if present, the match operates in \"dot-all\" mode.\n'x': if present, the match operates in extended mode.\nThe specifications of the flags can be found in \"man pcreapi\".\nThe flag letters may be repeated.\nNo other letters than 'e', 'i', 'm', 's' and 'x' are allowed in \"flags\".\nReturns the replaced string, or if no matches found, the original string.";

COMMAND pcre.replace_first(X_0:str, X_1:str, X_2:str, X_3:str):str;
COMMENT "Replace _the first_ match of \"pattern\" in \"origin_str\" with \"replacement\".\nParameter \"flags\" accepts these flags: 'e', 'i', 'm', 's', and 'x'.\n'e': if present, an empty string is considered to be a valid match.\n'i': if present, the match operates in case-insensitive mode.\nOtherwise, in case-sensitive mode.\n'm': if present, the match operates in multi-line mode.\n's': if present, the match operates in \"dot-all\" mode.\n'x': if present, the match operates in extended mode.\nThe specifications of the flags can be found in \"man pcreapi\".\nThe flag letters may be repeated.\nNo other letters than 'e', 'i', 'm', 's' and 'x' are allowed in \"flags\".\nReturns the replaced string, or if no matches found, the original string.";

COMMAND pcre.sql2pcre(X_0:str, X_1:str):str;
COMMENT "Convert a SQL like pattern with the given escape character into a PCRE pattern.";

Batpcre module

MODULE batpcre;

COMMAND batpcre.replace(X_0:bat[:str], X_1:str, X_2:str, X_3:str):bat[:str];
COMMENT "";

COMMAND batpcre.replace_first(X_0:bat[:str], X_1:str, X_2:str, X_3:str):bat[:str];
COMMENT "";

TXTSIM module

The txtsim module provides similarity metrics for strings.

MODULE txtsim;

PATTERN txtsim.dameraulevenshtein(X_0:str, X_1:str):int;
COMMENT "Calculates Damerau-Levenshtein distance between two strings, operation costs (ins/del = 1, replacement = 1, transposition = 2)";

PATTERN txtsim.dameraulevenshtein(X_0:str, X_1:str, X_2:int, X_3:int, X_4:int):int;
COMMENT "Calculates Damerau-Levenshtein distance between two strings, variable operation costs (ins/del, replacement, transposition)";

COMMAND txtsim.editdistance(X_0:str, X_1:str):int;
COMMENT "Alias for Damerau-Levenshtein(str,str), insdel cost = 1, replace cost = 1 and transpose = 2";

COMMAND txtsim.editdistance2(X_0:str, X_1:str):int;
COMMENT "Alias for Damerau-Levenshtein(str,str), insdel cost = 1, replace cost = 1 and transpose = 1";

COMMAND txtsim.jarowinkler(X_0:str, X_1:str):dbl;
COMMENT "Calculate Jaro Winkler similarity";

PATTERN txtsim.levenshtein(X_0:str, X_1:str):int;
COMMENT "Calculates Levenshtein distance between two strings, operation costs (ins/del = 1, replacement = 1)";

PATTERN txtsim.levenshtein(X_0:str, X_1:str, X_2:int, X_3:int):int;
COMMENT "Calculates Levenshtein distance between two strings, variable operation costs (ins/del, replacement)";

PATTERN txtsim.levenshtein(X_0:str, X_1:str, X_2:int, X_3:int, X_4:int):int;
COMMENT "(Backwards compatibility purposes) Calculates Damerau-Levenshtein distance between two strings, variable operation costs (ins/del, replacement, transposition)";

PATTERN txtsim.maxlevenshtein(X_0:str, X_1:str, X_2:int):int;
COMMENT "Levenshtein distance with basic costs but up to a MAX";

PATTERN txtsim.maxlevenshtein(X_0:str, X_1:str, X_2:int, X_3:int, X_4:int):int;
COMMENT "Levenshtein distance with variable costs but up to a MAX";

COMMAND txtsim.maxlevenshteinjoin(X_0:bat[:str], X_1:bat[:str], X_2:bat[:int], X_3:bat[:oid], X_4:bat[:oid], X_5:bit, X_6:lng, X_7:bit) (X_8:bat[:oid], X_9:bat[:oid]);
COMMENT "";

COMMAND txtsim.minjarowinkler(X_0:str, X_1:str, X_2:dbl):bit;
COMMENT "";

COMMAND txtsim.minjarowinklerjoin(X_0:bat[:str], X_1:bat[:str], X_2:bat[:dbl], X_3:bat[:oid], X_4:bat[:oid], X_5:bit, X_6:lng, X_7:bit) (X_8:bat[:oid], X_9:bat[:oid]);
COMMENT "";

COMMAND txtsim.qgramnormalize(X_0:str):str;
COMMENT "'Normalizes' strings (e.g. toUpper and replaces non-alphanumerics with one space)";

COMMAND txtsim.qgramselfjoin(X_0:bat[:oid], X_1:bat[:oid], X_2:bat[:int], X_3:bat[:int], X_4:flt, X_5:int) (X_6:bat[:int], X_7:bat[:int]);
COMMENT "QGram self-join on ordered(!) qgram tables and sub-ordered q-gram positions";

COMMAND txtsim.soundex(X_0:str):str;
COMMENT "Soundex function for phonetic matching";

COMMAND txtsim.str2qgrams(X_0:str):bat[:str];
COMMENT "Break the string into 4-grams";

COMMAND txtsim.stringdiff(X_0:str, X_1:str):int;
COMMENT "Calculate the soundexed editdistance";

Battxtsim module

MODULE battxtsim;

PATTERN battxtsim.maxlevenshtein(X_0:bat[:str], X_1:bat[:str], X_2:int):bat[:bit];
COMMENT "Same as maxlevenshtein but for BATS";

PATTERN battxtsim.maxlevenshtein(X_0:bat[:str], X_1:bat[:str], X_2:int, X_3:int, X_4:int):bat[:bit];
COMMENT "Same as maxlevenshtein but for BATS";

Tokenizer module

MODULE tokenizer;

COMMAND tokenizer.append(X_0:str):oid;
COMMENT "tokenize a new string and append it to the tokenizer (duplicate elimination is performed)";

COMMAND tokenizer.close():void;
COMMENT "close the current tokenizer store";

COMMAND tokenizer.depositFile(X_0:str):void;
COMMENT "batch insertion from a file of strings to tokenize, each string is separated by a new line";

COMMAND tokenizer.getCardinality():bat[:lng];
COMMENT "debugging function that returns the unique tokens at each level";

COMMAND tokenizer.getCount():bat[:lng];
COMMENT "debugging function that returns the size of the bats at each level";

COMMAND tokenizer.getIndex():bat[:oid];
COMMENT "administrative function that returns the INDEX bat";

COMMAND tokenizer.getLevel(X_0:int):bat[:str];
COMMENT "administrative function that returns the bat on level i";

PATTERN tokenizer.locate(X_0:str):oid;
COMMENT "if the given string is in the store returns its oid, otherwise oid_nil";

COMMAND tokenizer.open(X_0:str):void;
COMMENT "open the named tokenizer store, a new one is created if the specified name does not exist";

PATTERN tokenizer.take(X_0:oid):str;
COMMENT "reconstructs and returns the i-th string";