view regexp/regexp.c @ 55:68263b10998e

Updated: added bat_iterator_end.
author Sjoerd Mullender <sjoerd@acm.org>
date Wed, 26 Jan 2022 14:59:05 +0100 (2022-01-26)
parents da896864dbbd
children 8122094c79b1
line wrap: on
line source
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0.  If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Copyright 2018-2021 MonetDB B.V.
 */

/* monetdb_config.h must be included as the first include file */
#include <monetdb_config.h>

/* mal_exception.h actually contains everything we need */
#include <mal_exception.h>

/* for the candidate iterator */
#include <gdk_cand.h>

/* system include files */
#include <string.h>

/* we use the PCRE library to do regular expression matching */
#include <pcre.h>

static int
parseflags(const char *flags)
{
	int options = PCRE_UTF8; /* MonetDB uses UTF-8 exclusively */

	if (flags) {
		while (*flags) {
			switch (*flags) {
			case 'i':	/* case insensitive */
				options |= PCRE_CASELESS;
				break;
			case 'x':	/* extended regular expressions */
				options |= PCRE_EXTENDED;
				break;
			case 'm':	/* multiline matching */
				options |= PCRE_MULTILINE | PCRE_DOTALL;;
				break;
			default:
				return -1; /* indicate there was an error */
			}
			flags++;
		}
	}
	return options;
}

static char *
do_match(bit *ret, const char *val, const char *pat, const char *flags)
{
	const char *err = NULL;
	int options;
	int pos = 0;
	pcre *re;

	/* if any of the input values are nil, the result is no match */
	if (strNil(val) || strNil(pat) || strNil(flags)) {
		/* special case for NIL inputs: NILs don't match anything */
		*ret = 0;
		return MAL_SUCCEED;
	}

	options = parseflags(flags);
	if (options == -1)
		throw(MAL, "regexp.rematch", "bad flag character");

	re = pcre_compile(pat, options, &err, &pos, NULL);
	if (re == NULL) {
		throw(MAL, "regexp.rematch",
		      "compilation of regular expression (%s) failed at %d with %s",
		      pat, pos, err);
	}
	pos = pcre_exec(re, NULL, val, (int) strlen(val), 0, 0, NULL, 0);
	pcre_free(re);
	if (pos < PCRE_ERROR_NOMATCH) {
		throw(MAL, "regexp.rematch",
		      "matching of regular expression (%s) failed with %d",
		      pat, pos);
	}
	*ret = pos >= 0;
	return MAL_SUCCEED;
}

static char *
regexpmatch(bit *ret, const char **val, const char **pat)
{
	return do_match(ret, *val, *pat, "");
}

static char *
regexpmatchf(bit *ret, const char **val, const char **pat, const char **flags)
{
	return do_match(ret, *val, *pat, *flags);
}

static char *
do_matchbulk(bat *ret, bat bid, const char *pat, const char *flags)
{
	BAT *b;			/* input BAT */
	BATiter bi;		/* helper to loop through values */
	BAT *bn;		/* result BAT */
	bit *outp;		/* pointer through which we add to result */
	BUN start, end;		/* iteration variables */

	const char *err = NULL;	/* error message from PCRE library */
	int pos = 0;		/* error position from PCRE library */
	int options;		/* PCRE options */
	pcre *re;		/* compiled regular expression */
	pcre_extra *sd;		/* studied regular expression */

	/* from the BAT ID we need to get the BAT descriptor, making
	 * sure that the data of the BAT is loaded into memory */
	if ((b = BATdescriptor(bid)) == NULL) {
		throw(MAL, "batregexp.rematch", RUNTIME_OBJECT_MISSING);
	}
	/* check that the BAT has the expected type: we expect str or
	 * something compatible with str (if we only want str, we need
	 * to compare b->ttype with TYPE_str and not use ATOMstorage).
	 * Note, the MAL interpreter will only call this function with
	 * a str BAT because that is the only interface that is
	 * defined in the MAL file, so this check is superfluous. */
	if (ATOMstorage(b->ttype) != TYPE_str) {
		BBPunfix(b->batCacheid);
		throw(MAL, "batregexp.rematch", SEMANTIC_TYPE_MISMATCH);
	}

	/* if any of the input values are nil, the result is no match */
	if (strNil(pat) || strNil(flags)) {
		/* no matches when the pattern or the flags is NIL
		 * we return an a BAT with all NIL values */
		bit f = bit_nil;
		if ((bn = BATconstant(b->hseqbase, TYPE_bit, &f, BATcount(b), TRANSIENT)) == NULL)
			throw(MAL, "batregexp.rematch", GDK_EXCEPTION);
		*ret = bn->batCacheid;
		BBPkeepref(*ret);
		BBPunfix(b->batCacheid);
		return MAL_SUCCEED;
	}
	options = parseflags(flags);
	if (options == -1) {
		BBPunfix(b->batCacheid);
		throw(MAL, "batregexp.rematch", "bad flag character");
	}

	/* allocate a result BAT; the capacity we ask for is the size
	 * of the input BAT since we produce a value for each input
	 * value */
	bn = COLnew(b->hseqbase, TYPE_bit, BATcount(b), TRANSIENT);
	if (bn == NULL) {
		BBPunfix(b->batCacheid);
		throw(MAL, "batregexp.rematch", GDK_EXCEPTION);
	}

	/* Position outp at the start of the result array.
	 * We know the array is large enough even if every value were
	 * to match, so we don't need to check for that. */
	outp = (bit *) Tloc(bn, 0);

	/* compile the regular expression */
	re = pcre_compile(pat, options, &err, &pos, NULL);
	if (re == NULL) {
		BBPunfix(b->batCacheid);
		BBPreclaim(bn);
		throw(MAL, "batregexp.rematch",
		      "compilation of regular expression (%s) failed at %d with %s",
		      pat, pos, err);
	}
	/* invest in study of the r.e. */
	sd = pcre_study(re, 0, &err);
	if (err != NULL) {
		pcre_free(re);
		BBPunfix(b->batCacheid);
		BBPreclaim(bn);
		throw(MAL, "batregexp.rematch",
		      "study of regular expression (%s) failed with %s",
		      pat, err);
	}

	/* now, start and end are the limits in b that we need to look
	 * at, and if set, cand and candend are the beginning and end
	 * of the list of OIDs of b that we need to consider */

	bi = bat_iterator(b);

	/* we will change these if we add a NIL */
	bn->tnil = false;
	bn->tnonil = true;
	for (start = 0, end = BATcount(b); start < end; start++) {
		const char *val = BUNtvar(bi, start);
		/* nil values never match */
		if (strNil(val)) {
			*outp++ = bit_nil;
			bn->tnil = true;
			bn->tnonil = false;
		} else {
			pos = pcre_exec(re, sd, val, (int) strlen(val), 0, 0, NULL, 0);
			if (pos < 0 && pos != PCRE_ERROR_NOMATCH && pos != PCRE_ERROR_BADUTF8) {
				/* error during processing */
				bat_iterator_end(&bi);
				BBPunfix(b->batCacheid);
				BBPreclaim(bn);
				pcre_free_study(sd);
				pcre_free(re);
				throw(MAL, "batregexp.rematch",
				      "matching of regular expression (%s) failed with %d",
				      pat, pos);
			}
			*outp++ = pos >= 0; /* TRUE if match, FALSE if not */
		}
	}
	bat_iterator_end(&bi);

	/* set properties and size of result BAT */
	BATsetcount(bn, BATcount(b));

	if (BATcount(bn) > 1) {
		/* if more than 1 result, it is not reverse sorted */
		bn->tsorted = false;	/* probably not sorted */
		bn->trevsorted = false;	/* probably not reverse sorted */
		bn->tkey = false;	/* probably not key */
	} else {
		/* if empty or a single result, it is sorted, reverse
		 * sorted, and key */
		bn->tsorted = true;
		bn->trevsorted = true;
		bn->tkey = true;
	}
	bn->tnosorted = 0;	/* we don't know for sure */
	bn->tnorevsorted = 0;	/* we don't know for sure */
	bn->tnokey[0] = bn->tnokey[1] = 0;

	/* we're done with b and re */
	BBPunfix(b->batCacheid);
	pcre_free_study(sd);
	pcre_free(re);

	*ret = bn->batCacheid;
	BBPkeepref(*ret);
	return MAL_SUCCEED;
}

static char *
regexpmatchbulk(bat *ret, const bat *bid, const char **pat)
{
	return do_matchbulk(ret, *bid, *pat, "");
}


static char *
regexpmatchfbulk(bat *ret, const bat *bid, const char **pat, const char **flags)
{
	return do_matchbulk(ret, *bid, *pat, *flags);
}

static char *
do_select(bat *ret, bat bid, bat sid, const char *pat, const char *flags, bit anti)
{
	BAT *b, *s = NULL;	/* input BAT and optional candidate list */

	struct canditer ci;	/* candidate iterator */
	BATiter bi;		/* helper to loop through values */

	BAT *bn;		/* result BAT */
	oid *outp;			/* pointer through which we add to result */

	const char *err = NULL;	/* error message from PCRE library */
	int pos = 0;		/* error position from PCRE library */
	int options;		/* PCRE options */
	pcre *re;		/* compiled regular expression */
	pcre_extra *sd;		/* studied regular expression */

	/* if any of the input values are nil, the result is no match */
	if (strNil(pat) || strNil(flags)) {
		/* no matches when the pattern or the flags is NIL
		 * we return an empty BAT of the correct type */
		if ((bn = BATdense(0, 0, 0)) == NULL)
			throw(MAL, "regexp.rematchselect", GDK_EXCEPTION);
		*ret = bn->batCacheid;
		BBPkeepref(*ret);
		return MAL_SUCCEED;
	}
	options = parseflags(flags);
	if (options == -1)
		throw(MAL, "regexp.rematchselect", "bad flag character");

	/* from the BAT IDs we need to get the BAT descriptors, making
	 * sure that the data of the BATs are loaded into memory */
	if ((b = BATdescriptor(bid)) == NULL) {
		throw(MAL, "regexp.rematchselect", RUNTIME_OBJECT_MISSING);
	}
	/* check that the BAT has the expected type: we expect str or
	 * something compatible with str (if we only want str, we need
	 * to compare b->ttype with TYPE_str and not use ATOMstorage).
	 * Note, the MAL interpreter will only call this function with
	 * a str BAT because that is the only interface that is
	 * defined in the MAL file, so this check is superfluous. */
	if (ATOMstorage(b->ttype) != TYPE_str) {
		BBPunfix(b->batCacheid);
		throw(MAL, "regexp.rematchselect", SEMANTIC_TYPE_MISMATCH);
	}
	if (!is_bat_nil(sid) &&
	    (s = BATdescriptor(sid)) == NULL) {
		BBPunfix(b->batCacheid);
		throw(MAL, "regexp.rematchselect", RUNTIME_OBJECT_MISSING);
	}

	if (canditer_init(&ci, b, s) == 0) {
		/* trivially empty result */
		BBPunfix(b->batCacheid);
		if (s)
			BBPunfix(s->batCacheid);
		bn = BATdense(0, 0, 0);
		*ret = bn->batCacheid;
		BBPkeepref(*ret);
		return MAL_SUCCEED;
	}

	/* allocate a result BAT; the capacity we ask for is the
	 * maximum potential result size (i.e. the size of the
	 * candidate list if there is one, else the size of the input
	 * BAT b) */
	bn = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT);
	if (bn == NULL) {
		BBPunfix(b->batCacheid);
		if (s)
			BBPunfix(s->batCacheid);
		throw(MAL, "regexp.rematchselect", GDK_EXCEPTION);
	}

	/* Position outp at the start of the result array.
	 * We know the array is large enough even if every value were
	 * to match, so we don't need to check for that. */
	outp = (oid *) Tloc(bn, 0);

	/* compile the regular expression */
	re = pcre_compile(pat, options, &err, &pos, NULL);
	if (re == NULL) {
		BBPunfix(b->batCacheid);
		if (s)
			BBPunfix(s->batCacheid);
		BBPreclaim(bn);
		throw(MAL, "regexp.rematchselect",
		      "compilation of regular expression (%s) failed at %d with %s",
		      pat, pos, err);
	}
	/* invest in study of the r.e. */
	sd = pcre_study(re, 0, &err);
	if (err != NULL) {
		pcre_free(re);
		BBPunfix(b->batCacheid);
		if (s)
			BBPunfix(s->batCacheid);
		BBPreclaim(bn);
		throw(MAL, "regexp.rematchselect",
		      "study of regular expression (%s) failed with %s",
		      pat, err);
	}

	bi = bat_iterator(b);

	/* iterate through the candidates */
	for (BUN i = 0; i < ci.ncand; i++) {
		/* get the next candidate */
		oid o = canditer_next(&ci);

		/* the candidate list has a list of OIDs which are
		 * relative to b->hseqbase, we need to convert that to
		 * an index relative to the start of the array in the
		 * (tail) heap */
		const char *val = BUNtvar(bi, o - b->hseqbase);

		/* nil values never match */
		if (!strNil(val)) {
			pos = pcre_exec(re, sd, val, (int) strlen(val), 0, 0, NULL, 0);
			if (pos >= 0) {
				/* regular expression matched */
				if (!anti)
					*outp++ = o;
			} else if (pos == PCRE_ERROR_NOMATCH) {
				/* regular expression didn't match */
				if (anti)
					*outp++ = o;
			} else {
				/* error during processing */
				bat_iterator_end(&bi);
				BBPunfix(b->batCacheid);
				BBPunfix(s->batCacheid);
				BBPreclaim(bn);
				pcre_free_study(sd);
				pcre_free(re);
				throw(MAL, "regexp.rematchselect",
				      "matching of regular expression (%s) failed with %d",
				      pat, pos);
			}
		}
	}
	/* we're done with the BAT iterator */
	bat_iterator_end(&bi);
	/* we're done with b, s, and re */
	BBPunfix(b->batCacheid);
	if (s)
		BBPunfix(s->batCacheid);
	pcre_free_study(sd);
	pcre_free(re);

	/* set properties and size of result BAT */
	BATsetcount(bn, (BUN) (outp - (oid *) Tloc(bn, 0))); /* size is pointer difference */
	/* the result BAT of a select operation MUST be sorted, and
	 * all values MUST be distinct (i.e. it is a candidate list);
	 * due to the way we created the result, we know this is the
	 * case */
	bn->tsorted = true;
	bn->tnosorted = 0;
	bn->tkey = true;
	bn->tseqbase = oid_nil;
	if (BATcount(bn) > 1) {
		/* if more than 1 result, it is not reverse sorted */
		bn->trevsorted = false;
		bn->tnorevsorted = 1; /* index 1 is larger than index 0 */
		/* the BAT is dense if the type is TYPE_oid, the
		 * values are sorted in ascending order, they are all
		 * distinct, and they form a consecutive sequence (no
		 * missing values); we only need to check the last
		 * condition, which we do by checking the difference
		 * between the first and last values */
		outp = (oid *) Tloc(bn, 0); /* pointer to start */
		if (outp[BATcount(bn) - 1] - outp[0] == BATcount(bn) - 1)
			bn->tseqbase = outp[0];
	} else {
		/* if empty or a single result, it is reverse sorted
		 * and dense */
		bn->trevsorted = true;
		bn->tnorevsorted = 0;
		bn->tseqbase = BATcount(bn) == 0 ? 0 : *(oid *) Tloc(bn, 0);
	}
	/* there are no NIL values in the result */
	bn->tnil = false;
	bn->tnonil = true;

	*ret = bn->batCacheid;
	BBPkeepref(*ret);
	return MAL_SUCCEED;
}

static char *
regexpmatchselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const bit *anti)
{
	return do_select(ret, *bid, sid ? *sid : 0, *pat, "", *anti);
}


static char *
regexpmatchfselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const char **flags, const bit *anti)
{
	return do_select(ret, *bid, sid ? *sid : 0, *pat, *flags, *anti);
}

static char *
do_join(bat *lres, bat *rres, bat lid, bat rid, const char *flags,
	bat slid, bat srid, bit nil_matches, lng estimate)
{
	BAT *l, *r, *sl = NULL, *sr = NULL; /* input BATs */
	BAT *bn1, *bn2;		/* output BATs */

	struct canditer lci;	/* candidate iterator for l */
	struct canditer rci;	/* candidate iterator for r */

	BATiter li;		/* helper to loop through values */
	BATiter ri;		/* helper to loop through values */
	oid ro;			/* right OID being matched */

	const char *pat;	/* the regular expression being matched */
	const char *err;	/* error message from PCRE library */
	int pos = 0;		/* error position from PCRE library */
	int options;		/* PCRE options */
	pcre *re;		/* compiled regular expression */
	pcre_extra *sd;		/* studied regular expression */

	(void) nil_matches;	/* only relevant for equi-join */

	if (strNil(flags)) {
		/* no matches when the flags is NIL
		 * we return two empty BATs */
		bn1 = BATdense(0, 0, 0);
		bn2 = BATdense(0, 0, 0);
		if (bn1 == NULL || bn2 == NULL) {
			BBPreclaim(bn1);
			BBPreclaim(bn2);
			throw(MAL, "regexp.rematchjoin", GDK_EXCEPTION);
		}
		return MAL_SUCCEED;
	}
	options = parseflags(flags);
	if (options == -1)
		throw(MAL, "regexp.rematchjoin", "bad flag character");

	l = BATdescriptor(lid);
	r = BATdescriptor(rid);
	if (!is_bat_nil(slid))
		sl = BATdescriptor(slid);
	if (!is_bat_nil(srid))
		sr = BATdescriptor(srid);
	if (l == NULL || r == NULL ||
	    (!is_bat_nil(slid) && sl == NULL) ||
	    (!is_bat_nil(srid) && sr == NULL)) {
		/* one of the calls to BATdescriptor failed */
		if (l)
			BBPunfix(l->batCacheid);
		if (r)
			BBPunfix(r->batCacheid);
		if (sl)
			BBPunfix(sl->batCacheid);
		if (sr)
			BBPunfix(sr->batCacheid);
		throw(MAL, "regexp.rematchjoin", RUNTIME_OBJECT_MISSING);
	}
	/* check that the BATs have the expected type: we expect str
	 * or something compatible with str for l (the values) and str
	 * for r (the patterns).
	 * Note, the MAL interpreter will only call this function with
	 * a pair of str BATs because that is the only interface that
	 * is defined in the MAL file, so this check is superfluous.
	 */
	if (ATOMstorage(l->ttype) != TYPE_str || r->ttype != TYPE_str) {
		BBPunfix(l->batCacheid);
		BBPunfix(r->batCacheid);
		if (sl)
			BBPunfix(sl->batCacheid);
		if (sr)
			BBPunfix(sr->batCacheid);
		throw(MAL, "regexp.rematchjoin", SEMANTIC_TYPE_MISMATCH);
	}

	if (canditer_init(&lci, l, sl) == 0 ||
	    canditer_init(&rci, r, sr) == 0) {
		/* if either side is empty (or no candidates) the
		 * result is empty */
		BBPunfix(l->batCacheid);
		BBPunfix(r->batCacheid);
		if (sl)
			BBPunfix(sl->batCacheid);
		if (sr)
			BBPunfix(sr->batCacheid);
		bn1 = BATdense(0, 0, 0);
		bn2 = BATdense(0, 0, 0);
		if (bn1 == NULL || bn2 == NULL) {
			BBPreclaim(bn1);
			BBPreclaim(bn2);
			throw(MAL, "regexp.rematchjoin", GDK_EXCEPTION);
		}
		return MAL_SUCCEED;
	}

	/* if there is no valid estimate, use the size of the left
	 * input as size estimate */
	if (is_lng_nil(estimate) || estimate == 0)
		estimate = lci.ncand;

	/* create the output BATs */
	bn1 = COLnew(0, TYPE_oid, estimate, TRANSIENT);
	bn2 = COLnew(0, TYPE_oid, estimate, TRANSIENT);
	if (bn1 == NULL || bn2 == NULL) {
		/* one of the calls to COLnew failed
		 * note, BBPreclaim checks whether its argument is NULL */
		BBPreclaim(bn1);
		BBPreclaim(bn2);
		BBPunfix(l->batCacheid);
		BBPunfix(r->batCacheid);
		if (sl)
			BBPunfix(sl->batCacheid);
		if (sr)
			BBPunfix(sr->batCacheid);
		throw(MAL, "regexp.rematchjoin", GDK_EXCEPTION);
	}

	li = bat_iterator(l);
	ri = bat_iterator(r);

	for (BUN i = 0; i < rci.ncand; i++) {
		ro = canditer_next(&rci);
		pat = BUNtvar(ri, ro - r->hseqbase);

		/* nil regular expressions don't match (despite
		 * nil_matches) */
		if (strNil(pat))
			continue;
		re = pcre_compile(pat, options, &err, &pos, NULL);
		sd = NULL;
		if (re == NULL)
			goto bailout;
		sd = pcre_study(re, 0, &err);
		if (err != NULL)
			goto bailout;

		/* inner loop: reset iterator, then iterate over it */
		canditer_reset(&lci);
		for (BUN j = 0; j < lci.ncand; j++) {
			oid lo = canditer_next(&lci);
			const char *val = BUNtvar(li, lo - l->hseqbase);
			if (strNil(val))
				continue;
			pos = pcre_exec(re, sd, val, (int) strlen(val), 0, 0, NULL, 0);
			if (pos >= 0) {
				/* regular expression matched */
				if (BUNappend(bn1, &lo, false) != GDK_SUCCEED ||
				    BUNappend(bn2, &ro, false) != GDK_SUCCEED)
					goto bailout;
			} else if (pos != PCRE_ERROR_NOMATCH) {
				/* error during processing */
				err = "matching of regular expression failed";
				goto bailout;
			}
		}
		pcre_free_study(sd);
		pcre_free(re);
	}
	bat_iterator_end(&li);
	bat_iterator_end(&ri);

	BBPunfix(l->batCacheid);
	BBPunfix(r->batCacheid);
	if (sl)
		BBPunfix(sl->batCacheid);
	if (sr)
		BBPunfix(sr->batCacheid);
	*lres = bn1->batCacheid;
	*rres = bn2->batCacheid;
	BBPkeepref(*lres);
	BBPkeepref(*rres);
	return MAL_SUCCEED;

  bailout:
	bat_iterator_end(&li);
	bat_iterator_end(&ri);
	BBPreclaim(bn1);
	BBPreclaim(bn2);
	BBPunfix(l->batCacheid);
	BBPunfix(r->batCacheid);
	if (sl)
		BBPunfix(sl->batCacheid);
	if (sr)
		BBPunfix(sr->batCacheid);
	if (sd)
		pcre_free_study(sd);
	if (re)
		pcre_free(re);
	if (err)
		throw(MAL, "pcre.rematchjoin",
		      "error with regular expression: %s", err);
	throw(MAL, "pcre.rematchjoin", GDK_EXCEPTION);
}

static char *
regexpmatchjoin(bat *lres, bat *rres, const bat *lid, const bat *rid,
		const bat *sl, const bat *sr, const bit *nil_matches,
		const lng *estimate)
{
	return do_join(lres, rres, *lid, *rid, "", *sl, *sr,
		       *nil_matches, *estimate);
}

static char *
regexpmatchfjoin(bat *lres, bat *rres, const bat *lid, const bat *rid,
		 const char **flags, const bat *sl, const bat *sr,
		 const bit *nil_matches, const lng *estimate)
{
	return do_join(lres, rres, *lid, *rid, *flags, *sl, *sr,
		       *nil_matches, *estimate);
}

#include "mel.h"

static char regexp_sql[] = "CREATE FILTER FUNCTION rematch(val STRING, pat STRING)"
	" EXTERNAL NAME regexp.rematch; "
	"CREATE FILTER FUNCTION rematch(val STRING, pat STRING, flags STRING)"
	" EXTERNAL NAME regexp.rematch;";

static mel_func regexp_init_funcs[] = {
	command("regexp", "rematch", regexpmatch, false,
		"Return true when the value 'val' matches the regular expression 'pat'",
		args(1,3, arg("",bit),arg("val",str),arg("pat",str))),
	command("regexp", "rematchselect", regexpmatchselect, false,
		"Return the list of matches in 'val' that match the regular expression 'pat'",
		args(1,5, batarg("",oid),batarg("val",str),batarg("s",oid),arg("pat",str),arg("anti",bit))),
	command("regexp", "rematchjoin", regexpmatchjoin, false,
		"Return the matching pairs from the 'val' and 'pat' columns",
		args(2,8, batarg("lr",oid),batarg("rr",oid),batarg("val",str),batarg("pat",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng))),
	command("batregexp", "rematch", regexpmatchbulk, false,
		"Return a BAT with true for match and false for no match",
		args(1,3, batarg("",bit),batarg("val",str),arg("pat",str))),
	command("regexp", "rematch", regexpmatchf, false,
		"Return true when the value 'val' matches the regular expression 'pat'",
		args(1,4, arg("",bit),arg("val",str),arg("pat",str),arg("flags",str))),
	command("regexp", "rematchselect", regexpmatchfselect, false,
		"Return the list of matches in 'val' that match the regular expression 'pat'",
		args(1,6, batarg("",oid),batarg("val",str),batarg("s",oid),arg("pat",str),arg("flags",str),arg("anti",bit))),
	command("regexp", "rematchjoin", regexpmatchfjoin, false,
		"Return the matching pairs from the 'val' and 'pat'\ncolumns",
		args(2,9, batarg("lr",oid),batarg("rr",oid),batarg("val",str),batarg("pat",str),arg("flags",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng))),
	command("batregexp", "rematch", regexpmatchfbulk, false,
		"Return a BAT with true for match and false for no match",
		args(1,4, batarg("",bit),batarg("val",str),arg("pat",str),arg("flags",str))),
	{ .imp=NULL }		/* sentinel */
};

#include "mal_import.h"
#include "sql_import.h"
#ifdef _MSC_VER
#undef read
#pragma section(".CRT$XCU",read)
#endif
LIB_STARTUP_FUNC(init_regexp)
{
	mal_module("regexp", NULL, regexp_init_funcs);
	sql_register("regexp", regexp_sql);
}