view reverse/reverse.c @ 61:b42fe5c9bd34 default tip

update to new versioned monetdb directories
author Niels Nes <niels@cwi.nl>
date Mon, 23 Sep 2024 13:34:22 +0200 (7 months ago)
parents 02895996506d
children
line wrap: on
line source
/*
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0.  If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Copyright 2013-2022 MonetDB B.V.
 */

/* monetdb_config.h must be included as the first include file */
#include <monetdb_config.h>

/* mal_exception.h actually contains everything we need */
#include <mal_exception.h>

/* system include files */
#include <string.h>

/* Reverse the UTF-8 string src into dst, character by character.
 *
 * dst is a buffer of length larger than len (i.e. dst[len] exists),
 * src is a UTF-8-encoded string of length exactly len bytes.
 *
 * The bytes of a multibyte character are kept in their original order
 * so that the result is again valid UTF-8; only the order of the
 * characters is reversed.  The nil string is copied unchanged.
 *
 * Malformed input (a multibyte sequence that is cut short or that lacks
 * its continuation bytes, or a stray byte with the high bit set) still
 * triggers an assertion failure in debug builds.  When assertions are
 * compiled out, such a byte is copied as a one-byte unit, so this
 * function never writes outside dst[0..len] and never reads beyond the
 * terminating NUL of src. */
static void
do_reverse(char *dst, const char *src, size_t len)
{
	dst[len] = 0;
	if (strNil(src)) {
		/* special case for nil:str */
		assert(len == strlen(str_nil));
		strcpy(dst, str_nil);
		return;
	}
	/* all strings in MonetDB are encoded using UTF-8; we must
	 * make sure that the reversed string is also encoded in valid
	 * UTF-8, so we treat multibyte characters as single units */
	while (len > 0 && *src) {
		size_t n;	/* number of bytes in this character */
		size_t i;
		int ok;		/* is the sequence well-formed? */

		/* the lead byte announces the length of the sequence */
		if ((*src & 0xF8) == 0xF0)
			n = 4;
		else if ((*src & 0xF0) == 0xE0)
			n = 3;
		else if ((*src & 0xE0) == 0xC0)
			n = 2;
		else
			n = 1;

		if (n == 1) {
			/* 1 byte UTF-8 "sequence" must be plain ASCII */
			ok = (*src & 0x80) == 0;
		} else {
			/* the sequence must fit in what is left and
			 * consist of continuation bytes only; checking
			 * stops at the first bad byte (which includes
			 * the terminating NUL) so we never read past
			 * the end of src */
			ok = n <= len;
			for (i = 1; ok && i < n; i++)
				ok = (src[i] & 0xC0) == 0x80;
		}
		assert(ok);
		if (!ok) {
			/* only reachable when assertions are disabled:
			 * copy just this byte so that len cannot wrap
			 * around and index outside dst */
			n = 1;
		}

		/* the character goes in front of everything that was
		 * copied so far */
		len -= n;
		memcpy(dst + len, src, n);
		src += n;
	}
	/* all of src was consumed and dst was filled completely */
	assert(len == 0 && *src == 0);
}

/* Scalar implementation of revstr.revstr.
 *
 * Allocates a buffer with GDKmalloc that is large enough for the
 * reversed copy of *arg (plus terminator), stores it in *retval and
 * lets do_reverse fill it.  Returns MAL_SUCCEED on success; throws a
 * MAL exception with MAL_MALLOC_FAIL if the allocation fails, in
 * which case *retval is NULL. */
static char *
UDFreverse(char **retval, const char **arg)
{
	const char *input = *arg;
	size_t nbytes = strlen(input);
	char *output = GDKmalloc(nbytes + 1);

	*retval = output;
	if (output == NULL)
		throw(MAL, "revstr.revstr", MAL_MALLOC_FAIL);

	do_reverse(output, input, nbytes);
	return MAL_SUCCEED;
}

/* Bulk implementation of batrevstr.revstr.
 *
 * Looks up the input BAT identified by *arg, reverses every string in
 * it with do_reverse and appends the results, in order, to a newly
 * created string BAT whose id is stored in *retval.
 *
 * Returns MAL_SUCCEED on success.  Throws a MAL exception when the
 * input BAT cannot be found (RUNTIME_OBJECT_MISSING) or when an
 * allocation or append fails (MAL_MALLOC_FAIL); everything acquired
 * up to that point is released before throwing. */
static char *
UDFBATreverse(bat *retval, const bat *arg)
{
	BAT *b, *bn;
	BATiter bi;
	BUN p, q;
	size_t bufsize = 1024;
	char *buf;

	/* scratch buffer that receives each reversed string; it is
	 * enlarged on demand inside the loop */
	buf = GDKmalloc(bufsize);
	if (buf == NULL)
		throw(MAL, "batrevstr.revstr", MAL_MALLOC_FAIL);

	b = BATdescriptor(*arg);
	if (b == NULL) {
		GDKfree(buf);
		throw(MAL, "batrevstr.revstr", RUNTIME_OBJECT_MISSING);
	}

	/* this function is only meant for string BATs */
	assert(b->ttype == TYPE_str);

	/* the result BAT gets the same hseqbase as the input */
	bn = COLnew(b->hseqbase, TYPE_str, BATcount(b), TRANSIENT);
	if (bn == NULL) {
		BBPunfix(b->batCacheid);
		GDKfree(buf);
		throw(MAL, "batrevstr.revstr", MAL_MALLOC_FAIL);
	}

	/* visit every entry of b: p is the index of the current
	 * entry, q is bookkeeping for BATloop */
	bi = bat_iterator(b);
	BATloop(b, p, q) {
		const char *value = (const char *) BUNtail(bi, p);
		size_t nbytes = strlen(value);

		/* do_reverse writes buf[nbytes], so the buffer must
		 * be strictly larger than nbytes */
		if (nbytes >= bufsize) {
			char *grown;

			bufsize = nbytes + 1024;
			grown = GDKrealloc(buf, bufsize);
			if (grown == NULL) {
				/* buf itself is still allocated and is
				 * freed in the bailout code */
				goto bailout;
			}
			buf = grown;
		}

		do_reverse(buf, value, nbytes);

		/* appending can fail because it may need to grow
		 * memory areas, which is especially true for string
		 * BATs */
		if (BUNappend(bn, buf, false) != GDK_SUCCEED)
			goto bailout;
	}
	bat_iterator_end(&bi);
	GDKfree(buf);

	BBPunfix(b->batCacheid);

	*retval = bn->batCacheid;
	BBPkeepref(bn);

	return MAL_SUCCEED;

  bailout:
	/* only reached after an allocation error inside the loop:
	 * release everything we hold and throw an exception */
	bat_iterator_end(&bi);
	GDKfree(buf);
	BBPunfix(b->batCacheid);
	BBPunfix(bn->batCacheid);
	throw(MAL, "batrevstr.revstr", MAL_MALLOC_FAIL);
}

#include "mel.h"

/* SQL statement that is passed to sql_register() in init_reverse; it
 * declares the SQL function revstr with external name revstr.revstr */
static char reverse_sql[] =
	"CREATE FUNCTION revstr(src STRING) RETURNS STRING"
	" EXTERNAL NAME revstr.revstr;";

/* Table of the MAL functions provided by this file; it is handed to
 * mal_module() in init_reverse.  Each entry gives the MAL module name,
 * the MAL function name, the C function implementing it, a flag, a
 * help text and the argument description.
 * NOTE(review): the meaning of the `false' flag and of the numbers in
 * args(1,2, ...) is defined by the macros in mel.h -- presumably
 * "1 result out of 2 arguments in total"; confirm there. */
static mel_func reverse_init_funcs[] = {
	/* scalar variant: revstr.revstr, implemented by UDFreverse */
	command("revstr", "revstr", UDFreverse, false,
		"Reverse a string",
		args(1,2, arg("",str),arg("ra1",str))),
	/* bulk variant: batrevstr.revstr, implemented by UDFBATreverse */
	command("batrevstr", "revstr", UDFBATreverse, false,
		"Reverse a BAT of strings",
		args(1,2, batarg("",str),batarg("b",str))),
	{ .imp=NULL }		/* sentinel */
};

#include "mal_import.h"
#include "sql_import.h"
#ifdef _MSC_VER
/* NOTE(review): presumably `read' may be defined as a macro on this
 * platform and has to be undefined so that the pragma below sees the
 * literal section attribute -- confirm */
#undef read
#pragma section(".CRT$XCU",read)
#endif
/* Start-up hook of this module: registers the MAL functions listed in
 * reverse_init_funcs under the module name "revstr" and registers the
 * SQL statement in reverse_sql that creates the SQL-level function.
 * NOTE(review): LIB_STARTUP_FUNC is defined elsewhere; presumably it
 * arranges for this function to be run when the library is loaded --
 * confirm against its definition. */
LIB_STARTUP_FUNC(init_reverse)
{
	mal_module("revstr", NULL, reverse_init_funcs);
	sql_register("revstr", reverse_sql);
}