view cxx-reverse/reverse.cpp @ 55:68263b10998e

Updated: added bat_iterator_end.
author Sjoerd Mullender <sjoerd@acm.org>
date Wed, 26 Jan 2022 14:59:05 +0100 (2022-01-26)
parents 9ff721585946
children 8122094c79b1
line wrap: on
line source
/*
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0.  If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Copyright 2013-2021 MonetDB B.V.
 */

/* monetdb_config.h must be included as the first include file */

using namespace std;

#include <monetdb_config.h>

/* mal_exception.h actually contains everything we need */
#include <mal_exception.h>

/* system include files */
#include <string.h>


/* Store in dst the character-wise reversal of src.
 *
 * src is a UTF-8-encoded string of exactly len bytes (terminator not
 * counted); dst must have room for len + 1 bytes since dst[len]
 * receives the terminating NUL.  The nil string is not reversed but
 * copied unchanged. */
static void
do_reverse(char *dst, const char *src, size_t len)
{
	/* terminate first: len is consumed while filling dst below */
	dst[len] = 0;

	if (strNil(src)) {
		/* nil:str stays nil:str */
		assert(len == strlen(str_nil));
		strcpy(dst, str_nil);
		return;
	}

	/* Walk src front to back and fill dst back to front.  A
	 * multibyte UTF-8 sequence is moved as one unit with its bytes
	 * kept in their original order, so the result is valid UTF-8
	 * again. */
	while (*src) {
		size_t width, i;

		/* the lead byte tells how many bytes the sequence has */
		if ((*src & 0xF8) == 0xF0)
			width = 4;
		else if ((*src & 0xF0) == 0xE0)
			width = 3;
		else if ((*src & 0xE0) == 0xC0)
			width = 2;
		else
			width = 1;

		assert(len >= width);
		/* a single byte "sequence" has to be plain ASCII */
		assert(width > 1 || (*src & 0x80) == 0);

		/* claim the last width unused bytes of dst */
		len -= width;
		dst[len] = *src++;
		for (i = 1; i < width; i++) {
			/* everything after the lead byte is a
			 * continuation byte */
			assert((*src & 0xC0) == 0x80);
			dst[len + i] = *src++;
		}
	}
	/* every byte of dst must have been filled */
	assert(len == 0);
}

/* Scalar implementation: reverse the string *arg.
 *
 * On success *retval points to a buffer obtained from GDKmalloc that
 * holds the reversed string, and MAL_SUCCEED is returned.  If the
 * allocation fails, *retval is NULL and a MAL exception is returned. */
extern "C" char *
UDFreverse(char **retval, const char **arg)
{
	const size_t nbytes = strlen(*arg);
	/* one extra byte for the terminating NUL written by do_reverse */
	char *out = (char *) GDKmalloc(nbytes + 1);

	*retval = out;
	if (out == NULL)
		throw(MAL, "revstr.revstr", MAL_MALLOC_FAIL);

	do_reverse(out, *arg, nbytes);
	return MAL_SUCCEED;
}

/* Bulk implementation: reverse every string in the BAT identified by
 * *arg.
 *
 * On success a new transient string BAT with the reversed values is
 * created, its id is stored in *retval and MAL_SUCCEED is returned.
 * On failure everything acquired here is released again and a MAL
 * exception is returned. */
extern "C" char *
UDFBATreverse(bat *retval, const bat *arg)
{
	BAT *in, *out;
	BATiter it;
	BUN idx, cnt;
	/* scratch buffer that receives one reversed string at a time; it
	 * is enlarged on demand inside the loop */
	size_t cap = 1024;
	char *buf;

	buf = (char *) GDKmalloc(cap);
	if (buf == NULL)
		throw(MAL, "batrevstr.revstr", MAL_MALLOC_FAIL);

	in = BATdescriptor(*arg);
	if (in == NULL) {
		GDKfree(buf);
		throw(MAL, "batrevstr.revstr", RUNTIME_OBJECT_MISSING);
	}

	/* callers are expected to hand us string BATs only */
	assert(in->ttype == TYPE_str);

	/* result BAT: same head sequence base and capacity as the input */
	out = COLnew(in->hseqbase, TYPE_str, BATcount(in), TRANSIENT);
	if (out == NULL) {
		BBPunfix(in->batCacheid);
		GDKfree(buf);
		throw(MAL, "batrevstr.revstr", MAL_MALLOC_FAIL);
	}

	/* visit all entries of the input; idx is the current position,
	 * cnt is bookkeeping for BATloop */
	it = bat_iterator(in);
	BATloop(in, idx, cnt) {
		const char *s = (const char *) BUNtail(it, idx);
		size_t n = strlen(s);

		/* need room for n bytes plus the terminating NUL */
		if (n >= cap) {
			size_t want = n + 1024;
			char *grown = (char *) GDKrealloc(buf, want);

			/* on failure buf is still allocated and is
			 * released at bailout */
			if (grown == NULL)
				goto bailout;
			buf = grown;
			cap = want;
		}

		do_reverse(buf, s, n);

		/* appending may fail because it may have to grow
		 * memory areas */
		if (BUNappend(out, buf, false) != GDK_SUCCEED)
			goto bailout;
	}
	bat_iterator_end(&it);
	GDKfree(buf);

	BBPunfix(in->batCacheid);

	/* hand the result over to the caller */
	*retval = out->batCacheid;
	BBPkeepref(out->batCacheid);

	return MAL_SUCCEED;

  bailout:
	/* reached only when growing the buffer or appending failed:
	 * release the iterator, the scratch buffer and both BATs, then
	 * report the error */
	bat_iterator_end(&it);
	GDKfree(buf);
	BBPunfix(in->batCacheid);
	BBPunfix(out->batCacheid);
	throw(MAL, "batrevstr.revstr", MAL_MALLOC_FAIL);
}

extern "C" {
#include "mel.h"

static unsigned char reverse_sql[] = "CREATE FUNCTION reverse(src STRING)"
	" RETURNS STRING EXTERNAL NAME reverse.revstr;";


#include "mal_import.h"
#include "sql_import.h"
#ifdef _MSC_VER
#undef read
#pragma section(".CRT$XCU",read)
#endif

/* Only C++2x supports a restricted form of the C99 designator features. For one thing, it does not support nested designators.
 * So the helper macros for declaring and registering are currently not useful for registering the MAL functions.
 * There is also a bug in gcc that prevents the use of designators: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55227
 * 
 * static mel_func reverse_init_funcs[] = {
 * command("reverse", "revstr", UDFreverse, false, "Reverse a string", args(1,2, arg("",str),arg("ra1",str))),
 * command("batreverse", "revstr", UDFBATreverse, false, "Reverse a BAT of strings", args(1,2, batarg("",str),batarg("b",str))),
 * { .imp=NULL }};
*/

/* Fill in one argument descriptor.  All arguments and results of the
 * functions registered in this file are strings, so the type name is
 * always "str"; isbat is stored as given (the callers in this file
 * pass 0 for the scalar function and 1 for the BAT function). */
static void set_arg(mel_arg& arg, uint8_t isbat) {
	strcpy(arg.type, "str");
	arg.isbat = isbat;
}

/* Fill in one function table entry.
 *
 * mod/fcn  module and function name, copied into the entry
 * imp      address of the implementing function
 * retc     number of return values -- presumably included in argc,
 *          cf. the args(1,2, ...) entries in the commented-out table
 *          earlier in this file; TODO confirm
 * argc     number of descriptors in args
 * args     argument descriptors; only the pointer is stored, so the
 *          array must outlive the entry
 *
 * The entry never gets a comment and is always flagged as a command. */
static void set_func(mel_func& func, const char* mod, const char* fcn, fptr imp, uint8_t retc, uint8_t argc, mel_arg* args) {
	/* implementation and signature */
	func.imp = imp;
	func.args = args;
	func.argc = argc;
	func.retc = retc;

	/* fixed properties */
	func.command = 1;
	func.comment = NULL;

	/* names */
	strcpy(func.fcn, fcn);
	strcpy(func.mod, mod);
}

/* Function table handed to mal_module().  mal_module() is not told how
 * many entries the table has, so the table has to be terminated by an
 * empty entry, like the `{ .imp=NULL }` entry that closes the
 * commented-out designated-initializer table earlier in this file.
 * Hence three slots: two functions plus the terminator, which stays
 * all zero because of the static zero initialization.
 * NOTE(review): the exact end-of-table test is in MonetDB's module
 * loader, not in this file -- confirm there. */
static mel_func reverse_init_funcs[3] = {0};

/* argument descriptors (result first, then the parameter) of the
 * scalar and of the BAT version */
static mel_arg args[2] = {0};
static mel_arg batargs[2] = {0};

/* Startup hook of this library: build the function table at run time
 * (see the comment on designators earlier in this file for why this is
 * not done with static initializers) and register both the MAL module
 * and the SQL wrapper. */
LIB_STARTUP_FUNC(init_reverse)
{
	/* reverse.revstr: str -> str */
	set_arg(args[0], 0);
	set_arg(args[1], 0);
	set_func(reverse_init_funcs[0], "reverse", "revstr", (fptr) &UDFreverse, 1, 2, args);

	/* batreverse.revstr: BAT of str -> BAT of str */
	set_arg(batargs[0], 1);
	set_arg(batargs[1], 1);
	set_func(reverse_init_funcs[1], "batreverse", "revstr", (fptr) &UDFBATreverse, 1, 2, batargs);

	/* reverse_init_funcs[2] is deliberately left untouched: it is
	 * the zeroed end-of-table marker */
	mal_module("reverse", NULL, reverse_init_funcs);
	sql_register("reverse", reverse_sql);
}
}