view reverse/reverse.c @ 33:a8ffdbc388ce

Ported to Jun2020 branch.
author Sjoerd Mullender <sjoerd@acm.org>
date Tue, 09 Jun 2020 10:30:35 +0200
parents e925d55b369b
children e5d2d0c9b7b3
line wrap: on
line source
/*
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0.  If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Copyright 2013-2018 MonetDB B.V.
 */

/* monetdb_config.h must be included as the first include file */
#include <monetdb_config.h>

/* mal_exception.h actually contains everything we need */
#include <mal_exception.h>

/* system include files */
#include <string.h>

/* Store in dst the character-by-character reversal of src.
 *
 * src is a UTF-8-encoded string of exactly len bytes (the terminator
 * is not counted); dst must have room for at least len + 1 bytes,
 * because dst[len] receives the terminating NUL. */
static void
do_reverse(char *dst, const char *src, size_t len)
{
	size_t width;		/* number of bytes in the current character */
	size_t i;

	dst[len] = 0;
	if (strNil(src)) {
		/* nil:str is not reversed: the result is nil:str too */
		assert(len == strlen(str_nil));
		strcpy(dst, str_nil);
		return;
	}
	/* Walk src from front to back while filling dst from back to
	 * front.  A multibyte UTF-8 sequence is moved as one unit with
	 * its bytes kept in their original order, so that the result is
	 * valid UTF-8 again.  len always holds the number of bytes of
	 * dst that still have to be filled. */
	for (; *src; src += width) {
		/* the lead byte tells how long the sequence is */
		if ((*src & 0xF8) == 0xF0)
			width = 4;
		else if ((*src & 0xF0) == 0xE0)
			width = 3;
		else if ((*src & 0xE0) == 0xC0)
			width = 2;
		else
			width = 1;
		assert(len >= width);
		/* a character of one byte must be plain ASCII */
		assert(width > 1 || (*src & 0x80) == 0);
		len -= width;
		dst[len] = src[0];
		for (i = 1; i < width; i++) {
			/* every byte after the lead byte must be a
			 * continuation byte (bit pattern 10xxxxxx) */
			assert((src[i] & 0xC0) == 0x80);
			dst[len + i] = src[i];
		}
	}
	/* all of src consumed means all of dst filled */
	assert(len == 0);
}

/* __declspec() is required when building on Windows (recognized here
 * by _MSC_VER being defined); everywhere else it is turned into a
 * macro that expands to nothing, so the declarations below can be
 * written just once */
#if !defined(_MSC_VER)
#define __declspec(x)
#endif

/* the functions that this file exports */
extern __declspec(dllexport) char *
UDFreverse(char **retval, const char **arg);
extern __declspec(dllexport) char *
UDFBATreverse(bat *retval, const bat *arg);

/* Scalar version: allocate a buffer with GDKmalloc, fill it with the
 * reversal of the string *arg and hand it back through *retval.
 *
 * Returns MAL_SUCCEED, or throws a MAL exception when the buffer
 * cannot be allocated (*retval is NULL in that case). */
char *
UDFreverse(char **retval, const char **arg)
{
	const char *input = *arg;
	size_t nbytes = strlen(input);
	/* one extra byte for the terminating NUL */
	char *result = GDKmalloc(nbytes + 1);

	*retval = result;
	if (result == NULL)
		throw(MAL, "reverse.reverse", MAL_MALLOC_FAIL);

	do_reverse(result, input, nbytes);
	return MAL_SUCCEED;
}

/* Bulk version: create a new string BAT that holds, in the same
 * order, the reversal of every string in the BAT identified by *arg,
 * and store the id of the new BAT in *retval.
 *
 * Returns MAL_SUCCEED, or throws a MAL exception when the input BAT
 * cannot be obtained or when an allocation or append fails. */
char *
UDFBATreverse(bat *retval, const bat *arg)
{
	BAT *in, *out;
	BATiter it;
	BUN pos, end;
	/* scratch buffer that receives each reversed string before it
	 * is appended to the result; it is enlarged on demand */
	size_t cap = 1024;
	char *buf = GDKmalloc(cap);

	if (buf == NULL)
		throw(MAL, "batreverse.reverse", MAL_MALLOC_FAIL);

	in = BATdescriptor(*arg);
	if (in == NULL) {
		GDKfree(buf);
		throw(MAL, "batreverse.reverse", RUNTIME_OBJECT_MISSING);
	}

	/* callers are expected to pass string BATs only */
	assert(in->ttype == TYPE_str);

	/* the result gets the same seqbase as the input and is sized
	 * for the same number of entries */
	out = COLnew(in->hseqbase, TYPE_str, BATcount(in), TRANSIENT);
	if (out == NULL) {
		BBPunfix(in->batCacheid);
		GDKfree(buf);
		throw(MAL, "batreverse.reverse", MAL_MALLOC_FAIL);
	}

	/* visit every entry of the input: pos is the index of the
	 * current entry, end is bookkeeping for BATloop itself */
	it = bat_iterator(in);
	BATloop(in, pos, end) {
		const char *s = (const char *) BUNtail(it, pos);
		size_t n = strlen(s);

		/* the buffer must hold n bytes plus the terminator */
		if (n >= cap) {
			char *grown = GDKrealloc(buf, n + 1024);

			/* on failure buf is still allocated and is
			 * released by the bailout code */
			if (grown == NULL)
				goto bailout;
			buf = grown;
			cap = n + 1024;
		}
		do_reverse(buf, s, n);
		/* appending may need to grow the memory areas of the
		 * result, so it can fail as well */
		if (BUNappend(out, buf, false) != GDK_SUCCEED)
			goto bailout;
	}

	/* success: release what was only needed temporarily and pass
	 * the result on to the caller */
	GDKfree(buf);
	BBPunfix(in->batCacheid);
	*retval = out->batCacheid;
	BBPkeepref(out->batCacheid);
	return MAL_SUCCEED;

  bailout:
	/* only reached after a failed allocation or append inside the
	 * loop: release the buffer and both BATs, then report it */
	GDKfree(buf);
	BBPunfix(in->batCacheid);
	BBPunfix(out->batCacheid);
	throw(MAL, "batreverse.reverse", MAL_MALLOC_FAIL);
}