view reverse/reverse.c @ 0:a1080ed7fe4d

Created an example UDF for a SQL reverse() function.
author Sjoerd Mullender <sjoerd@acm.org>
date Fri, 05 Jul 2013 17:14:08 +0200 (2013-07-05)
parents
children 22a95811ba05
line wrap: on
line source
/*
 * The contents of this file are subject to the MonetDB Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.monetdb.org/Legal/MonetDBLicense
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * The Original Code is the MonetDB Database System.
 *
 * The Initial Developer of the Original Code is CWI.
 * Portions created by CWI are Copyright (C) 1997-July 2008 CWI.
 * Copyright August 2008-2013 MonetDB B.V.
 * All Rights Reserved.
 */

/* monetdb_config.h must be included as the first include file */
#include <monetdb_config.h>
/* mal_exception.h actually contains everything we need */
#include <mal_exception.h>

/* system include files */
#include <string.h>

/* Reverse the UTF-8-encoded string src into dst.
 *
 * src must be exactly len bytes long (not counting the terminating
 * NUL) and dst must have room for at least len + 1 bytes.  The
 * reversal is done per character, not per byte: the bytes of a
 * multi-byte UTF-8 sequence keep their relative order.  The nil
 * string is not reversed but copied unchanged.
 *
 * A lead byte that is not followed by all the continuation bytes it
 * announces (this includes a sequence cut short by the end of the
 * string) and a continuation byte without a lead byte are copied as
 * single bytes.  Because of this, malformed input cannot make us read
 * beyond the end of src or write outside of dst, also when
 * assertions are compiled out. */
static void
do_reverse(char *dst, const char *src, size_t len)
{
	dst[len] = 0;
	if (strcmp(src, str_nil) == 0) {
		/* special case for nil */
		strcpy(dst, str_nil);
		assert(len == strlen(str_nil));
		return;
	}
	/* len counts the bytes of dst that still have to be filled;
	 * we fill dst from the back while reading src from the front.
	 * Testing len as well as *src keeps us inside dst if the
	 * caller passed a len smaller than the real length of src */
	while (len > 0 && *src) {
		size_t n, i;

		/* the lead byte announces the length of the sequence */
		if ((*src & 0xF8) == 0xF0)
			n = 4;
		else if ((*src & 0xF0) == 0xE0)
			n = 3;
		else if ((*src & 0xE0) == 0xC0)
			n = 2;
		else
			n = 1;

		if (n > len) {
			/* sequence would not fit in what is left of dst */
			n = 1;
		} else {
			/* all following bytes must be continuation
			 * bytes; we stop at the first one that isn't,
			 * so we never look beyond the terminating NUL */
			for (i = 1; i < n; i++) {
				if ((src[i] & 0xC0) != 0x80) {
					n = 1;
					break;
				}
			}
		}

		/* copy the n bytes, in order, to the end of the
		 * not-yet-filled part of dst */
		len -= n;
		memcpy(dst + len, src, n);
		src += n;
	}
	/* if this fails, src was shorter than len bytes */
	assert(len == 0);
}

/* __declspec() must be used on Windows, but not on other systems.
 * When _MSC_VER is not defined, __declspec() is defined as a macro
 * that expands to nothing, so that the declarations below can be
 * written in the same way on all systems. */
#ifndef _MSC_VER
/* not Windows */
#define __declspec(x)	/* nothing */
#endif

/* prototypes of the two exported functions defined in this file:
 * UDFreverse reverses a single string, UDFBATreverse reverses all
 * strings in a BAT */
extern __declspec(dllexport) char *UDFreverse(char **retval, const char **arg);
extern __declspec(dllexport) char *UDFBATreverse(bat *retval, const bat *arg);

/* Scalar version: reverse the single string *arg.
 *
 * The result is stored in a buffer newly allocated with GDKmalloc
 * and a pointer to it is returned in *retval.  Returns MAL_SUCCEED,
 * or an exception if the buffer could not be allocated (*retval is
 * NULL in that case). */
char *
UDFreverse(char **retval, const char **arg)
{
	const char *input = *arg;
	size_t nbytes = strlen(input);
	/* one extra byte for the terminating NUL */
	char *result = GDKmalloc(nbytes + 1);

	*retval = result;
	if (result == NULL)
		throw(MAL, "reverse.reverse", MAL_MALLOC_FAIL);

	do_reverse(result, input, nbytes);
	return MAL_SUCCEED;
}

/* Bulk version: reverse every string in the BAT identified by *arg.
 *
 * A new BAT with a void head and a string tail is created, its
 * seqbase is copied from the input BAT, and the reversed strings
 * are appended to it in the order in which they occur in the input.
 * On success the id of the new BAT is stored in *retval and
 * MAL_SUCCEED is returned.  If the input BAT cannot be found or if
 * memory cannot be allocated, everything acquired so far (the
 * scratch buffer and the BAT references) is released and an
 * exception is returned; *retval is not touched in that case. */
char *
UDFBATreverse(bat *retval, const bat *arg)
{
	BAT *b, *bn;
	BATiter bi;
	BUN p, q;
	const char *src;
	size_t len;
	char *dst;
	size_t dstlen;

	/* allocate temporary space for reversed strings; we grow this
	 * if we need more */
	dstlen = 1024;
	dst = GDKmalloc(dstlen);
	if (dst == NULL)
		throw(MAL, "batreverse.reverse", MAL_MALLOC_FAIL);

	b = BATdescriptor(*arg);
	if (b == NULL) {
		/* don't leak the temporary space */
		GDKfree(dst);
		throw(MAL, "batreverse.reverse", RUNTIME_OBJECT_MISSING);
	}

	/* we should only get called for string BATs */
	assert(b->ttype == TYPE_str);

	/* allocate result BAT */
	bn = BATnew(TYPE_void, TYPE_str, BATcount(b));
	if (bn == NULL) {
		/* release everything we acquired so far, including
		 * the temporary space */
		GDKfree(dst);
		BBPreleaseref(b->batCacheid);
		throw(MAL, "batreverse.reverse", MAL_MALLOC_FAIL);
	}

	/* copy seqbase from old to new */
	BATseqbase(bn, b->hseqbase);

	/* loop through BAT b; p is index of the entry we're working
	 * on, q is used internally by BATloop to do the iterating */
	bi = bat_iterator(b);
	BATloop(b, p, q) {
		src = (const char *) BUNtail(bi, p);
		len = strlen(src);
		/* make sure dst is large enough: we need len bytes
		 * plus one for the terminating NUL */
		if (len >= dstlen) {
			char *ndst;

			dstlen = len + 1024;
			ndst = GDKrealloc(dst, dstlen);
			if (ndst == NULL) {
				/* if GDKrealloc fails, dst is still
				 * allocated */
				goto bailout;
			}
			dst = ndst;
		}
		do_reverse(dst, src, len);
		if (BUNappend(bn, dst, 0) == NULL) {
			/* BUNappend can fail since it may have to
			 * grow memory areas--especially true for
			 * string BATs */
			goto bailout;
		}
	}
	GDKfree(dst);

	BBPreleaseref(b->batCacheid);

	*retval = bn->batCacheid;
	BBPkeepref(bn->batCacheid);

	return MAL_SUCCEED;

  bailout:
	/* we only get here in the case of an allocation error; clean
	 * up the mess we've created and throw an exception */
	GDKfree(dst);
	BBPreleaseref(b->batCacheid);
	BBPreleaseref(bn->batCacheid);
	throw(MAL, "batreverse.reverse", MAL_MALLOC_FAIL);
}