Mercurial > hg > MonetDB-extend
view reverse/reverse.c @ 33:a8ffdbc388ce
Ported to Jun2020 branch.
author | Sjoerd Mullender <sjoerd@acm.org> |
---|---|
date | Tue, 09 Jun 2020 10:30:35 +0200 |
parents | e925d55b369b |
children | e5d2d0c9b7b3 |
line wrap: on
line source
/*
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0.  If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Copyright 2013-2018 MonetDB B.V.
 */

/* monetdb_config.h must be included as the first include file */
#include <monetdb_config.h>

/* mal_exception.h actually contains everything we need */
#include <mal_exception.h>

/* system include files */
#include <string.h>

/* Write the character-wise reversal of src into dst.
 *
 * dst must have room for len + 1 bytes (dst[len] is written), and src
 * is a UTF-8-encoded string of exactly len bytes.  The nil string is
 * not reversed but copied through unchanged. */
static void
do_reverse(char *dst, const char *src, size_t len)
{
	/* terminate the result up front; len is consumed below */
	dst[len] = 0;

	if (strNil(src)) {
		/* special case for nil:str */
		assert(len == strlen(str_nil));
		strcpy(dst, str_nil);
		return;
	}

	/* Strings in MonetDB are UTF-8 and the result must be valid
	 * UTF-8 as well, so every multibyte sequence is moved as one
	 * unit: the output is filled from the back towards the front
	 * while the bytes inside a sequence keep their order. */
	while (*src) {
		size_t seqlen;
		size_t i;

		/* the lead byte tells how long the sequence is */
		if ((*src & 0xF8) == 0xF0)
			seqlen = 4;
		else if ((*src & 0xF0) == 0xE0)
			seqlen = 3;
		else if ((*src & 0xE0) == 0xC0)
			seqlen = 2;
		else
			seqlen = 1;

		assert(len >= seqlen);
		/* a one-byte "sequence" must be plain ASCII */
		assert(seqlen > 1 || (*src & 0x80) == 0);

		len -= seqlen;
		dst[len] = *src++;
		for (i = 1; i < seqlen; i++) {
			/* everything after the lead byte is a
			 * continuation byte */
			assert((*src & 0xC0) == 0x80);
			dst[len + i] = *src++;
		}
	}
	/* the whole output buffer must have been filled */
	assert(len == 0);
}

/* __declspec() must be used on Windows, but not on other systems */
#ifndef _MSC_VER
/* not Windows */
#define __declspec(x)	/* nothing */
#endif

extern __declspec(dllexport) char *UDFreverse(char **retval, const char **arg);
extern __declspec(dllexport) char *UDFBATreverse(bat *retval, const bat *arg);

/* Scalar version: reverse the single string *arg into a newly
 * allocated buffer that is handed back through *retval. */
char *
UDFreverse(char **retval, const char **arg)
{
	const char *input = *arg;
	size_t nbytes = strlen(input);

	*retval = GDKmalloc(nbytes + 1);
	if (*retval == NULL)
		throw(MAL, "reverse.reverse", MAL_MALLOC_FAIL);

	do_reverse(*retval, input, nbytes);
	return MAL_SUCCEED;
}

/* Make sure the scratch buffer *bufp (capacity *capp) can hold a
 * string of len bytes plus its terminator, growing it if needed.
 *
 * Returns 1 on success and 0 if GDKrealloc failed; in the latter case
 * *bufp is still the old allocation and must still be freed by the
 * caller. */
static int
reverse_reserve(char **bufp, size_t *capp, size_t len)
{
	char *grown;

	if (len < *capp)
		return 1;

	*capp = len + 1024;
	grown = GDKrealloc(*bufp, *capp);
	if (grown == NULL)
		return 0;
	*bufp = grown;
	return 1;
}

/* Bulk version: reverse every string in the BAT identified by *arg
 * and return the id of a new BAT with the results through *retval. */
char *
UDFBATreverse(bat *retval, const bat *arg)
{
	BAT *in, *out;
	BATiter it;
	BUN pos, end;
	char *scratch;
	size_t scratchlen = 1024;
	int failed = 1;

	/* temporary space for one reversed string; grown on demand */
	scratch = GDKmalloc(scratchlen);
	if (scratch == NULL)
		throw(MAL, "batreverse.reverse", MAL_MALLOC_FAIL);

	in = BATdescriptor(*arg);
	if (in == NULL) {
		GDKfree(scratch);
		throw(MAL, "batreverse.reverse", RUNTIME_OBJECT_MISSING);
	}

	/* we should only get called for string BATs */
	assert(in->ttype == TYPE_str);

	/* allocate result BAT */
	out = COLnew(in->hseqbase, TYPE_str, BATcount(in), TRANSIENT);
	if (out == NULL) {
		BBPunfix(in->batCacheid);
		GDKfree(scratch);
		throw(MAL, "batreverse.reverse", MAL_MALLOC_FAIL);
	}

	/* walk over the input; pos is the entry being processed, end
	 * is used internally by BATloop to do the iterating */
	it = bat_iterator(in);
	BATloop(in, pos, end) {
		const char *str = (const char *) BUNtail(it, pos);
		size_t slen = strlen(str);

		if (!reverse_reserve(&scratch, &scratchlen, slen))
			goto cleanup;

		do_reverse(scratch, str, slen);

		/* BUNappend can fail since it may have to grow memory
		 * areas--especially true for string BATs */
		if (BUNappend(out, scratch, false) != GDK_SUCCEED)
			goto cleanup;
	}
	failed = 0;

  cleanup:
	/* the scratch buffer and the input reference are released on
	 * both the success and the failure path */
	GDKfree(scratch);
	BBPunfix(in->batCacheid);

	if (failed) {
		/* we only get here with failed set in the case of an
		 * allocation error; drop the partial result and throw
		 * an exception */
		BBPunfix(out->batCacheid);
		throw(MAL, "batreverse.reverse", MAL_MALLOC_FAIL);
	}

	*retval = out->batCacheid;
	BBPkeepref(out->batCacheid);
	return MAL_SUCCEED;
}