Mercurial > hg > MonetDB-extend
view cxx-reverse/reverse.cpp @ 55:68263b10998e
Updated: added bat_iterator_end.
author | Sjoerd Mullender <sjoerd@acm.org> |
---|---|
date | Wed, 26 Jan 2022 14:59:05 +0100 (2022-01-26) |
parents | 9ff721585946 |
children | 8122094c79b1 |
line wrap: on
line source
/* * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * Copyright 2013-2021 MonetDB B.V. */ /* monetdb_config.h must be included as the first include file */ using namespace std; #include <monetdb_config.h> /* mal_exception.h actually contains everything we need */ #include <mal_exception.h> /* system include files */ #include <string.h> /* dst is a buffer of length larger than len (i.e. dst[len] exists), * src is a UTF-8-encoded string of length exactly len bytes */ static void do_reverse(char *dst, const char *src, size_t len) { dst[len] = 0; if (strNil(src)) { /* special case for nil:str */ assert(len == strlen(str_nil)); strcpy(dst, str_nil); return; } /* all strings in MonetDB are encoded using UTF-8; we must * make sure that the reversed string is also encoded in valid * UTF-8, so we treat multibyte characters as single units */ while (*src) { if ((*src & 0xF8) == 0xF0) { /* 4 byte UTF-8 sequence */ assert(len >= 4); dst[len - 4] = *src++; assert((*src & 0xC0) == 0x80); dst[len - 3] = *src++; assert((*src & 0xC0) == 0x80); dst[len - 2] = *src++; assert((*src & 0xC0) == 0x80); dst[len - 1] = *src++; len -= 4; } else if ((*src & 0xF0) == 0xE0) { /* 3 byte UTF-8 sequence */ assert(len >= 3); dst[len - 3] = *src++; assert((*src & 0xC0) == 0x80); dst[len - 2] = *src++; assert((*src & 0xC0) == 0x80); dst[len - 1] = *src++; len -= 3; } else if ((*src & 0xE0) == 0xC0) { /* 2 byte UTF-8 sequence */ assert(len >= 2); dst[len - 2] = *src++; assert((*src & 0xC0) == 0x80); dst[len - 1] = *src++; len -= 2; } else { /* 1 byte UTF-8 "sequence" */ assert(len >= 1); assert((*src & 0x80) == 0); dst[--len] = *src++; } } assert(len == 0); } extern "C" char * UDFreverse(char **retval, const char **arg) { size_t len; len = strlen(*arg); *retval = (char*) GDKmalloc(len + 1); if (*retval == NULL) throw(MAL, "revstr.revstr", MAL_MALLOC_FAIL); do_reverse(*retval, *arg, len); return MAL_SUCCEED; } extern "C" char * UDFBATreverse(bat *retval, const bat *arg) { BAT *b, *bn; BATiter bi; BUN p, q; const char *src; size_t len; char *dst; size_t dstlen; /* allocate temporary space for reversed strings; we grow this * if we need more */ dstlen = 1024; dst = (char*) GDKmalloc(dstlen); if (dst == NULL) throw(MAL, "batrevstr.revstr", MAL_MALLOC_FAIL); b = BATdescriptor(*arg); if (b == NULL) { GDKfree(dst); throw(MAL, "batrevstr.revstr", RUNTIME_OBJECT_MISSING); } /* we should only get called for string BATs */ assert(b->ttype == TYPE_str); /* allocate result BAT */ bn = COLnew(b->hseqbase, TYPE_str, BATcount(b), TRANSIENT); if (bn == NULL) { BBPunfix(b->batCacheid); GDKfree(dst); throw(MAL, "batrevstr.revstr", MAL_MALLOC_FAIL); } /* loop through BAT b; p is index of the entry we're working * on, q is used internally by BATloop to do the iterating */ bi = bat_iterator(b); BATloop(b, p, q) { src = (const char *) BUNtail(bi, p); len = strlen(src); /* make sure dst is large enough */ if (len >= dstlen) { char *ndst; dstlen = len + 1024; ndst = (char*) GDKrealloc(dst, dstlen); if (ndst == NULL) { /* if GDKrealloc fails, dst is still * allocated */ goto bailout; } dst = ndst; } do_reverse(dst, src, len); if (BUNappend(bn, dst, false) != GDK_SUCCEED) { /* BUNappend can fail since it may have to * grow memory areas--especially true for * string BATs */ goto bailout; } } bat_iterator_end(&bi); GDKfree(dst); BBPunfix(b->batCacheid); *retval = bn->batCacheid; BBPkeepref(bn->batCacheid); return MAL_SUCCEED; bailout: /* we only get here in the case of an allocation error; clean * up the mess we've created and throw an exception */ bat_iterator_end(&bi); GDKfree(dst); BBPunfix(b->batCacheid); BBPunfix(bn->batCacheid); throw(MAL, "batrevstr.revstr", MAL_MALLOC_FAIL); } extern "C" { #include "mel.h" static unsigned char reverse_sql[] = "CREATE FUNCTION reverse(src STRING)" " RETURNS STRING EXTERNAL NAME reverse.revstr;"; #include "mal_import.h" #include "sql_import.h" #ifdef _MSC_VER #undef read #pragma section(".CRT$XCU",read) #endif /* Only C++2x supports restricted C99 designator features. For once it does not support nested designators. * So the helper macro's for declaring and registering are currently not useful for registering the MAL functions. * There is also a bug in gcc that prevents the use of designators https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55227 * * static mel_func reverse_init_funcs[] = { * command("reverse", "revstr", UDFreverse, false, "Reverse a string", args(1,2, arg("",str),arg("ra1",str))), * command("batreverse", "revstr", UDFBATreverse, false, "Reverse a BAT of strings", args(1,2, batarg("",str),batarg("b",str))), * { .imp=NULL }}; */ static void set_arg(mel_arg& arg, uint8_t isbat) { arg.isbat = isbat; strcpy(arg.type, "str"); } static void set_func(mel_func& func, const char* mod, const char* fcn, fptr imp, uint8_t retc, uint8_t argc, mel_arg* args) { strcpy(func.mod, mod); strcpy(func.fcn, fcn); func.comment = NULL; func.imp = imp; func.command = 1; func.retc = retc; func.argc = argc; func.args = args; } static mel_func reverse_init_funcs[2] = {0}; static mel_arg args[2] = {0}; static mel_arg batargs[2] = {0}; LIB_STARTUP_FUNC(init_reverse) { set_arg(args[0], 0); set_arg(args[1], 0); set_func(reverse_init_funcs[0], "reverse", "revstr", (fptr) &UDFreverse, 1, 2, args); set_arg(batargs[0], 1); set_arg(batargs[1], 1); set_func(reverse_init_funcs[1], "batreverse", "revstr", (fptr) &UDFBATreverse, 1, 2, batargs); mal_module("reverse", NULL, reverse_init_funcs); sql_register("reverse", reverse_sql); } }