LCOV - code coverage report
Current view: top level - sql/backends/monet5/UDF/udf - udf.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 130 187 69.5 %
Date: 2024-04-25 20:03:45 Functions: 7 7 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : /* monetdb_config.h must be the first include in each .c file */
      14             : #include "monetdb_config.h"
      15             : #include "udf.h"
      16             : #include "str.h"
      17             : 
      18             : /* Reverse a string */
      19             : 
      20             : /* actual implementation */
      21             : /* all non-exported functions must be declared static */
      22             : static str
      23          15 : UDFreverse_(str *buf, size_t *buflen, const char *src)
      24             : {
      25          15 :         size_t len = strlen(src);
      26          15 :         char *dst = NULL;
      27             : 
      28             :         /* assert calling sanity */
      29          15 :         assert(buf);
      30             :         /* test if input buffer is large enough for result string, otherwise re-allocate it */
      31          15 :         CHECK_STR_BUFFER_LENGTH(buf, buflen, (len + 1), "udf.reverse");
      32          15 :         dst = *buf;
      33             : 
      34          15 :         dst[len] = 0;
      35             :         /* all strings in MonetDB are encoded using UTF-8; we must
      36             :          * make sure that the reversed string is also encoded in valid
      37             :          * UTF-8, so we treat multibyte characters as single units */
      38         257 :         while (*src) {
      39         242 :                 if ((*src & 0xF8) == 0xF0) {
      40             :                         /* 4 byte UTF-8 sequence */
      41           0 :                         assert(len >= 4);
      42           0 :                         dst[len - 4] = *src++;
      43           0 :                         assert((*src & 0xC0) == 0x80);
      44           0 :                         dst[len - 3] = *src++;
      45           0 :                         assert((*src & 0xC0) == 0x80);
      46           0 :                         dst[len - 2] = *src++;
      47           0 :                         assert((*src & 0xC0) == 0x80);
      48           0 :                         dst[len - 1] = *src++;
      49           0 :                         len -= 4;
      50         242 :                 } else if ((*src & 0xF0) == 0xE0) {
      51             :                         /* 3 byte UTF-8 sequence */
      52           6 :                         assert(len >= 3);
      53           6 :                         dst[len - 3] = *src++;
      54           6 :                         assert((*src & 0xC0) == 0x80);
      55           6 :                         dst[len - 2] = *src++;
      56           6 :                         assert((*src & 0xC0) == 0x80);
      57           6 :                         dst[len - 1] = *src++;
      58           6 :                         len -= 3;
      59         236 :                 } else if ((*src & 0xE0) == 0xC0) {
      60             :                         /* 2 byte UTF-8 sequence */
      61           0 :                         assert(len >= 2);
      62           0 :                         dst[len - 2] = *src++;
      63           0 :                         assert((*src & 0xC0) == 0x80);
      64           0 :                         dst[len - 1] = *src++;
      65           0 :                         len -= 2;
      66             :                 } else {
      67             :                         /* 1 byte UTF-8 "sequence" */
      68         236 :                         assert(len >= 1);
      69         236 :                         assert((*src & 0x80) == 0);
      70         236 :                         dst[--len] = *src++;
      71             :                 }
      72             :         }
      73          15 :         assert(len == 0);
      74             : 
      75             :         return MAL_SUCCEED;
      76             : }
      77             : 
      78             : /* MAL wrapper */
      79             : str
      80           7 : UDFreverse(str *res, const str *arg)
      81             : {
      82           7 :         str msg = MAL_SUCCEED, s;
      83             : 
      84             :         /* assert calling sanity */
      85           7 :         assert(res && arg);
      86           7 :         s = *arg;
      87           7 :         if (strNil(s)) {
      88           0 :                 if (!(*res = GDKstrdup(str_nil)))
      89           0 :                         throw(MAL, "udf.reverse", SQLSTATE(HY013) MAL_MALLOC_FAIL);
      90             :         } else {
      91           7 :                 size_t buflen = strlen(s) + 1;
      92             : 
      93           7 :                 if (!(*res = GDKmalloc(buflen)))
      94           0 :                         throw(MAL, "udf.reverse", SQLSTATE(HY013) MAL_MALLOC_FAIL);
      95           7 :                 if ((msg = UDFreverse_(res, &buflen, s)) != MAL_SUCCEED) {
      96           0 :                         GDKfree(*res);
      97           0 :                         *res = NULL;
      98           0 :                         return msg;
      99             :                 }
     100             :         }
     101             :         return msg;
     102             : }
     103             : 
     104             : /* Reverse a BAT of strings */
     105             : /*
     106             :  * Generic "type-oblivious" version,
     107             :  * using generic "type-oblivious" BAT access interface.
     108             :  */
     109             : 
     110             : /* actual implementation */
     111             : static char *
     112           2 : UDFBATreverse_(BAT **ret, BAT *src)
     113             : {
     114           2 :         BATiter li;
     115           2 :         BAT *bn = NULL;
     116           2 :         BUN p = 0, q = 0;
     117           2 :         size_t buflen = INITIAL_STR_BUFFER_LENGTH;
     118           2 :         str msg = MAL_SUCCEED, buf;
     119           2 :         bool nils = false;
     120             : 
     121             :         /* assert calling sanity */
     122           2 :         assert(ret);
     123             : 
     124             :         /* handle NULL pointer */
     125           2 :         if (src == NULL)
     126           0 :                 throw(MAL, "batudf.reverse", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
     127             :         /* check tail type */
     128           2 :         if (src->ttype != TYPE_str)
     129           0 :                 throw(MAL, "batudf.reverse", SQLSTATE(42000) "tail-type of input BAT must be TYPE_str");
     130             : 
     131             :         /* to avoid many allocations, we allocate a single buffer, which will reallocate if a
     132             :            larger string is found and freed at the end */
     133           2 :         if (!(buf = GDKmalloc(buflen))) {
     134           0 :                 msg = createException(MAL, "batudf.reverse", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     135           0 :                 goto bailout;
     136             :         }
     137           2 :         q = BATcount(src);
     138             :         /* allocate void-headed result BAT */
     139           2 :         if (!(bn = COLnew(src->hseqbase, TYPE_str, q, TRANSIENT))) {
     140           0 :                 msg = createException(MAL, "batudf.reverse", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     141           0 :                 goto bailout;
     142             :         }
     143             : 
     144             :         /* create BAT iterator */
     145           2 :         li = bat_iterator(src);
     146             :         /* the core of the algorithm */
     147          12 :         for (p = 0; p < q ; p++) {
     148           8 :                 const char *x = BUNtvar(li, p);
     149             : 
     150           8 :                 if (strNil(x)) {
     151             :                         /* if the input string is null, then append directly */
     152           0 :                         if (tfastins_nocheckVAR(bn, p, str_nil) != GDK_SUCCEED) {
     153           0 :                                 msg = createException(MAL, "batudf.reverse", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     154           0 :                                 goto bailout1;
     155             :                         }
     156             :                         nils = true;
     157             :                 } else {
     158             :                         /* revert tail value */
     159           8 :                         if ((msg = UDFreverse_(&buf, &buflen, x)) != MAL_SUCCEED)
     160           0 :                                 goto bailout1;
     161             :                         /* assert logical sanity */
     162           8 :                         assert(buf && x);
     163             :                         /* append to the output BAT. We are using a faster route, because we know what we are doing */
     164           8 :                         if (tfastins_nocheckVAR(bn, p, buf) != GDK_SUCCEED) {
     165           0 :                                 msg = createException(MAL, "batudf.reverse", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     166           0 :                                 goto bailout1;
     167             :                         }
     168             :                 }
     169             :         }
     170           2 : bailout1:
     171           2 :         bat_iterator_end(&li);
     172             : 
     173           2 : bailout:
     174           2 :         GDKfree(buf);
     175           2 :         if (bn && !msg) {
     176           2 :                 BATsetcount(bn, q);
     177           2 :                 bn->tnil = nils;
     178           2 :                 bn->tnonil = !nils;
     179           2 :                 bn->tkey = BATcount(bn) <= 1;
     180           2 :                 bn->tsorted = BATcount(bn) <= 1;
     181           2 :                 bn->trevsorted = BATcount(bn) <= 1;
     182           0 :         } else if (bn) {
     183           0 :                 BBPreclaim(bn);
     184           0 :                 bn = NULL;
     185             :         }
     186           2 :         *ret = bn;
     187           2 :         return msg;
     188             : }
     189             : 
     190             : /* MAL wrapper */
     191             : char *
     192           2 : UDFBATreverse(bat *ret, const bat *arg)
     193             : {
     194           2 :         BAT *res = NULL, *src = NULL;
     195           2 :         char *msg = NULL;
     196             : 
     197             :         /* assert calling sanity */
     198           2 :         assert(ret != NULL && arg != NULL);
     199             : 
     200             :         /* bat-id -> BAT-descriptor */
     201           2 :         if ((src = BATdescriptor(*arg)) == NULL)
     202           0 :                 throw(MAL, "batudf.reverse", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
     203             : 
     204             :         /* do the work */
     205           2 :         msg = UDFBATreverse_( &res, src );
     206             : 
     207             :         /* release input BAT-descriptor */
     208           2 :         BBPunfix(src->batCacheid);
     209             : 
     210           2 :         if (msg == MAL_SUCCEED) {
     211             :                 /* register result BAT in buffer pool */
     212           2 :                 *ret = res->batCacheid;
     213           2 :                 BBPkeepref(res);
     214             :         }
     215             : 
     216             :         return msg;
     217             : }
     218             : 
     219             : /* fuse */
     220             : 
     221             : /* instantiate type-specific functions */
     222             : 
     223             : #define UI bte
     224             : #define UU unsigned char
     225             : #define UO sht
     226             : #include "udf_impl.h"
     227             : #undef UI
     228             : #undef UU
     229             : #undef UO
     230             : 
     231             : #define UI sht
     232             : #define UU unsigned short
     233             : #define UO int
     234             : #include "udf_impl.h"
     235             : #undef UI
     236             : #undef UU
     237             : #undef UO
     238             : 
     239             : #define UI int
     240             : #define UU unsigned int
     241             : #define UO lng
     242             : #include "udf_impl.h"
     243             : #undef UI
     244             : #undef UU
     245             : #undef UO
     246             : 
     247             : #ifdef HAVE_HGE
     248             : #define UI lng
     249             : #define UU ulng
     250             : #define UO hge
     251             : #include "udf_impl.h"
     252             : #undef UI
     253             : #undef UU
     254             : #undef UO
     255             : #endif
     256             : 
     257             : /* BAT fuse */
     258             : 
     259             : /* actual implementation */
     260             : static char *
     261           8 : UDFBATfuse_(BAT **ret, BAT *bone, BAT *btwo)
     262             : {
     263           8 :         BAT *bres = NULL;
     264           8 :         bit two_tail_sorted_unsigned = FALSE;
     265           8 :         bit two_tail_revsorted_unsigned = FALSE;
     266           8 :         BUN n;
     267           8 :         char *msg = NULL;
     268             : 
     269             :         /* assert calling sanity */
     270           8 :         assert(ret != NULL);
     271             : 
     272             :         /* handle NULL pointer */
     273           8 :         if (bone == NULL || btwo == NULL)
     274           0 :                 throw(MAL, "batudf.fuse",  SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
     275             : 
     276             :         /* check for aligned heads */
     277           8 :         if (BATcount(bone) != BATcount(btwo) ||
     278           8 :             bone->hseqbase != btwo->hseqbase) {
     279           0 :                 throw(MAL, "batudf.fuse",
     280             :                       "heads of input BATs must be aligned");
     281             :         }
     282           8 :         n = BATcount(bone);
     283             : 
     284             :         /* check tail types */
     285           8 :         if (bone->ttype != btwo->ttype) {
     286           0 :                 throw(MAL, "batudf.fuse",
     287             :                       "tails of input BATs must be identical");
     288             :         }
     289             : 
     290             :         /* allocate result BAT */
     291           8 :         switch (bone->ttype) {
     292           2 :         case TYPE_bte:
     293           2 :                 bres = COLnew(bone->hseqbase, TYPE_sht, n, TRANSIENT);
     294           2 :                 break;
     295           2 :         case TYPE_sht:
     296           2 :                 bres = COLnew(bone->hseqbase, TYPE_int, n, TRANSIENT);
     297           2 :                 break;
     298           4 :         case TYPE_int:
     299           4 :                 bres = COLnew(bone->hseqbase, TYPE_lng, n, TRANSIENT);
     300           4 :                 break;
     301             : #ifdef HAVE_HGE
     302           0 :         case TYPE_lng:
     303           0 :                 bres = COLnew(bone->hseqbase, TYPE_hge, n, TRANSIENT);
     304           0 :                 break;
     305             : #endif
     306           0 :         default:
     307           0 :                 throw(MAL, "batudf.fuse",
     308             :                       "tails of input BATs must be one of {bte, sht, int"
     309             : #ifdef HAVE_HGE
     310             :                       ", lng"
     311             : #endif
     312             :                       "}");
     313             :         }
     314           8 :         if (bres == NULL)
     315           0 :                 throw(MAL, "batudf.fuse", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     316             : 
     317             :         /* call type-specific core algorithm */
     318           8 :         switch (bone->ttype) {
     319           2 :         case TYPE_bte:
     320           2 :                 msg = UDFBATfuse_bte_sht ( bres, bone, btwo, n,
     321             :                         &two_tail_sorted_unsigned, &two_tail_revsorted_unsigned );
     322           2 :                 break;
     323           2 :         case TYPE_sht:
     324           2 :                 msg = UDFBATfuse_sht_int ( bres, bone, btwo, n,
     325             :                         &two_tail_sorted_unsigned, &two_tail_revsorted_unsigned );
     326           2 :                 break;
     327           4 :         case TYPE_int:
     328           4 :                 msg = UDFBATfuse_int_lng ( bres, bone, btwo, n,
     329             :                         &two_tail_sorted_unsigned, &two_tail_revsorted_unsigned );
     330           4 :                 break;
     331             : #ifdef HAVE_HGE
     332           0 :         case TYPE_lng:
     333           0 :                 msg = UDFBATfuse_lng_hge ( bres, bone, btwo, n,
     334             :                         &two_tail_sorted_unsigned, &two_tail_revsorted_unsigned );
     335           0 :                 break;
     336             : #endif
     337           0 :         default:
     338           0 :                 BBPunfix(bres->batCacheid);
     339           0 :                 throw(MAL, "batudf.fuse",
     340             :                       "tails of input BATs must be one of {bte, sht, int"
     341             : #ifdef HAVE_HGE
     342             :                       ", lng"
     343             : #endif
     344             :                       "}");
     345             :         }
     346             : 
     347           8 :         if (msg != MAL_SUCCEED) {
     348           0 :                 BBPunfix(bres->batCacheid);
     349             :         } else {
     350             :                 /* set number of tuples in result BAT */
     351           8 :                 BATsetcount(bres, n);
     352             : 
     353             :                 /* Result tail is sorted, if the left/first input tail is
     354             :                  * sorted and key (unique), or if the left/first input tail is
     355             :                  * sorted and the second/right input tail is sorted and the
     356             :                  * second/right tail values are either all >= 0 or all < 0;
     357             :                  * otherwise, we cannot tell.
     358             :                  */
     359           8 :                 if (bone->tsorted
     360           8 :                     && (BATtkey(bone) || two_tail_sorted_unsigned))
     361           6 :                         bres->tsorted = true;
     362             :                 else
     363           2 :                         bres->tsorted = (BATcount(bres) <= 1);
     364           8 :                 if (bone->trevsorted
     365           0 :                     && (BATtkey(bone) || two_tail_revsorted_unsigned))
     366           0 :                         bres->trevsorted = true;
     367             :                 else
     368           8 :                         bres->trevsorted = (BATcount(bres) <= 1);
     369             :                 /* result tail is key (unique), iff both input tails are */
     370          14 :                 BATkey(bres, BATtkey(bone) || BATtkey(btwo));
     371             : 
     372           8 :                 *ret = bres;
     373             :         }
     374             : 
     375             :         return msg;
     376             : }
     377             : 
     378             : /* MAL wrapper */
     379             : char *
     380           8 : UDFBATfuse(bat *ires, const bat *ione, const bat *itwo)
     381             : {
     382           8 :         BAT *bres = NULL, *bone = NULL, *btwo = NULL;
     383           8 :         char *msg = NULL;
     384             : 
     385             :         /* assert calling sanity */
     386           8 :         assert(ires != NULL && ione != NULL && itwo != NULL);
     387             : 
     388             :         /* bat-id -> BAT-descriptor */
     389           8 :         if ((bone = BATdescriptor(*ione)) == NULL)
     390           0 :                 throw(MAL, "batudf.fuse",  SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
     391             : 
     392             :         /* bat-id -> BAT-descriptor */
     393           8 :         if ((btwo = BATdescriptor(*itwo)) == NULL) {
     394           0 :                 BBPunfix(bone->batCacheid);
     395           0 :                 throw(MAL, "batudf.fuse",  SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
     396             :         }
     397             : 
     398             :         /* do the work */
     399           8 :         msg = UDFBATfuse_ ( &bres, bone, btwo );
     400             : 
     401             :         /* release input BAT-descriptors */
     402           8 :         BBPunfix(bone->batCacheid);
     403           8 :         BBPunfix(btwo->batCacheid);
     404             : 
     405           8 :         if (msg == MAL_SUCCEED) {
     406             :                 /* register result BAT in buffer pool */
     407           8 :                 *ires = bres->batCacheid;
     408           8 :                 BBPkeepref(bres);
     409             :         }
     410             : 
     411             :         return msg;
     412             : }
     413             : 
     414             : #include "mel.h"
     415             : static mel_func udf_init_funcs[] = {
     416             :  command("udf", "reverse", UDFreverse, false, "Reverse a string", args(1,2, arg("",str),arg("ra1",str))),
     417             :  command("batudf", "reverse", UDFBATreverse, false, "Reverse a BAT of strings", args(1,2, batarg("",str),batarg("b",str))),
     418             :  command("udf", "fuse", UDFfuse_bte_sht, false, "fuse two (1-byte) bte values into one (2-byte) sht value", args(1,3, arg("",sht),arg("one",bte),arg("two",bte))),
     419             :  command("udf", "fuse", UDFfuse_sht_int, false, "fuse two (2-byte) sht values into one (4-byte) int value", args(1,3, arg("",int),arg("one",sht),arg("two",sht))),
     420             :  command("udf", "fuse", UDFfuse_int_lng, false, "fuse two (4-byte) int values into one (8-byte) lng value", args(1,3, arg("",lng),arg("one",int),arg("two",int))),
     421             :  command("batudf", "fuse", UDFBATfuse, false, "fuse two (1-byte) bte values into one (2-byte) sht value", args(1,3, batarg("",sht),batarg("one",bte),batarg("two",bte))),
     422             :  command("batudf", "fuse", UDFBATfuse, false, "fuse two (2-byte) sht values into one (4-byte) int value", args(1,3, batarg("",int),batarg("one",sht),batarg("two",sht))),
     423             :  command("batudf", "fuse", UDFBATfuse, false, "fuse two (4-byte) int values into one (8-byte) lng value", args(1,3, batarg("",lng),batarg("one",int),batarg("two",int))),
     424             : #ifdef HAVE_HGE
     425             :  command("udf", "fuse", UDFfuse_lng_hge, false, "fuse two (8-byte) lng values into one (16-byte) hge value", args(1,3, arg("",hge),arg("one",lng),arg("two",lng))),
     426             :  command("batudf", "fuse", UDFBATfuse, false, "fuse two (8-byte) lng values into one (16-byte) hge value", args(1,3, batarg("",hge),batarg("one",lng),batarg("two",lng))),
     427             : #endif
     428             :  { .imp=NULL }
     429             : };
     430             : #include "mal_import.h"
     431             : #ifdef _MSC_VER
     432             : #undef read
     433             : #pragma section(".CRT$XCU",read)
     434             : #endif
     435           4 : LIB_STARTUP_FUNC(init_udf_mal)
     436           4 : { mal_module("udf", NULL, udf_init_funcs); }

Generated by: LCOV version 1.14