LCOV - code coverage report
Current view: top level - gdk - gdk_atoms.h (source / functions) Hit Total Coverage
Test: coverage.info Lines: 60 65 92.3 %
Date: 2024-10-04 20:04:04 Functions: 7 7 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #ifndef _GDK_ATOMS_H_
      14             : #define _GDK_ATOMS_H_
      15             : 
      16             : /* atomFromStr returns the number of bytes of the input string that
      17             :  * were processed.  atomToStr returns the length of the string
      18             :  * produced.  Both functions return -1 on (any kind of) failure.  If
      19             :  * *dst is not NULL, *len specifies the available space.  If there is
      20             :  * not enough space, or if *dst is NULL, *dst will be freed (if not
      21             :  * NULL) and a new buffer will be allocated and returned in *dst.
      22             :  * *len will be set to reflect the actual size allocated.  If
      23             :  * allocation fails, *dst will be NULL on return and *len is
      24             :  * undefined.  In any case, if the function returns, *dst is either
      25             :  * NULL or a valid pointer and then *len is the size of the area *dst
      26             :  * points to.
      27             :  *
      28             :  * atomCmp returns a value less than zero/equal to zero/greater than
      29             :  * zero if the first argument points to a value which is deemed
      30             :  * smaller/equal to/larger than the value pointed to by the second
      31             :  * argument.
      32             :  *
      33             :  * atomHash calculates a hash function for the value pointed to by the
      34             :  * argument.
      35             :  */
      36             : 
      37             : #define IDLENGTH        64      /* maximum BAT id length */
      38             : 
      39             : typedef struct { /* descriptor of one atom type; BATatoms[] is an array of these indexed by type number */
      40             :         /* simple attributes */
      41             :         char name[IDLENGTH]; /* presumably the atom type name looked up by ATOMname/ATOMindex -- confirm */
      42             :         uint8_t storage;        /* stored as another type? */
      43             :         bool linear;            /* atom can be ordered linearly */
      44             :         uint16_t size;          /* fixed size of atom */
      45             : 
      46             :         /* automatically generated fields */
      47             :         const void *atomNull;   /* global nil value */
      48             : 
      49             :         /* generic (fixed + varsized atom) ADT functions */
      50             :         ssize_t (*atomFromStr) (const char *src, size_t *len, void **dst, bool external); /* parse src into *dst; returns bytes of input processed, -1 on failure */
      51             :         ssize_t (*atomToStr) (char **dst, size_t *len, const void *src, bool external); /* render src into *dst; returns length of string produced, -1 on failure */
      52             :         void *(*atomRead) (void *dst, size_t *dstlen, stream *s, size_t cnt); /* NOTE(review): presumably reads cnt values from stream s -- confirm */
      53             :         gdk_return (*atomWrite) (const void *src, stream *s, size_t cnt); /* NOTE(review): presumably writes cnt values to stream s -- confirm */
      54             :         int (*atomCmp) (const void *v1, const void *v2); /* negative, zero or positive as *v1 is smaller than, equal to or larger than *v2 */
      55             :         BUN (*atomHash) (const void *v); /* hash of the value v points to */
      56             : 
      57             :         /* varsized atom-only ADT functions */
      58             :         var_t (*atomPut) (BAT *, var_t *off, const void *src); /* NULL for fixed-size types (see ATOMvarsized); callers treat (var_t) -1 as failure */
      59             :         void (*atomDel) (Heap *, var_t *atom); /* may be NULL; the ATOMdel macro checks before calling */
      60             :         size_t (*atomLen) (const void *atom);
      61             :         gdk_return (*atomHeap) (Heap *, size_t);
      62             : } atomDesc;
      63             : 
      64             : #define MAXATOMS        128
      65             : 
      66             : gdk_export atomDesc BATatoms[MAXATOMS];
      67             : gdk_export int GDKatomcnt;
      68             : 
      69             : gdk_export int ATOMallocate(const char *nme);
      70             : gdk_export int ATOMindex(const char *nme);
      71             : 
      72             : gdk_export const char *ATOMname(int id);
      73             : gdk_export size_t ATOMlen(int id, const void *v);
      74             : gdk_export void *ATOMnil(int id)
      75             :         __attribute__((__malloc__));
      76             : gdk_export int ATOMprint(int id, const void *val, stream *fd);
      77             : gdk_export char *ATOMformat(int id, const void *val)
      78             :         __attribute__((__warn_unused_result__));
      79             : 
      80             : gdk_export void *ATOMdup(int id, const void *val);
      81             : 
      82             : /*
      83             :  * @- maximum atomic string lengths
      84             :  */
      85             : #define bitStrlen       8
      86             : #define bteStrlen       8
      87             : #define shtStrlen       12
      88             : #define intStrlen       24
      89             : #if SIZEOF_OID == SIZEOF_INT
      90             : #define oidStrlen       24
      91             : #else
      92             : #define oidStrlen       48
      93             : #endif
      94             : #if SIZEOF_PTR == SIZEOF_INT
      95             : #define ptrStrlen       24
      96             : #else
      97             : #define ptrStrlen       48
      98             : #endif
      99             : #define lngStrlen       48
     100             : #ifdef HAVE_HGE
     101             : #define hgeStrlen       96
     102             : #endif
     103             : #define fltStrlen       48
     104             : #define dblStrlen       96
     105             : 
     106             : /*
     107             :  * The system comes with the traditional atomic types: int (4 bytes),
     108             :  * bool(1 byte) and str (variable). In addition, we support the notion
     109             :  * of an OID type, which ensures uniqueness of its members.  This
     110             :  * leads to the following type descriptor table.
     111             :  */
     112             : 
     113             : #ifdef HAVE_HGE
     114             : gdk_export ssize_t hgeFromStr(const char *src, size_t *len, hge **dst, bool external);
     115             : gdk_export ssize_t hgeToStr(str *dst, size_t *len, const hge *src, bool external);
     116             : #endif
     117             : gdk_export ssize_t lngFromStr(const char *src, size_t *len, lng **dst, bool external);
     118             : gdk_export ssize_t lngToStr(str *dst, size_t *len, const lng *src, bool external);
     119             : gdk_export ssize_t intFromStr(const char *src, size_t *len, int **dst, bool external);
     120             : gdk_export ssize_t intToStr(str *dst, size_t *len, const int *src, bool external);
     121             : gdk_export ssize_t batFromStr(const char *src, size_t *len, bat **dst, bool external);
     122             : gdk_export ssize_t batToStr(str *dst, size_t *len, const bat *src, bool external);
     123             : gdk_export ssize_t ptrFromStr(const char *src, size_t *len, ptr **dst, bool external);
     124             : gdk_export ssize_t ptrToStr(str *dst, size_t *len, const ptr *src, bool external);
     125             : gdk_export ssize_t bitFromStr(const char *src, size_t *len, bit **dst, bool external);
     126             : gdk_export ssize_t bitToStr(str *dst, size_t *len, const bit *src, bool external);
     127             : gdk_export ssize_t OIDfromStr(const char *src, size_t *len, oid **dst, bool external);
     128             : gdk_export ssize_t OIDtoStr(str *dst, size_t *len, const oid *src, bool external);
     129             : gdk_export ssize_t shtFromStr(const char *src, size_t *len, sht **dst, bool external);
     130             : gdk_export ssize_t shtToStr(str *dst, size_t *len, const sht *src, bool external);
     131             : gdk_export ssize_t bteFromStr(const char *src, size_t *len, bte **dst, bool external);
     132             : gdk_export ssize_t bteToStr(str *dst, size_t *len, const bte *src, bool external);
     133             : gdk_export ssize_t fltFromStr(const char *src, size_t *len, flt **dst, bool external);
     134             : gdk_export ssize_t fltToStr(str *dst, size_t *len, const flt *src, bool external);
     135             : gdk_export ssize_t dblFromStr(const char *src, size_t *len, dbl **dst, bool external);
     136             : gdk_export ssize_t dblToStr(str *dst, size_t *len, const dbl *src, bool external);
     137             : gdk_export ssize_t GDKstrFromStr(unsigned char *restrict dst, const unsigned char *restrict src, ssize_t len, char quote);
     138             : gdk_export ssize_t strFromStr(const char *restrict src, size_t *restrict len, str *restrict dst, bool external);
     139             : gdk_export size_t escapedStrlen(const char *restrict src, const char *sep1, const char *sep2, int quote);
     140             : gdk_export size_t escapedStr(char *restrict dst, const char *restrict src, size_t dstlen, const char *sep1, const char *sep2, int quote);
     141             : /*
     142             :  * @- nil values
     143             :  * All types have a single value designated as a NIL value. It
     144             :  * designates a missing value and it is ignored (forbidden) in several
     145             :  * primitives.  The current policy is to use the smallest value in any
     146             :  * ordered domain.  The routine ATOMnil returns a pointer to the nil
     147             :  * value representation.
     148             :  */
     149             : #define GDK_bit_max ((bit) 1)
     150             : #define GDK_bit_min ((bit) 0)
     151             : #define GDK_bte_max ((bte) INT8_MAX)
     152             : #define GDK_bte_min ((bte) INT8_MIN+1)
     153             : #define GDK_sht_max ((sht) INT16_MAX)
     154             : #define GDK_sht_min ((sht) INT16_MIN+1)
     155             : #define GDK_int_max ((int) INT32_MAX)
     156             : #define GDK_int_min ((int) INT32_MIN+1)
     157             : #define GDK_lng_max ((lng) INT64_MAX)
     158             : #define GDK_lng_min ((lng) INT64_MIN+1)
     159             : #ifdef HAVE_HGE
     160             : #define GDK_hge_max ((((hge) 1) << 126) - 1 + (((hge) 1) << 126))
     161             : #define GDK_hge_min (-GDK_hge_max)
     162             : #endif
     163             : #define GDK_flt_max ((flt) FLT_MAX)
     164             : #define GDK_flt_min ((flt) -FLT_MAX)
     165             : #define GDK_dbl_max ((dbl) DBL_MAX)
     166             : #define GDK_dbl_min ((dbl) -DBL_MAX)
     167             : #define GDK_oid_max (((oid) 1 << ((8 * SIZEOF_OID) - 1)) - 1)
     168             : #define GDK_oid_min ((oid) 0)
     169             : /* representation of the nil */
     170             : gdk_export const bte bte_nil;
     171             : gdk_export const sht sht_nil;
     172             : gdk_export const int int_nil;
     173             : #ifdef NAN_CANNOT_BE_USED_AS_INITIALIZER
     174             : /* Definition of NAN is seriously broken on Intel compiler (at least
     175             :  * in some versions), so we work around it. */
     176             : union _flt_nil_t {
     177             :         uint32_t l;
     178             :         flt f;
     179             : };
     180             : gdk_export const union _flt_nil_t _flt_nil_;
     181             : #define flt_nil (_flt_nil_.f)
     182             : union _dbl_nil_t {
     183             :         uint64_t l;
     184             :         dbl d;
     185             : };
     186             : gdk_export const union _dbl_nil_t _dbl_nil_;
     187             : #define dbl_nil (_dbl_nil_.d)
     188             : #else
     189             : gdk_export const flt flt_nil;
     190             : gdk_export const dbl dbl_nil;
     191             : #endif
     192             : gdk_export const lng lng_nil;
     193             : #ifdef HAVE_HGE
     194             : gdk_export const hge hge_nil;
     195             : #endif
     196             : gdk_export const oid oid_nil;
     197             : gdk_export const char str_nil[2];
     198             : gdk_export const ptr ptr_nil;
     199             : gdk_export const uuid uuid_nil;
     200             : 
     201             : /* derived NIL values - OIDDEPEND */
     202             : #define bit_nil ((bit) bte_nil)
     203             : #define bat_nil ((bat) int_nil)
     204             : 
     205             : #define void_nil        oid_nil
     206             : 
     207             : #define is_bit_nil(v)   ((v) == GDK_bte_min-1)
     208             : #define is_bte_nil(v)   ((v) == GDK_bte_min-1)
     209             : #define is_sht_nil(v)   ((v) == GDK_sht_min-1)
     210             : #define is_int_nil(v)   ((v) == GDK_int_min-1)
     211             : #define is_lng_nil(v)   ((v) == GDK_lng_min-1)
     212             : #ifdef HAVE_HGE
     213             : #define is_hge_nil(v)   ((v) == GDK_hge_min-1)
     214             : #endif
     215             : #define is_oid_nil(v)   ((v) == ((oid) 1 << ((8 * SIZEOF_OID) - 1)))
     216             : #define is_flt_nil(v)   isnan(v)
     217             : #define is_dbl_nil(v)   isnan(v)
     218             : #define is_bat_nil(v)   (((v) & 0x7FFFFFFF) == 0) /* v == bat_nil || v == 0 */
     219             : 
     220             : #include <math.h>
     221             : 
     222             : #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && _MSC_VER < 1800
     223             : #include <float.h>
     224             : #define isnan(x)        _isnan(x)
     225             : #define isinf(x)        (_fpclass(x) & (_FPCLASS_NINF | _FPCLASS_PINF))
     226             : #define isfinite(x)     _finite(x)
     227             : #endif
     228             : 
     229             : #ifdef HAVE_HGE
     230             : #define is_uuid_nil(x)  ((x).h == 0)
     231             : #else
     232             : #define is_uuid_nil(x)  (memcmp((x).u, uuid_nil.u, UUID_SIZE) == 0)
     233             : #endif
     234             : 
     235             : #define is_blob_nil(x)  ((x)->nitems == ~(size_t)0)
     236             : 
     237             : /*
     238             :  * @- Derived types
     239             :  * In all algorithms across GDK, you will find switches on the types
     240             :  * (bte, sht, int, flt, dbl, lng, hge, str). They respectively
     241             :  * represent an octet, a 16-bit int, a 32-bit int, a 32-bit float, a
     242             :  * 64-bit double, a 64-bit int, a 128-bit int, and a pointer-sized location
     243             :  * of a char-buffer (ended by a zero char).
     244             :  *
     245             :  * In contrast, the types (bit, ptr, bat, oid) are derived types. They
     246             :  * do not occur in the switches. The ATOMstorage macro maps them
     247             :  * respectively onto a @code{ bte}, @code{ int} (pointers are 32-bit),
     248             :  * @code{ int}, and @code{ int}. OIDs are 32-bit.
     249             :  *
     250             :  * This approach makes it tractable to switch to 64-bits OIDs, or to a
     251             :  * fully 64-bits OS easily. One only has to map the @code{ oid} and
     252             :  * @code{ ptr} types to @code{ lng} instead of @code{ int}.
     253             :  *
     254             :  * Derived types mimic their fathers in many ways. They inherit the
     255             :  * @code{ size}, @code{ linear}, and @code{ null}
     256             :  * properties of their father.  The same goes for the
     257             :  * ADT functions HASH, CMP, PUT, NULL, DEL, LEN, and HEAP. So, a
     258             :  * derived type differs in only two ways from its father:
     259             :  * @table @code
     260             :  * @item [string representation]
     261             :  * the only two ADT operations specific for a derived type are FROMSTR
     262             :  * and TOSTR.
     263             :  * @item [identity]
     264             :  * (a @code{ bit} is really of a different type than @code{ bte}). The
     265             :  * set of operations on derived type values or BATs of such types may
     266             :  * differ from the sets of operations on the father type.
     267             :  * @end table
     268             :  */
     269             : /* use "do ... while(0)" so that lhs can safely be used in if statements */
     270             : #define ATOMstorage(t)          BATatoms[t].storage
     271             : #define ATOMsize(t)             BATatoms[t].size
     272             : #define ATOMfromstr(t,s,l,src,ext)      BATatoms[t].atomFromStr(src,l,s,ext)
     273             : #define ATOMnilptr(t)           BATatoms[t].atomNull
     274             : #define ATOMcompare(t)          BATatoms[t].atomCmp
     275             : #define ATOMcmp(t,l,r)          ((*ATOMcompare(t))(l, r))
     276             : #define ATOMhash(t,src)         BATatoms[t].atomHash(src)
     277             : #define ATOMdel(t,hp,src)       do if (BATatoms[t].atomDel) BATatoms[t].atomDel(hp,src); while (0)
     278             : #define ATOMvarsized(t)         (BATatoms[t].atomPut != NULL)
     279             : #define ATOMlinear(t)           BATatoms[t].linear
     280             : #define ATOMtype(t)             ((t) == TYPE_void ? TYPE_oid : (t))
     281             : 
     282             : /* The base type is the storage type if the comparison function, the
     283             :  * hash function, and the nil value are the same as those of the
     284             :  * storage type; otherwise it is the type itself. */
     285             : #define ATOMbasetype(t) ((t) != ATOMstorage(t) &&                       \
     286             :                          ATOMnilptr(t) == ATOMnilptr(ATOMstorage(t)) && \
     287             :                          ATOMcompare(t) == ATOMcompare(ATOMstorage(t)) && \
     288             :                          BATatoms[t].atomHash == BATatoms[ATOMstorage(t)].atomHash ? \
     289             :                          ATOMstorage(t) : (t))
     290             : 
     291             : /*
     292             :  * In case that atoms are added to a bat, their logical reference
     293             :  * count should be incremented (and decremented if deleted). Notice
     294             :  * that BATs with atomic types that have logical references (e.g. BATs
     295             :  * of BATs but also BATs of ODMG odSet) can never be persistent, as
     296             :  * this would make the commit tremendously complicated.
     297             :  */
     298             : 
     299             : static inline gdk_return __attribute__((__warn_unused_result__)) /* store a variable-sized atom through the atomPut callback of the tail type of b */
     300   103659017 : ATOMputVAR(BAT *b, var_t *dst, const void *src) /* b, dst and src are handed to atomPut unchanged; returns GDK_FAIL or GDK_SUCCEED */
     301             : {
     302   103659017 :         assert(BATatoms[b->ttype].atomPut != NULL); /* only valid for types that have an atomPut function */
     303   103659017 :         if ((*BATatoms[b->ttype].atomPut)(b, dst, src) == (var_t) -1) /* atomPut reports failure as (var_t) -1 */
     304           0 :                 return GDK_FAIL;
     305             :         return GDK_SUCCEED;
     306             : }
     307             : 
     308             : 
     309             : static inline gdk_return __attribute__((__warn_unused_result__)) /* copy one fixed-size atom of the given type from src to dst */
     310   528183575 : ATOMputFIX(int type, void *dst, const void *src) /* every path returns GDK_SUCCEED */
     311             : {
     312   528183575 :         assert(BATatoms[type].atomPut == NULL); /* types with an atomPut function must not come through here */
     313   528183575 :         switch (ATOMsize(type)) { /* dispatch on the fixed atom size, used as a byte count below */
     314             :         case 0:         /* void */
     315             :                 break;
     316    36297645 :         case 1:
     317    36297645 :                 * (bte *) dst = * (bte *) src;
     318    36297645 :                 break;
     319    14910734 :         case 2:
     320    14910734 :                 * (sht *) dst = * (sht *) src;
     321    14910734 :                 break;
     322   431980568 :         case 4:
     323   431980568 :                 * (int *) dst = * (int *) src;
     324   431980568 :                 break;
     325    28571369 :         case 8:
     326    28571369 :                 * (lng *) dst = * (lng *) src;
     327    28571369 :                 break;
     328    16423259 :         case 16:
     329             : #ifdef HAVE_HGE
     330    16423259 :                 * (hge *) dst = * (hge *) src;
     331             : #else
     332             :                 * (uuid *) dst = * (uuid *) src; /* without hge the copy goes through uuid; presumably uuid is 16 bytes wide -- confirm */
     333             : #endif
     334    16423259 :                 break;
     335           0 :         default: /* any other size is copied with memcpy */
     336           0 :                 memcpy(dst, src, ATOMsize(type));
     337           0 :                 break;
     338             :         }
     339   528183575 :         return GDK_SUCCEED;
     340             : }
     341             : 
     342             : static inline gdk_return __attribute__((__warn_unused_result__)) /* replace the variable-sized atom referenced by *dst with src */
     343     1146130 : ATOMreplaceVAR(BAT *b, var_t *dst, const void *src) /* returns GDK_FAIL without writing *dst when atomPut fails */
     344             : {
     345     1146130 :         var_t loc = *dst; /* atomPut works on this copy, not on *dst itself */
     346     1146130 :         int type = b->ttype;
     347             : 
     348     1146130 :         assert(BATatoms[type].atomPut != NULL); /* only valid for types that have an atomPut function */
     349     1146130 :         if ((*BATatoms[type].atomPut)(b, &loc, src) == (var_t) -1) /* (var_t) -1 signals failure */
     350             :                 return GDK_FAIL;
     351     1146185 :         ATOMdel(type, b->tvheap, dst); /* calls atomDel on the old value if the type has an atomDel function */
     352     1146185 :         *dst = loc; /* store the value produced by atomPut */
     353     1146185 :         return GDK_SUCCEED;
     354             : }
     355             : 
     356             : /* string heaps:
     357             :  * - strings are 8 byte aligned
     358             :  * - start with a 1024 bucket hash table
     359             :  * - heaps < 64KiB are fully duplicate eliminated with this hash table
     360             :  * - heaps >= 64KiB are opportunistically (imperfect) duplicate
     361             :  *   eliminated as only the last 128KiB chunk is considered and there
     362             :  *   is no linked list
     363             :  * - buckets and next pointers are unsigned short "indices"
     364             :  * - indices should be multiplied by 8 and taken from ELIMBASE to get
     365             :  *   an offset
     366             :  * Note that a 64KiB chunk of the heap contains at most 8K 8-byte
     367             :  * aligned strings. The 1K bucket list means that in worst load, the
     368             :  * list length is 8 (OK).
     369             :  */
     370             : #define GDK_STRHASHTABLE        (1<<10)   /* 1024 */
     371             : #define GDK_STRHASHMASK         (GDK_STRHASHTABLE-1)
     372             : #define GDK_STRHASHSIZE         (GDK_STRHASHTABLE * sizeof(stridx_t))
     373             : #define GDK_ELIMPOWER           16      /* 64KiB is the threshold */
     374             : #define GDK_ELIMDOUBLES(h)      ((h)->free < GDK_ELIMLIMIT)
     375             : #define GDK_ELIMLIMIT           (1<<GDK_ELIMPOWER)        /* equivalently: ELIMBASE == 0 */
     376             : #define GDK_ELIMBASE(x)         (((x) >> GDK_ELIMPOWER) << GDK_ELIMPOWER)
     377             : #define GDK_VAROFFSET           ((var_t) GDK_STRHASHSIZE)
     378             : 
     379             : /*
     380             :  * @- String Comparison, NILs and UTF-8
     381             :  *
     382             :  * Using the char* type for strings is handy as this is the type of
     383             :  * any constant strings in a C/C++ program. Therefore, MonetDB uses
     384             :  * this definition for str.  However, different compilers and
     385             :  * platforms use either signed or unsigned characters for the char
     386             :  * type.  It is required that string ordering in MonetDB is consistent
     387             :  * over platforms though.
     388             :  *
     389             :  * As for the choice how strings should be ordered, our support for
     390             :  * UTF-8 actually imposes that it should follow 'unsigned char'
     391             :  * doctrine (like in the AIX native compiler). In this semantics,
     392             :  * though we have to take corrective action to ensure that str(nil) is
     393             :  * the smallest value of the domain.
     394             :  */
     395             : static inline bool __attribute__((__pure__)) /* true when the two strings are bytewise equal according to strcmp */
     396      333767 : strEQ(const char *l, const char *r) /* both arguments go straight to strcmp, so neither may be NULL */
     397             : {
     398      333767 :         return strcmp(l, r) == 0;
     399             : }
     400             : 
     401             : static inline bool __attribute__((__pure__)) /* true when s is the nil string */
     402  1894463890 : strNil(const char *s) /* a NULL pointer also counts as nil */
     403             : {
     404  1897752540 :         return s == NULL || (s[0] == '\200' && s[1] == '\0'); /* nil is exactly the byte with octal escape 200 followed by the terminator */
     405             : }
     406             : 
     407             : static inline size_t __attribute__((__pure__)) /* size of the string in bytes including the terminating zero byte */
     408    55733751 : strLen(const char *s) /* s may be NULL, which strNil treats as nil */
     409             : {
     410   110792767 :         return strNil(s) ? 2 : strlen(s) + 1; /* nil always counts as 2 bytes */
     411             : }
     412             : 
     413             : static inline int __attribute__((__pure__)) /* three-way string comparison in which nil sorts before every non-nil string */
     414   513553381 : strCmp(const char *l, const char *r) /* returns 0 for nil versus nil, 1 when only r is nil, -1 when only l is nil */
     415             : {
     416  1024334355 :         return strNil(r)
     417   265036915 :                 ? !strNil(l) /* r is nil: equal (0) if l is nil too, otherwise l is larger (1) */
     418   502208524 :                 : strNil(l) ? -1 : strcmp(l, r); /* r is not nil: nil l is smaller, otherwise defer to strcmp */
     419             : }
     420             : 
     421             : static inline size_t /* fetch entry p from an array of unsigned integers of width w bytes and return it as a size_t */
     422   563899897 : VarHeapVal(const void *b, BUN p, int w) /* b: start of the array, p: entry index, w: entry width in bytes */
     423             : {
     424   563899897 :         switch (w) {
     425   207812078 :         case 1: /* 1-byte and 2-byte entries get GDK_VAROFFSET added */
     426   207812078 :                 return (size_t) ((const uint8_t *) b)[p] + GDK_VAROFFSET;
     427    82345321 :         case 2:
     428    82345321 :                 return (size_t) ((const uint16_t *) b)[p] + GDK_VAROFFSET;
     429   225076384 :         case 4: /* 4-byte and 8-byte entries are returned as stored */
     430   225076384 :                 return (size_t) ((const uint32_t *) b)[p];
     431             : #if SIZEOF_VAR_T == 8
     432    48666114 :         case 8:
     433    48666114 :                 return (size_t) ((const uint64_t *) b)[p];
     434             : #endif
     435             :         default: /* no other width is expected */
     436           0 :                 MT_UNREACHABLE();
     437             :         }
     438             : }
     439             : 
     440             : static inline BUN __attribute__((__pure__)) /* hash a zero-terminated string into a BUN */
     441   221996096 : strHash(const char *key) /* key is read up to its terminator and must not be NULL */
     442             : {
     443   221996096 :         BUN y = 0;
     444             : 
     445  4246183577 :         for (BUN i = 0; key[i]; i++) { /* mix in one byte per iteration */
     446  4024187481 :                 y += key[i]; /* NOTE(review): key[i] is plain char, so bytes above 0x7F may be sign-extended where char is signed */
     447  4024187481 :                 y += (y << 10);
     448  4024187481 :                 y ^= (y >> 6);
     449             :         }
     450   221996096 :         y += (y << 3); /* final mixing after all bytes have been consumed */
     451   221996096 :         y ^= (y >> 11);
     452   221996096 :         y += (y << 15);
     453   221996096 :         return y;
     454             : }
     455             : 
     456             : #endif /* _GDK_ATOMS_H_ */

Generated by: LCOV version 1.14