LCOV - code coverage report
Current view: top level - gdk - gdk_hash.h (source / functions) Hit Total Coverage
Test: coverage.info Lines: 57 74 77.0 %
Date: 2024-12-19 20:05:57 Functions: 5 5 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #ifndef _GDK_SEARCH_H_
      14             : #define _GDK_SEARCH_H_
      15             : 
      16             : struct Hash {
      17             :         int type;               /* type of index entity */
      18             :         uint8_t width;          /* width of hash entries */
      19             :         BUN mask1;              /* .mask1 < .nbucket <= .mask2 */
      20             :         BUN mask2;              /* ... both are power-of-two minus one */
      21             :         BUN nbucket;            /* number of valid hash buckets */
      22             :         BUN nunique;            /* number of unique values */
      23             :         BUN nheads;             /* number of chain heads */
      24             :         void *Bckt;             /* hash buckets, points into .heapbckt */
      25             :         void *Link;             /* collision list, points into .heaplink */
      26             :         Heap heaplink;          /* heap where the hash links are stored */
      27             :         Heap heapbckt;          /* heap where the hash buckets are stored */
      28             : };
      29             : 
      30             : static inline BUN
      31  1128206802 : HASHbucket(const Hash *h, BUN v)
      32             : {
      33  1128206802 :         return (v &= h->mask2) < h->nbucket ? v : v & h->mask1;
      34             : }
      35             : 
      36             : gdk_export gdk_return BAThash(BAT *b);
      37             : gdk_export void HASHdestroy(BAT *b);
      38             : gdk_export BUN HASHprobe(const Hash *h, const void *v);
      39             : gdk_export BUN HASHlist(Hash *h, BUN i);
      40             : 
      41             : #define BUN2 2
      42             : #define BUN4 4
      43             : #if SIZEOF_BUN == 8
      44             : #define BUN8 8
      45             : #endif
      46             : #ifdef BUN2
      47             : typedef uint16_t BUN2type;
      48             : #endif
      49             : typedef uint32_t BUN4type;
      50             : #if SIZEOF_BUN > 4
      51             : typedef uint64_t BUN8type;
      52             : #endif
      53             : #ifdef BUN2
      54             : #define BUN2_NONE ((BUN2type) UINT16_C(0xFFFF))
      55             : #endif
      56             : #define BUN4_NONE ((BUN4type) UINT32_C(0xFFFFFFFF))
      57             : #ifdef BUN8
      58             : #define BUN8_NONE ((BUN8type) UINT64_C(0xFFFFFFFFFFFFFFFF))
      59             : #endif
      60             : 
      61             : /* play around with h->Bckt[i] and h->Link[j] */
      62             : 
      63             : static inline void
      64   393817504 : HASHput(Hash *h, BUN i, BUN v)
      65             : {
      66             :         /* if v == BUN_NONE, assigning the value to a BUN2type
      67             :          * etc. automatically converts to BUN2_NONE etc. */
      68   393817504 :         switch (h->width) {
      69             : #ifdef BUN2
      70   181696405 :         case BUN2:
      71   181696405 :                 ((BUN2type *) h->Bckt)[i] = (BUN2type) v;
      72   181696405 :                 break;
      73             : #endif
      74   212121099 :         case BUN4:
      75   212121099 :                 ((BUN4type *) h->Bckt)[i] = (BUN4type) v;
      76   212121099 :                 break;
      77             : #ifdef BUN8
      78           0 :         case BUN8:
      79           0 :                 ((BUN8type *) h->Bckt)[i] = (BUN8type) v;
      80           0 :                 break;
      81             : #endif
      82             :         default:
      83           0 :                 MT_UNREACHABLE();
      84             :         }
      85   393817504 : }
      86             : 
      87             : static inline void
      88   393726501 : HASHputlink(Hash *h, BUN i, BUN v)
      89             : {
      90             :         /* if v == BUN_NONE, assigning the value to a BUN2type
      91             :          * etc. automatically converts to BUN2_NONE etc. */
      92   393726501 :         switch (h->width) {
      93             : #ifdef BUN2
      94   181585239 :         case BUN2:
      95   181585239 :                 assert(v == BUN_NONE || v == BUN2_NONE || v < i);
      96   181585239 :                 ((BUN2type *) h->Link)[i] = (BUN2type) v;
      97   181585239 :                 break;
      98             : #endif
      99   212141262 :         case BUN4:
     100   212141262 :                 assert(v == BUN_NONE || v == BUN4_NONE || v < i);
     101   212141262 :                 ((BUN4type *) h->Link)[i] = (BUN4type) v;
     102   212141262 :                 break;
     103             : #ifdef BUN8
     104           0 :         case BUN8:
     105           0 :                 assert(v == BUN_NONE || v == BUN8_NONE || v < i);
     106           0 :                 ((BUN8type *) h->Link)[i] = (BUN8type) v;
     107           0 :                 break;
     108             : #endif
     109             :         default:
     110           0 :                 MT_UNREACHABLE();
     111             :         }
     112   393726501 : }
     113             : 
     114             : static inline BUN __attribute__((__pure__))
     115  1459530520 : HASHget(const Hash *h, BUN i)
     116             : {
     117  1459530520 :         switch (h->width) {
     118             : #ifdef BUN2
     119   706638039 :         case BUN2:
     120   706638039 :                 i = (BUN) ((BUN2type *) h->Bckt)[i];
     121   706638039 :                 return i == BUN2_NONE ? BUN_NONE : i;
     122             : #endif
     123   752892481 :         case BUN4:
     124   752892481 :                 i = (BUN) ((BUN4type *) h->Bckt)[i];
     125   752892481 :                 return i == BUN4_NONE ? BUN_NONE : i;
     126             : #ifdef BUN8
     127           0 :         case BUN8:
     128           0 :                 i = (BUN) ((BUN8type *) h->Bckt)[i];
     129           0 :                 return i == BUN8_NONE ? BUN_NONE : i;
     130             : #endif
     131             :         default:
     132           0 :                 MT_UNREACHABLE();
     133             :         }
     134             : }
     135             : 
     136             : static inline BUN __attribute__((__pure__))
     137   323835215 : HASHgetlink(const Hash *h, BUN i)
     138             : {
     139   323835215 :         switch (h->width) {
     140             : #ifdef BUN2
     141   107682863 :         case BUN2:
     142   107682863 :                 i = (BUN) ((BUN2type *) h->Link)[i];
     143   107682863 :                 return i == BUN2_NONE ? BUN_NONE : i;
     144             : #endif
     145   216152352 :         case BUN4:
     146   216152352 :                 i = (BUN) ((BUN4type *) h->Link)[i];
     147   216152352 :                 return i == BUN4_NONE ? BUN_NONE : i;
     148             : #ifdef BUN8
     149           0 :         case BUN8:
     150           0 :                 i = (BUN) ((BUN8type *) h->Link)[i];
     151           0 :                 return i == BUN8_NONE ? BUN_NONE : i;
     152             : #endif
     153             :         default:
     154           0 :                 MT_UNREACHABLE();
     155             :         }
     156             : }
     157             : 
     158             : /* mix_bte(0x80) == 0x80 */
     159             : #define mix_bte(X)      ((unsigned int) (unsigned char) (X))
     160             : /* mix_sht(0x8000) == 0x8000 */
     161             : #define mix_sht(X)      ((unsigned int) (unsigned short) (X))
     162             : /* mix_int(0x81060038) == 0x80000000 */
     163             : #define mix_int(X)      (((unsigned int) (X) >> 7) ^      \
     164             :                          ((unsigned int) (X) >> 13) ^     \
     165             :                          ((unsigned int) (X) >> 21) ^     \
     166             :                          (unsigned int) (X))
     167             : /* mix_lng(0x810600394347424F) == 0x8000000000000000 */
     168             : #define mix_lng(X)      (((ulng) (X) >> 7) ^      \
     169             :                          ((ulng) (X) >> 13) ^     \
     170             :                          ((ulng) (X) >> 21) ^     \
     171             :                          ((ulng) (X) >> 31) ^     \
     172             :                          ((ulng) (X) >> 38) ^     \
     173             :                          ((ulng) (X) >> 46) ^     \
     174             :                          ((ulng) (X) >> 56) ^     \
     175             :                          (ulng) (X))
     176             : #ifdef HAVE_HGE
     177             : /* mix_hge(0x810600394347424F90AC1429D6BFCC57) ==
     178             :  * 0x80000000000000000000000000000000 */
     179             : #define mix_hge(X)      (((uhge) (X) >> 7) ^      \
     180             :                          ((uhge) (X) >> 13) ^     \
     181             :                          ((uhge) (X) >> 21) ^     \
     182             :                          ((uhge) (X) >> 31) ^     \
     183             :                          ((uhge) (X) >> 38) ^     \
     184             :                          ((uhge) (X) >> 46) ^     \
     185             :                          ((uhge) (X) >> 56) ^     \
     186             :                          ((uhge) (X) >> 65) ^     \
     187             :                          ((uhge) (X) >> 70) ^     \
     188             :                          ((uhge) (X) >> 78) ^     \
     189             :                          ((uhge) (X) >> 85) ^     \
     190             :                          ((uhge) (X) >> 90) ^     \
     191             :                          ((uhge) (X) >> 98) ^     \
     192             :                          ((uhge) (X) >> 107) ^    \
     193             :                          ((uhge) (X) >> 116) ^    \
     194             :                          (uhge) (X))
     195             : #endif
     196             : #define hash_loc(H,V)   hash_any(H,V)
     197             : #define hash_var(H,V)   hash_any(H,V)
     198             : #define hash_any(H,V)   HASHbucket(H, ATOMhash((H)->type, (V)))
     199             : #define hash_bte(H,V)   (assert((H)->nbucket >= 256), (BUN) mix_bte(*(const unsigned char*) (V)))
     200             : #define hash_sht(H,V)   (assert((H)->nbucket >= 65536), (BUN) mix_sht(*(const unsigned short*) (V)))
     201             : #define hash_int(H,V)   HASHbucket(H, (BUN) mix_int(*(const unsigned int *) (V)))
     202             : /* XXX return size_t-sized value for 8-byte oid? */
     203             : #define hash_lng(H,V)   HASHbucket(H, (BUN) mix_lng(*(const ulng *) (V)))
     204             : #ifdef HAVE_HGE
     205             : #define hash_hge(H,V)   HASHbucket(H, (BUN) mix_hge(*(const uhge *) (V)))
     206             : #endif
     207             : #if SIZEOF_OID == SIZEOF_INT
     208             : #define hash_oid(H,V)   hash_int(H,V)
     209             : #else
     210             : #define hash_oid(H,V)   hash_lng(H,V)
     211             : #endif
     212             : 
     213             : #define hash_flt(H,V)   HASHbucket(H, ATOMhash(TYPE_flt, (V)))
     214             : #define hash_dbl(H,V)   HASHbucket(H, ATOMhash(TYPE_dbl, (V)))
     215             : 
     216             : static inline BUN __attribute__((__pure__))
     217        2166 : mix_uuid(const uuid *u)
     218             : {
     219        2166 :         ulng u1, u2;
     220             : 
     221        2166 :         u1 = (ulng) (uint8_t) u->u[0] << 56 |
     222        2166 :                 (ulng) (uint8_t) u->u[1] << 48 |
     223        2166 :                 (ulng) (uint8_t) u->u[2] << 40 |
     224        2166 :                 (ulng) (uint8_t) u->u[3] << 32 |
     225        2166 :                 (ulng) (uint8_t) u->u[4] << 24 |
     226        2166 :                 (ulng) (uint8_t) u->u[5] << 16 |
     227        2166 :                 (ulng) (uint8_t) u->u[6] << 8 |
     228        2166 :                 (ulng) (uint8_t) u->u[7];
     229        2166 :         u2 = (ulng) (uint8_t) u->u[8] << 56 |
     230        2166 :                 (ulng) (uint8_t) u->u[9] << 48 |
     231        2166 :                 (ulng) (uint8_t) u->u[10] << 40 |
     232        2166 :                 (ulng) (uint8_t) u->u[11] << 32 |
     233        2166 :                 (ulng) (uint8_t) u->u[12] << 24 |
     234        2166 :                 (ulng) (uint8_t) u->u[13] << 16 |
     235        2166 :                 (ulng) (uint8_t) u->u[14] << 8 |
     236        2166 :                 (ulng) (uint8_t) u->u[15];
     237             :         /* we're not using mix_hge since this way we get the same result
     238             :          * on systems with and without 128 bit integer support */
     239        2166 :         return (BUN) (mix_lng(u1) ^ mix_lng(u2));
     240             : }
     241             : #define hash_uuid(H,V)  HASHbucket(H, mix_uuid((const uuid *) (V)))
     242             : 
     243             : /*
     244             :  * @- hash-table supported loop over BUNs The first parameter `bi' is
     245             :  * a BAT iterator, the second (`h') should point to the Hash
     246             :  * structure, and `v' a pointer to an atomic value (corresponding to
     247             :  * the head column of `b'). The 'hb' is an BUN index, pointing out the
     248             :  * `hb'-th BUN.
     249             :  */
     250             : #define HASHloop(bi, h, hb, v)                                  \
     251             :         for (hb = HASHget(h, HASHprobe(h, v));                  \
     252             :              hb != BUN_NONE;                                    \
     253             :              hb = HASHgetlink(h, hb))                           \
     254             :                 if (ATOMcmp(h->type, v, BUNtail(bi, hb)) == 0)
     255             : #define HASHloop_str(bi, h, hb, v)                              \
     256             :         for (hb = HASHget(h, HASHbucket(h, strHash(v)));        \
     257             :              hb != BUN_NONE;                                    \
     258             :              hb = HASHgetlink(h, hb))                           \
     259             :                 if (strEQ(v, BUNtvar(bi, hb)))
     260             : 
     261             : #define HASHlooploc(bi, h, hb, v)                               \
     262             :         for (hb = HASHget(h, HASHprobe(h, v));                  \
     263             :              hb != BUN_NONE;                                    \
     264             :              hb = HASHgetlink(h, hb))                           \
     265             :                 if (ATOMcmp(h->type, v, BUNtloc(bi, hb)) == 0)
     266             : #define HASHloopvar(bi, h, hb, v)                               \
     267             :         for (hb = HASHget(h, HASHprobe(h, v));                  \
     268             :              hb != BUN_NONE;                                    \
     269             :              hb = HASHgetlink(h, hb))                           \
     270             :                 if (ATOMcmp(h->type, v, BUNtvar(bi, hb)) == 0)
     271             : 
     272             : #define HASHloop_TYPE(bi, h, hb, v, TYPE)                               \
     273             :         for (hb = HASHget(h, hash_##TYPE(h, v));                        \
     274             :              hb != BUN_NONE;                                            \
     275             :              hb = HASHgetlink(h,hb))                                    \
     276             :                 if (* (const TYPE *) (v) == * (const TYPE *) BUNtloc(bi, hb))
     277             : 
     278             : /* need to take special care comparing nil floating point values */
     279             : #define HASHloop_fTYPE(bi, h, hb, v, TYPE)                              \
     280             :         for (hb = HASHget(h, hash_##TYPE(h, v));                        \
     281             :              hb != BUN_NONE;                                            \
     282             :              hb = HASHgetlink(h,hb))                                    \
     283             :                 if (is_##TYPE##_nil(* (const TYPE *) (v))               \
     284             :                     ? is_##TYPE##_nil(* (const TYPE *) BUNtloc(bi, hb)) \
     285             :                     : * (const TYPE *) (v) == * (const TYPE *) BUNtloc(bi, hb))
     286             : 
     287             : #define HASHloop_bte(bi, h, hb, v)      HASHloop_TYPE(bi, h, hb, v, bte)
     288             : #define HASHloop_sht(bi, h, hb, v)      HASHloop_TYPE(bi, h, hb, v, sht)
     289             : #define HASHloop_int(bi, h, hb, v)      HASHloop_TYPE(bi, h, hb, v, int)
     290             : #define HASHloop_lng(bi, h, hb, v)      HASHloop_TYPE(bi, h, hb, v, lng)
     291             : #ifdef HAVE_HGE
     292             : #define HASHloop_hge(bi, h, hb, v)      HASHloop_TYPE(bi, h, hb, v, hge)
     293             : #endif
     294             : #define HASHloop_flt(bi, h, hb, v)      HASHloop_fTYPE(bi, h, hb, v, flt)
     295             : #define HASHloop_dbl(bi, h, hb, v)      HASHloop_fTYPE(bi, h, hb, v, dbl)
     296             : #ifdef HAVE_HGE
     297             : #define HASHloop_uuid(bi, hsh, hb, v)                                   \
     298             :         for (hb = HASHget(hsh, hash_uuid(hsh, v));                      \
     299             :              hb != BUN_NONE;                                            \
     300             :              hb = HASHgetlink(hsh,hb))                                  \
     301             :                 if (((const uuid *) (v))->h == ((const uuid *) BUNtloc(bi, hb))->h)
     302             : #else
     303             : #define HASHloop_uuid(bi, h, hb, v)                                     \
     304             :         for (hb = HASHget(h, hash_uuid(h, v));                          \
     305             :              hb != BUN_NONE;                                            \
     306             :              hb = HASHgetlink(h,hb))                                    \
     307             :                 if (memcmp((const uuid *) (v), (const uuid *) BUNtloc(bi, hb), 16) == 0)
     308             : //              if (((const uuid *) (v))->l[0] == ((const uuid *) BUNtloc(bi, hb))->l[0] && ((const uuid *) (v))->l[1] == ((const uuid *) BUNtloc(bi, hb))->l[1])
     309             : #endif
     310             : 
     311             : #endif /* _GDK_SEARCH_H_ */

Generated by: LCOV version 1.14