LCOV - code coverage report
Current view: top level - gdk - gdk_hash.h (source / functions) Hit Total Coverage
Test: coverage.info Lines: 57 74 77.0 %
Date: 2024-10-07 21:21:43 Functions: 5 5 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #ifndef _GDK_SEARCH_H_
      14             : #define _GDK_SEARCH_H_
      15             : 
      16             : struct Hash {
      17             :         int type;               /* type of index entity */
      18             :         uint8_t width;          /* width of hash entries */
      19             :         BUN mask1;              /* .mask1 < .nbucket <= .mask2 */
      20             :         BUN mask2;              /* ... both are power-of-two minus one */
      21             :         BUN nbucket;            /* number of valid hash buckets */
      22             :         BUN nunique;            /* number of unique values */
      23             :         BUN nheads;             /* number of chain heads */
      24             :         void *Bckt;             /* hash buckets, points into .heapbckt */
      25             :         void *Link;             /* collision list, points into .heaplink */
      26             :         Heap heaplink;          /* heap where the hash links are stored */
      27             :         Heap heapbckt;          /* heap where the hash buckets are stored */
      28             : };
      29             : 
      30             : static inline BUN
      31   961526708 : HASHbucket(const Hash *h, BUN v)
      32             : {
      33   961526708 :         return (v &= h->mask2) < h->nbucket ? v : v & h->mask1;
      34             : }
      35             : 
      36             : gdk_export gdk_return BAThash(BAT *b);
      37             : gdk_export void HASHdestroy(BAT *b);
      38             : gdk_export BUN HASHprobe(const Hash *h, const void *v);
      39             : gdk_export BUN HASHlist(Hash *h, BUN i);
      40             : gdk_export size_t HASHsize(BAT *b);
      41             : 
      42             : #define BUN2 2
      43             : #define BUN4 4
      44             : #if SIZEOF_BUN == 8
      45             : #define BUN8 8
      46             : #endif
      47             : #ifdef BUN2
      48             : typedef uint16_t BUN2type;
      49             : #endif
      50             : typedef uint32_t BUN4type;
      51             : #if SIZEOF_BUN > 4
      52             : typedef uint64_t BUN8type;
      53             : #endif
      54             : #ifdef BUN2
      55             : #define BUN2_NONE ((BUN2type) UINT16_C(0xFFFF))
      56             : #endif
      57             : #define BUN4_NONE ((BUN4type) UINT32_C(0xFFFFFFFF))
      58             : #ifdef BUN8
      59             : #define BUN8_NONE ((BUN8type) UINT64_C(0xFFFFFFFFFFFFFFFF))
      60             : #endif
      61             : 
      62             : /* play around with h->Bckt[i] and h->Link[j] */
      63             : 
      64             : static inline void
      65   286970359 : HASHput(Hash *h, BUN i, BUN v)
      66             : {
      67             :         /* if v == BUN_NONE, assigning the value to a BUN2type
      68             :          * etc. automatically converts to BUN2_NONE etc. */
      69   286970359 :         switch (h->width) {
      70             : #ifdef BUN2
      71   176906754 :         case BUN2:
      72   176906754 :                 ((BUN2type *) h->Bckt)[i] = (BUN2type) v;
      73   176906754 :                 break;
      74             : #endif
      75   110063605 :         case BUN4:
      76   110063605 :                 ((BUN4type *) h->Bckt)[i] = (BUN4type) v;
      77   110063605 :                 break;
      78             : #ifdef BUN8
      79           0 :         case BUN8:
      80           0 :                 ((BUN8type *) h->Bckt)[i] = (BUN8type) v;
      81           0 :                 break;
      82             : #endif
      83             :         default:
      84           0 :                 MT_UNREACHABLE();
      85             :         }
      86   286970359 : }
      87             : 
      88             : static inline void
      89   287032563 : HASHputlink(Hash *h, BUN i, BUN v)
      90             : {
      91             :         /* if v == BUN_NONE, assigning the value to a BUN2type
      92             :          * etc. automatically converts to BUN2_NONE etc. */
      93   287032563 :         switch (h->width) {
      94             : #ifdef BUN2
      95   176805805 :         case BUN2:
      96   176805805 :                 assert(v == BUN_NONE || v == BUN2_NONE || v < i);
      97   176805805 :                 ((BUN2type *) h->Link)[i] = (BUN2type) v;
      98   176805805 :                 break;
      99             : #endif
     100   110226758 :         case BUN4:
     101   110226758 :                 assert(v == BUN_NONE || v == BUN4_NONE || v < i);
     102   110226758 :                 ((BUN4type *) h->Link)[i] = (BUN4type) v;
     103   110226758 :                 break;
     104             : #ifdef BUN8
     105           0 :         case BUN8:
     106           0 :                 assert(v == BUN_NONE || v == BUN8_NONE || v < i);
     107           0 :                 ((BUN8type *) h->Link)[i] = (BUN8type) v;
     108           0 :                 break;
     109             : #endif
     110             :         default:
     111           0 :                 MT_UNREACHABLE();
     112             :         }
     113   287032563 : }
     114             : 
     115             : static inline BUN __attribute__((__pure__))
     116  1156938702 : HASHget(const Hash *h, BUN i)
     117             : {
     118  1156938702 :         switch (h->width) {
     119             : #ifdef BUN2
     120   683979515 :         case BUN2:
     121   683979515 :                 i = (BUN) ((BUN2type *) h->Bckt)[i];
     122   683979515 :                 return i == BUN2_NONE ? BUN_NONE : i;
     123             : #endif
     124   472959187 :         case BUN4:
     125   472959187 :                 i = (BUN) ((BUN4type *) h->Bckt)[i];
     126   472959187 :                 return i == BUN4_NONE ? BUN_NONE : i;
     127             : #ifdef BUN8
     128           0 :         case BUN8:
     129           0 :                 i = (BUN) ((BUN8type *) h->Bckt)[i];
     130           0 :                 return i == BUN8_NONE ? BUN_NONE : i;
     131             : #endif
     132             :         default:
     133           0 :                 MT_UNREACHABLE();
     134             :         }
     135             : }
     136             : 
     137             : static inline BUN __attribute__((__pure__))
     138   412574859 : HASHgetlink(const Hash *h, BUN i)
     139             : {
     140   412574859 :         switch (h->width) {
     141             : #ifdef BUN2
     142   215672281 :         case BUN2:
     143   215672281 :                 i = (BUN) ((BUN2type *) h->Link)[i];
     144   215672281 :                 return i == BUN2_NONE ? BUN_NONE : i;
     145             : #endif
     146   196902578 :         case BUN4:
     147   196902578 :                 i = (BUN) ((BUN4type *) h->Link)[i];
     148   196902578 :                 return i == BUN4_NONE ? BUN_NONE : i;
     149             : #ifdef BUN8
     150           0 :         case BUN8:
     151           0 :                 i = (BUN) ((BUN8type *) h->Link)[i];
     152           0 :                 return i == BUN8_NONE ? BUN_NONE : i;
     153             : #endif
     154             :         default:
     155           0 :                 MT_UNREACHABLE();
     156             :         }
     157             : }
     158             : 
     159             : /* mix_bte(0x80) == 0x80 */
     160             : #define mix_bte(X)      ((unsigned int) (unsigned char) (X))
     161             : /* mix_sht(0x8000) == 0x8000 */
     162             : #define mix_sht(X)      ((unsigned int) (unsigned short) (X))
     163             : /* mix_int(0x81060038) == 0x80000000 */
     164             : #define mix_int(X)      (((unsigned int) (X) >> 7) ^      \
     165             :                          ((unsigned int) (X) >> 13) ^     \
     166             :                          ((unsigned int) (X) >> 21) ^     \
     167             :                          (unsigned int) (X))
     168             : /* mix_lng(0x810600394347424F) == 0x8000000000000000 */
     169             : #define mix_lng(X)      (((ulng) (X) >> 7) ^      \
     170             :                          ((ulng) (X) >> 13) ^     \
     171             :                          ((ulng) (X) >> 21) ^     \
     172             :                          ((ulng) (X) >> 31) ^     \
     173             :                          ((ulng) (X) >> 38) ^     \
     174             :                          ((ulng) (X) >> 46) ^     \
     175             :                          ((ulng) (X) >> 56) ^     \
     176             :                          (ulng) (X))
     177             : #ifdef HAVE_HGE
     178             : /* mix_hge(0x810600394347424F90AC1429D6BFCC57) ==
     179             :  * 0x80000000000000000000000000000000 */
     180             : #define mix_hge(X)      (((uhge) (X) >> 7) ^      \
     181             :                          ((uhge) (X) >> 13) ^     \
     182             :                          ((uhge) (X) >> 21) ^     \
     183             :                          ((uhge) (X) >> 31) ^     \
     184             :                          ((uhge) (X) >> 38) ^     \
     185             :                          ((uhge) (X) >> 46) ^     \
     186             :                          ((uhge) (X) >> 56) ^     \
     187             :                          ((uhge) (X) >> 65) ^     \
     188             :                          ((uhge) (X) >> 70) ^     \
     189             :                          ((uhge) (X) >> 78) ^     \
     190             :                          ((uhge) (X) >> 85) ^     \
     191             :                          ((uhge) (X) >> 90) ^     \
     192             :                          ((uhge) (X) >> 98) ^     \
     193             :                          ((uhge) (X) >> 107) ^    \
     194             :                          ((uhge) (X) >> 116) ^    \
     195             :                          (uhge) (X))
     196             : #endif
     197             : #define hash_loc(H,V)   hash_any(H,V)
     198             : #define hash_var(H,V)   hash_any(H,V)
     199             : #define hash_any(H,V)   HASHbucket(H, ATOMhash((H)->type, (V)))
     200             : #define hash_bte(H,V)   (assert((H)->nbucket >= 256), (BUN) mix_bte(*(const unsigned char*) (V)))
     201             : #define hash_sht(H,V)   (assert((H)->nbucket >= 65536), (BUN) mix_sht(*(const unsigned short*) (V)))
     202             : #define hash_int(H,V)   HASHbucket(H, (BUN) mix_int(*(const unsigned int *) (V)))
     203             : /* XXX return size_t-sized value for 8-byte oid? */
     204             : #define hash_lng(H,V)   HASHbucket(H, (BUN) mix_lng(*(const ulng *) (V)))
     205             : #ifdef HAVE_HGE
     206             : #define hash_hge(H,V)   HASHbucket(H, (BUN) mix_hge(*(const uhge *) (V)))
     207             : #endif
     208             : #if SIZEOF_OID == SIZEOF_INT
     209             : #define hash_oid(H,V)   hash_int(H,V)
     210             : #else
     211             : #define hash_oid(H,V)   hash_lng(H,V)
     212             : #endif
     213             : 
     214             : #define hash_flt(H,V)   HASHbucket(H, ATOMhash(TYPE_flt, (V)))
     215             : #define hash_dbl(H,V)   HASHbucket(H, ATOMhash(TYPE_dbl, (V)))
     216             : 
     217             : static inline BUN __attribute__((__pure__))
     218        2166 : mix_uuid(const uuid *u)
     219             : {
     220        2166 :         ulng u1, u2;
     221             : 
     222        2166 :         u1 = (ulng) (uint8_t) u->u[0] << 56 |
     223        2166 :                 (ulng) (uint8_t) u->u[1] << 48 |
     224        2166 :                 (ulng) (uint8_t) u->u[2] << 40 |
     225        2166 :                 (ulng) (uint8_t) u->u[3] << 32 |
     226        2166 :                 (ulng) (uint8_t) u->u[4] << 24 |
     227        2166 :                 (ulng) (uint8_t) u->u[5] << 16 |
     228        2166 :                 (ulng) (uint8_t) u->u[6] << 8 |
     229        2166 :                 (ulng) (uint8_t) u->u[7];
     230        2166 :         u2 = (ulng) (uint8_t) u->u[8] << 56 |
     231        2166 :                 (ulng) (uint8_t) u->u[9] << 48 |
     232        2166 :                 (ulng) (uint8_t) u->u[10] << 40 |
     233        2166 :                 (ulng) (uint8_t) u->u[11] << 32 |
     234        2166 :                 (ulng) (uint8_t) u->u[12] << 24 |
     235        2166 :                 (ulng) (uint8_t) u->u[13] << 16 |
     236        2166 :                 (ulng) (uint8_t) u->u[14] << 8 |
     237        2166 :                 (ulng) (uint8_t) u->u[15];
     238             :         /* we're not using mix_hge since this way we get the same result
     239             :          * on systems with and without 128 bit integer support */
     240        2166 :         return (BUN) (mix_lng(u1) ^ mix_lng(u2));
     241             : }
     242             : #define hash_uuid(H,V)  HASHbucket(H, mix_uuid((const uuid *) (V)))
     243             : 
     244             : /*
     245             :  * @- hash-table supported loop over BUNs The first parameter `bi' is
     246             :  * a BAT iterator, the second (`h') should point to the Hash
     247             :  * structure, and `v' a pointer to an atomic value (corresponding to
     248             :  * the head column of `b'). The 'hb' is an BUN index, pointing out the
     249             :  * `hb'-th BUN.
     250             :  */
     251             : #define HASHloop(bi, h, hb, v)                                  \
     252             :         for (hb = HASHget(h, HASHprobe(h, v));                  \
     253             :              hb != BUN_NONE;                                    \
     254             :              hb = HASHgetlink(h, hb))                           \
     255             :                 if (ATOMcmp(h->type, v, BUNtail(bi, hb)) == 0)
     256             : #define HASHloop_str(bi, h, hb, v)                              \
     257             :         for (hb = HASHget(h, HASHbucket(h, strHash(v)));        \
     258             :              hb != BUN_NONE;                                    \
     259             :              hb = HASHgetlink(h, hb))                           \
     260             :                 if (strEQ(v, BUNtvar(bi, hb)))
     261             : 
     262             : #define HASHlooploc(bi, h, hb, v)                               \
     263             :         for (hb = HASHget(h, HASHprobe(h, v));                  \
     264             :              hb != BUN_NONE;                                    \
     265             :              hb = HASHgetlink(h, hb))                           \
     266             :                 if (ATOMcmp(h->type, v, BUNtloc(bi, hb)) == 0)
     267             : #define HASHloopvar(bi, h, hb, v)                               \
     268             :         for (hb = HASHget(h, HASHprobe(h, v));                  \
     269             :              hb != BUN_NONE;                                    \
     270             :              hb = HASHgetlink(h, hb))                           \
     271             :                 if (ATOMcmp(h->type, v, BUNtvar(bi, hb)) == 0)
     272             : 
     273             : #define HASHloop_TYPE(bi, h, hb, v, TYPE)                               \
     274             :         for (hb = HASHget(h, hash_##TYPE(h, v));                        \
     275             :              hb != BUN_NONE;                                            \
     276             :              hb = HASHgetlink(h,hb))                                    \
     277             :                 if (* (const TYPE *) (v) == * (const TYPE *) BUNtloc(bi, hb))
     278             : 
     279             : /* need to take special care comparing nil floating point values */
     280             : #define HASHloop_fTYPE(bi, h, hb, v, TYPE)                              \
     281             :         for (hb = HASHget(h, hash_##TYPE(h, v));                        \
     282             :              hb != BUN_NONE;                                            \
     283             :              hb = HASHgetlink(h,hb))                                    \
     284             :                 if (is_##TYPE##_nil(* (const TYPE *) (v))               \
     285             :                     ? is_##TYPE##_nil(* (const TYPE *) BUNtloc(bi, hb)) \
     286             :                     : * (const TYPE *) (v) == * (const TYPE *) BUNtloc(bi, hb))
     287             : 
     288             : #define HASHloop_bte(bi, h, hb, v)      HASHloop_TYPE(bi, h, hb, v, bte)
     289             : #define HASHloop_sht(bi, h, hb, v)      HASHloop_TYPE(bi, h, hb, v, sht)
     290             : #define HASHloop_int(bi, h, hb, v)      HASHloop_TYPE(bi, h, hb, v, int)
     291             : #define HASHloop_lng(bi, h, hb, v)      HASHloop_TYPE(bi, h, hb, v, lng)
     292             : #ifdef HAVE_HGE
     293             : #define HASHloop_hge(bi, h, hb, v)      HASHloop_TYPE(bi, h, hb, v, hge)
     294             : #endif
     295             : #define HASHloop_flt(bi, h, hb, v)      HASHloop_fTYPE(bi, h, hb, v, flt)
     296             : #define HASHloop_dbl(bi, h, hb, v)      HASHloop_fTYPE(bi, h, hb, v, dbl)
     297             : #ifdef HAVE_HGE
     298             : #define HASHloop_uuid(bi, hsh, hb, v)                                   \
     299             :         for (hb = HASHget(hsh, hash_uuid(hsh, v));                      \
     300             :              hb != BUN_NONE;                                            \
     301             :              hb = HASHgetlink(hsh,hb))                                  \
     302             :                 if (((const uuid *) (v))->h == ((const uuid *) BUNtloc(bi, hb))->h)
     303             : #else
     304             : #define HASHloop_uuid(bi, h, hb, v)                                     \
     305             :         for (hb = HASHget(h, hash_uuid(h, v));                          \
     306             :              hb != BUN_NONE;                                            \
     307             :              hb = HASHgetlink(h,hb))                                    \
     308             :                 if (memcmp((const uuid *) (v), (const uuid *) BUNtloc(bi, hb), 16) == 0)
     309             : //              if (((const uuid *) (v))->l[0] == ((const uuid *) BUNtloc(bi, hb))->l[0] && ((const uuid *) (v))->l[1] == ((const uuid *) BUNtloc(bi, hb))->l[1])
     310             : #endif
     311             : 
     312             : #endif /* _GDK_SEARCH_H_ */

Generated by: LCOV version 1.14