LCOV - code coverage report
Current view: top level - gdk - gdk_hash.h (source / functions) Hit Total Coverage
Test: coverage.info Lines: 57 74 77.0 %
Date: 2024-11-15 19:37:45 Functions: 5 5 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #ifndef _GDK_SEARCH_H_
      14             : #define _GDK_SEARCH_H_
      15             : 
      16             : struct Hash {
      17             :         int type;               /* type of index entity */
      18             :         uint8_t width;          /* width of hash entries */
      19             :         BUN mask1;              /* .mask1 < .nbucket <= .mask2 */
      20             :         BUN mask2;              /* ... both are power-of-two minus one */
      21             :         BUN nbucket;            /* number of valid hash buckets */
      22             :         BUN nunique;            /* number of unique values */
      23             :         BUN nheads;             /* number of chain heads */
      24             :         void *Bckt;             /* hash buckets, points into .heapbckt */
      25             :         void *Link;             /* collision list, points into .heaplink */
      26             :         Heap heaplink;          /* heap where the hash links are stored */
      27             :         Heap heapbckt;          /* heap where the hash buckets are stored */
      28             : };
      29             : 
      30             : static inline BUN
      31   949898382 : HASHbucket(const Hash *h, BUN v)
      32             : {
      33   949898382 :         return (v &= h->mask2) < h->nbucket ? v : v & h->mask1;
      34             : }
      35             : 
      36             : gdk_export gdk_return BAThash(BAT *b);
      37             : gdk_export void HASHdestroy(BAT *b);
      38             : gdk_export BUN HASHprobe(const Hash *h, const void *v);
      39             : gdk_export BUN HASHlist(Hash *h, BUN i);
      40             : gdk_export size_t HASHsize(BAT *b);
      41             : 
      42             : #define BUN2 2
      43             : #define BUN4 4
      44             : #if SIZEOF_BUN == 8
      45             : #define BUN8 8
      46             : #endif
      47             : #ifdef BUN2
      48             : typedef uint16_t BUN2type;
      49             : #endif
      50             : typedef uint32_t BUN4type;
      51             : #if SIZEOF_BUN > 4
      52             : typedef uint64_t BUN8type;
      53             : #endif
      54             : #ifdef BUN2
      55             : #define BUN2_NONE ((BUN2type) UINT16_C(0xFFFF))
      56             : #endif
      57             : #define BUN4_NONE ((BUN4type) UINT32_C(0xFFFFFFFF))
      58             : #ifdef BUN8
      59             : #define BUN8_NONE ((BUN8type) UINT64_C(0xFFFFFFFFFFFFFFFF))
      60             : #endif
      61             : 
      62             : /* play around with h->Bckt[i] and h->Link[j] */
      63             : 
      64             : static inline void
      65   281181317 : HASHput(Hash *h, BUN i, BUN v)
      66             : {
      67             :         /* if v == BUN_NONE, assigning the value to a BUN2type
      68             :          * etc. automatically converts to BUN2_NONE etc. */
      69   281181317 :         switch (h->width) {
      70             : #ifdef BUN2
      71   172001965 :         case BUN2:
      72   172001965 :                 ((BUN2type *) h->Bckt)[i] = (BUN2type) v;
      73   172001965 :                 break;
      74             : #endif
      75   109179352 :         case BUN4:
      76   109179352 :                 ((BUN4type *) h->Bckt)[i] = (BUN4type) v;
      77   109179352 :                 break;
      78             : #ifdef BUN8
      79           0 :         case BUN8:
      80           0 :                 ((BUN8type *) h->Bckt)[i] = (BUN8type) v;
      81           0 :                 break;
      82             : #endif
      83             :         default:
      84           0 :                 MT_UNREACHABLE();
      85             :         }
      86   281181317 : }
      87             : 
      88             : static inline void
      89   281183521 : HASHputlink(Hash *h, BUN i, BUN v)
      90             : {
      91             :         /* if v == BUN_NONE, assigning the value to a BUN2type
      92             :          * etc. automatically converts to BUN2_NONE etc. */
      93   281183521 :         switch (h->width) {
      94             : #ifdef BUN2
      95   171821290 :         case BUN2:
      96   171821290 :                 assert(v == BUN_NONE || v == BUN2_NONE || v < i);
      97   171821290 :                 ((BUN2type *) h->Link)[i] = (BUN2type) v;
      98   171821290 :                 break;
      99             : #endif
     100   109362231 :         case BUN4:
     101   109362231 :                 assert(v == BUN_NONE || v == BUN4_NONE || v < i);
     102   109362231 :                 ((BUN4type *) h->Link)[i] = (BUN4type) v;
     103   109362231 :                 break;
     104             : #ifdef BUN8
     105           0 :         case BUN8:
     106           0 :                 assert(v == BUN_NONE || v == BUN8_NONE || v < i);
     107           0 :                 ((BUN8type *) h->Link)[i] = (BUN8type) v;
     108           0 :                 break;
     109             : #endif
     110             :         default:
     111           0 :                 MT_UNREACHABLE();
     112             :         }
     113   281183521 : }
     114             : 
     115             : __attribute__((__pure__))
     116             : static inline BUN
     117  1138105039 : HASHget(const Hash *h, BUN i)
     118             : {
     119  1138105039 :         switch (h->width) {
     120             : #ifdef BUN2
     121   669534755 :         case BUN2:
     122   669534755 :                 i = (BUN) ((BUN2type *) h->Bckt)[i];
     123   669534755 :                 return i == BUN2_NONE ? BUN_NONE : i;
     124             : #endif
     125   468570284 :         case BUN4:
     126   468570284 :                 i = (BUN) ((BUN4type *) h->Bckt)[i];
     127   468570284 :                 return i == BUN4_NONE ? BUN_NONE : i;
     128             : #ifdef BUN8
     129           0 :         case BUN8:
     130           0 :                 i = (BUN) ((BUN8type *) h->Bckt)[i];
     131           0 :                 return i == BUN8_NONE ? BUN_NONE : i;
     132             : #endif
     133             :         default:
     134           0 :                 MT_UNREACHABLE();
     135             :         }
     136             : }
     137             : 
     138             : __attribute__((__pure__))
     139             : static inline BUN
     140   334736805 : HASHgetlink(const Hash *h, BUN i)
     141             : {
     142   334736805 :         switch (h->width) {
     143             : #ifdef BUN2
     144   138653691 :         case BUN2:
     145   138653691 :                 i = (BUN) ((BUN2type *) h->Link)[i];
     146   138653691 :                 return i == BUN2_NONE ? BUN_NONE : i;
     147             : #endif
     148   196083114 :         case BUN4:
     149   196083114 :                 i = (BUN) ((BUN4type *) h->Link)[i];
     150   196083114 :                 return i == BUN4_NONE ? BUN_NONE : i;
     151             : #ifdef BUN8
     152           0 :         case BUN8:
     153           0 :                 i = (BUN) ((BUN8type *) h->Link)[i];
     154           0 :                 return i == BUN8_NONE ? BUN_NONE : i;
     155             : #endif
     156             :         default:
     157           0 :                 MT_UNREACHABLE();
     158             :         }
     159             : }
     160             : 
     161             : /* mix_bte(0x80) == 0x80 */
     162             : #define mix_bte(X)      ((unsigned int) (unsigned char) (X))
     163             : /* mix_sht(0x8000) == 0x8000 */
     164             : #define mix_sht(X)      ((unsigned int) (unsigned short) (X))
     165             : /* mix_int(0x81060038) == 0x80000000 */
     166             : #define mix_int(X)      (((unsigned int) (X) >> 7) ^      \
     167             :                          ((unsigned int) (X) >> 13) ^     \
     168             :                          ((unsigned int) (X) >> 21) ^     \
     169             :                          (unsigned int) (X))
     170             : /* mix_lng(0x810600394347424F) == 0x8000000000000000 */
     171             : #define mix_lng(X)      (((ulng) (X) >> 7) ^      \
     172             :                          ((ulng) (X) >> 13) ^     \
     173             :                          ((ulng) (X) >> 21) ^     \
     174             :                          ((ulng) (X) >> 31) ^     \
     175             :                          ((ulng) (X) >> 38) ^     \
     176             :                          ((ulng) (X) >> 46) ^     \
     177             :                          ((ulng) (X) >> 56) ^     \
     178             :                          (ulng) (X))
     179             : #ifdef HAVE_HGE
     180             : /* mix_hge(0x810600394347424F90AC1429D6BFCC57) ==
     181             :  * 0x80000000000000000000000000000000 */
     182             : #define mix_hge(X)      (((uhge) (X) >> 7) ^      \
     183             :                          ((uhge) (X) >> 13) ^     \
     184             :                          ((uhge) (X) >> 21) ^     \
     185             :                          ((uhge) (X) >> 31) ^     \
     186             :                          ((uhge) (X) >> 38) ^     \
     187             :                          ((uhge) (X) >> 46) ^     \
     188             :                          ((uhge) (X) >> 56) ^     \
     189             :                          ((uhge) (X) >> 65) ^     \
     190             :                          ((uhge) (X) >> 70) ^     \
     191             :                          ((uhge) (X) >> 78) ^     \
     192             :                          ((uhge) (X) >> 85) ^     \
     193             :                          ((uhge) (X) >> 90) ^     \
     194             :                          ((uhge) (X) >> 98) ^     \
     195             :                          ((uhge) (X) >> 107) ^    \
     196             :                          ((uhge) (X) >> 116) ^    \
     197             :                          (uhge) (X))
     198             : #endif
     199             : #define hash_loc(H,V)   hash_any(H,V)
     200             : #define hash_var(H,V)   hash_any(H,V)
     201             : #define hash_any(H,V)   HASHbucket(H, ATOMhash((H)->type, (V)))
     202             : #define hash_bte(H,V)   (assert((H)->nbucket >= 256), (BUN) mix_bte(*(const unsigned char*) (V)))
     203             : #define hash_sht(H,V)   (assert((H)->nbucket >= 65536), (BUN) mix_sht(*(const unsigned short*) (V)))
     204             : #define hash_int(H,V)   HASHbucket(H, (BUN) mix_int(*(const unsigned int *) (V)))
     205             : /* XXX return size_t-sized value for 8-byte oid? */
     206             : #define hash_lng(H,V)   HASHbucket(H, (BUN) mix_lng(*(const ulng *) (V)))
     207             : #ifdef HAVE_HGE
     208             : #define hash_hge(H,V)   HASHbucket(H, (BUN) mix_hge(*(const uhge *) (V)))
     209             : #endif
     210             : #if SIZEOF_OID == SIZEOF_INT
     211             : #define hash_oid(H,V)   hash_int(H,V)
     212             : #else
     213             : #define hash_oid(H,V)   hash_lng(H,V)
     214             : #endif
     215             : 
     216             : #define hash_flt(H,V)   HASHbucket(H, ATOMhash(TYPE_flt, (V)))
     217             : #define hash_dbl(H,V)   HASHbucket(H, ATOMhash(TYPE_dbl, (V)))
     218             : 
     219             : __attribute__((__pure__))
     220             : static inline BUN
     221        2166 : mix_uuid(const uuid *u)
     222             : {
     223        2166 :         ulng u1, u2;
     224             : 
     225        2166 :         u1 = (ulng) (uint8_t) u->u[0] << 56 |
     226        2166 :                 (ulng) (uint8_t) u->u[1] << 48 |
     227        2166 :                 (ulng) (uint8_t) u->u[2] << 40 |
     228        2166 :                 (ulng) (uint8_t) u->u[3] << 32 |
     229        2166 :                 (ulng) (uint8_t) u->u[4] << 24 |
     230        2166 :                 (ulng) (uint8_t) u->u[5] << 16 |
     231        2166 :                 (ulng) (uint8_t) u->u[6] << 8 |
     232        2166 :                 (ulng) (uint8_t) u->u[7];
     233        2166 :         u2 = (ulng) (uint8_t) u->u[8] << 56 |
     234        2166 :                 (ulng) (uint8_t) u->u[9] << 48 |
     235        2166 :                 (ulng) (uint8_t) u->u[10] << 40 |
     236        2166 :                 (ulng) (uint8_t) u->u[11] << 32 |
     237        2166 :                 (ulng) (uint8_t) u->u[12] << 24 |
     238        2166 :                 (ulng) (uint8_t) u->u[13] << 16 |
     239        2166 :                 (ulng) (uint8_t) u->u[14] << 8 |
     240        2166 :                 (ulng) (uint8_t) u->u[15];
     241             :         /* we're not using mix_hge since this way we get the same result
     242             :          * on systems with and without 128 bit integer support */
     243        2166 :         return (BUN) (mix_lng(u1) ^ mix_lng(u2));
     244             : }
     245             : #define hash_uuid(H,V)  HASHbucket(H, mix_uuid((const uuid *) (V)))
     246             : 
     247             : /*
     248             :  * @- hash-table supported loop over BUNs The first parameter `bi' is
     249             :  * a BAT iterator, the second (`h') should point to the Hash
     250             :  * structure, and `v' a pointer to an atomic value (corresponding to
     251             :  * the head column of `b'). The 'hb' is an BUN index, pointing out the
     252             :  * `hb'-th BUN.
     253             :  */
     254             : #define HASHloop(bi, h, hb, v)                                  \
     255             :         for (hb = HASHget(h, HASHprobe(h, v));                  \
     256             :              hb != BUN_NONE;                                    \
     257             :              hb = HASHgetlink(h, hb))                           \
     258             :                 if (ATOMcmp(h->type, v, BUNtail(bi, hb)) == 0)
     259             : #define HASHloop_str(bi, h, hb, v)                              \
     260             :         for (hb = HASHget(h, HASHbucket(h, strHash(v)));        \
     261             :              hb != BUN_NONE;                                    \
     262             :              hb = HASHgetlink(h, hb))                           \
     263             :                 if (strEQ(v, BUNtvar(bi, hb)))
     264             : 
     265             : #define HASHlooploc(bi, h, hb, v)                               \
     266             :         for (hb = HASHget(h, HASHprobe(h, v));                  \
     267             :              hb != BUN_NONE;                                    \
     268             :              hb = HASHgetlink(h, hb))                           \
     269             :                 if (ATOMcmp(h->type, v, BUNtloc(bi, hb)) == 0)
     270             : #define HASHloopvar(bi, h, hb, v)                               \
     271             :         for (hb = HASHget(h, HASHprobe(h, v));                  \
     272             :              hb != BUN_NONE;                                    \
     273             :              hb = HASHgetlink(h, hb))                           \
     274             :                 if (ATOMcmp(h->type, v, BUNtvar(bi, hb)) == 0)
     275             : 
     276             : #define HASHloop_TYPE(bi, h, hb, v, TYPE)                               \
     277             :         for (hb = HASHget(h, hash_##TYPE(h, v));                        \
     278             :              hb != BUN_NONE;                                            \
     279             :              hb = HASHgetlink(h,hb))                                    \
     280             :                 if (* (const TYPE *) (v) == * (const TYPE *) BUNtloc(bi, hb))
     281             : 
     282             : /* need to take special care comparing nil floating point values */
     283             : #define HASHloop_fTYPE(bi, h, hb, v, TYPE)                              \
     284             :         for (hb = HASHget(h, hash_##TYPE(h, v));                        \
     285             :              hb != BUN_NONE;                                            \
     286             :              hb = HASHgetlink(h,hb))                                    \
     287             :                 if (is_##TYPE##_nil(* (const TYPE *) (v))               \
     288             :                     ? is_##TYPE##_nil(* (const TYPE *) BUNtloc(bi, hb)) \
     289             :                     : * (const TYPE *) (v) == * (const TYPE *) BUNtloc(bi, hb))
     290             : 
     291             : #define HASHloop_bte(bi, h, hb, v)      HASHloop_TYPE(bi, h, hb, v, bte)
     292             : #define HASHloop_sht(bi, h, hb, v)      HASHloop_TYPE(bi, h, hb, v, sht)
     293             : #define HASHloop_int(bi, h, hb, v)      HASHloop_TYPE(bi, h, hb, v, int)
     294             : #define HASHloop_lng(bi, h, hb, v)      HASHloop_TYPE(bi, h, hb, v, lng)
     295             : #ifdef HAVE_HGE
     296             : #define HASHloop_hge(bi, h, hb, v)      HASHloop_TYPE(bi, h, hb, v, hge)
     297             : #endif
     298             : #define HASHloop_flt(bi, h, hb, v)      HASHloop_fTYPE(bi, h, hb, v, flt)
     299             : #define HASHloop_dbl(bi, h, hb, v)      HASHloop_fTYPE(bi, h, hb, v, dbl)
     300             : #ifdef HAVE_HGE
     301             : #define HASHloop_uuid(bi, hsh, hb, v)                                   \
     302             :         for (hb = HASHget(hsh, hash_uuid(hsh, v));                      \
     303             :              hb != BUN_NONE;                                            \
     304             :              hb = HASHgetlink(hsh,hb))                                  \
     305             :                 if (((const uuid *) (v))->h == ((const uuid *) BUNtloc(bi, hb))->h)
     306             : #else
     307             : #define HASHloop_uuid(bi, h, hb, v)                                     \
     308             :         for (hb = HASHget(h, hash_uuid(h, v));                          \
     309             :              hb != BUN_NONE;                                            \
     310             :              hb = HASHgetlink(h,hb))                                    \
     311             :                 if (memcmp((const uuid *) (v), (const uuid *) BUNtloc(bi, hb), 16) == 0)
     312             : //              if (((const uuid *) (v))->l[0] == ((const uuid *) BUNtloc(bi, hb))->l[0] && ((const uuid *) (v))->l[1] == ((const uuid *) BUNtloc(bi, hb))->l[1])
     313             : #endif
     314             : 
     315             : #endif /* _GDK_SEARCH_H_ */

Generated by: LCOV version 1.14