Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #ifndef _GDK_SEARCH_H_
14 : #define _GDK_SEARCH_H_
15 :
16 : struct Hash {
17 : int type; /* type of index entity */
18 : uint8_t width; /* width of hash entries */
19 : BUN mask1; /* .mask1 < .nbucket <= .mask2 */
20 : BUN mask2; /* ... both are power-of-two minus one */
21 : BUN nbucket; /* number of valid hash buckets */
22 : BUN nunique; /* number of unique values */
23 : BUN nheads; /* number of chain heads */
24 : void *Bckt; /* hash buckets, points into .heapbckt */
25 : void *Link; /* collision list, points into .heaplink */
26 : Heap heaplink; /* heap where the hash links are stored */
27 : Heap heapbckt; /* heap where the hash buckets are stored */
28 : };
29 :
30 : static inline BUN
31 987826434 : HASHbucket(const Hash *h, BUN v)
32 : {
33 987826434 : return (v &= h->mask2) < h->nbucket ? v : v & h->mask1;
34 : }
35 :
36 : gdk_export gdk_return BAThash(BAT *b);
37 : gdk_export void HASHdestroy(BAT *b);
38 : gdk_export BUN HASHprobe(const Hash *h, const void *v);
39 : gdk_export BUN HASHlist(Hash *h, BUN i);
40 : gdk_export size_t HASHsize(BAT *b);
41 :
42 : #define BUN2 2
43 : #define BUN4 4
44 : #if SIZEOF_BUN == 8
45 : #define BUN8 8
46 : #endif
47 : #ifdef BUN2
48 : typedef uint16_t BUN2type;
49 : #endif
50 : typedef uint32_t BUN4type;
51 : #if SIZEOF_BUN > 4
52 : typedef uint64_t BUN8type;
53 : #endif
54 : #ifdef BUN2
55 : #define BUN2_NONE ((BUN2type) UINT16_C(0xFFFF))
56 : #endif
57 : #define BUN4_NONE ((BUN4type) UINT32_C(0xFFFFFFFF))
58 : #ifdef BUN8
59 : #define BUN8_NONE ((BUN8type) UINT64_C(0xFFFFFFFFFFFFFFFF))
60 : #endif
61 :
62 : /* play around with h->Bckt[i] and h->Link[j] */
63 :
64 : static inline void
65 289671947 : HASHput(Hash *h, BUN i, BUN v)
66 : {
67 : /* if v == BUN_NONE, assigning the value to a BUN2type
68 : * etc. automatically converts to BUN2_NONE etc. */
69 289671947 : switch (h->width) {
70 : #ifdef BUN2
71 177667240 : case BUN2:
72 177667240 : ((BUN2type *) h->Bckt)[i] = (BUN2type) v;
73 177667240 : break;
74 : #endif
75 112004707 : case BUN4:
76 112004707 : ((BUN4type *) h->Bckt)[i] = (BUN4type) v;
77 112004707 : break;
78 : #ifdef BUN8
79 0 : case BUN8:
80 0 : ((BUN8type *) h->Bckt)[i] = (BUN8type) v;
81 0 : break;
82 : #endif
83 : default:
84 0 : MT_UNREACHABLE();
85 : }
86 289671947 : }
87 :
88 : static inline void
89 289559982 : HASHputlink(Hash *h, BUN i, BUN v)
90 : {
91 : /* if v == BUN_NONE, assigning the value to a BUN2type
92 : * etc. automatically converts to BUN2_NONE etc. */
93 289559982 : switch (h->width) {
94 : #ifdef BUN2
95 177570506 : case BUN2:
96 177570506 : assert(v == BUN_NONE || v == BUN2_NONE || v < i);
97 177570506 : ((BUN2type *) h->Link)[i] = (BUN2type) v;
98 177570506 : break;
99 : #endif
100 111989476 : case BUN4:
101 111989476 : assert(v == BUN_NONE || v == BUN4_NONE || v < i);
102 111989476 : ((BUN4type *) h->Link)[i] = (BUN4type) v;
103 111989476 : break;
104 : #ifdef BUN8
105 0 : case BUN8:
106 0 : assert(v == BUN_NONE || v == BUN8_NONE || v < i);
107 0 : ((BUN8type *) h->Link)[i] = (BUN8type) v;
108 0 : break;
109 : #endif
110 : default:
111 0 : MT_UNREACHABLE();
112 : }
113 289559982 : }
114 :
115 : __attribute__((__pure__))
116 : static inline BUN
117 1191665206 : HASHget(const Hash *h, BUN i)
118 : {
119 1191665206 : switch (h->width) {
120 : #ifdef BUN2
121 692958737 : case BUN2:
122 692958737 : i = (BUN) ((BUN2type *) h->Bckt)[i];
123 692958737 : return i == BUN2_NONE ? BUN_NONE : i;
124 : #endif
125 498706469 : case BUN4:
126 498706469 : i = (BUN) ((BUN4type *) h->Bckt)[i];
127 498706469 : return i == BUN4_NONE ? BUN_NONE : i;
128 : #ifdef BUN8
129 0 : case BUN8:
130 0 : i = (BUN) ((BUN8type *) h->Bckt)[i];
131 0 : return i == BUN8_NONE ? BUN_NONE : i;
132 : #endif
133 : default:
134 0 : MT_UNREACHABLE();
135 : }
136 : }
137 :
138 : __attribute__((__pure__))
139 : static inline BUN
140 393925806 : HASHgetlink(const Hash *h, BUN i)
141 : {
142 393925806 : switch (h->width) {
143 : #ifdef BUN2
144 190079986 : case BUN2:
145 190079986 : i = (BUN) ((BUN2type *) h->Link)[i];
146 190079986 : return i == BUN2_NONE ? BUN_NONE : i;
147 : #endif
148 203845820 : case BUN4:
149 203845820 : i = (BUN) ((BUN4type *) h->Link)[i];
150 203845820 : return i == BUN4_NONE ? BUN_NONE : i;
151 : #ifdef BUN8
152 0 : case BUN8:
153 0 : i = (BUN) ((BUN8type *) h->Link)[i];
154 0 : return i == BUN8_NONE ? BUN_NONE : i;
155 : #endif
156 : default:
157 0 : MT_UNREACHABLE();
158 : }
159 : }
160 :
161 : /* mix_bte(0x80) == 0x80 */
162 : #define mix_bte(X) ((unsigned int) (unsigned char) (X))
163 : /* mix_sht(0x8000) == 0x8000 */
164 : #define mix_sht(X) ((unsigned int) (unsigned short) (X))
165 : /* mix_int(0x81060038) == 0x80000000 */
166 : #define mix_int(X) (((unsigned int) (X) >> 7) ^ \
167 : ((unsigned int) (X) >> 13) ^ \
168 : ((unsigned int) (X) >> 21) ^ \
169 : (unsigned int) (X))
170 : /* mix_lng(0x810600394347424F) == 0x8000000000000000 */
171 : #define mix_lng(X) (((ulng) (X) >> 7) ^ \
172 : ((ulng) (X) >> 13) ^ \
173 : ((ulng) (X) >> 21) ^ \
174 : ((ulng) (X) >> 31) ^ \
175 : ((ulng) (X) >> 38) ^ \
176 : ((ulng) (X) >> 46) ^ \
177 : ((ulng) (X) >> 56) ^ \
178 : (ulng) (X))
179 : #ifdef HAVE_HGE
180 : /* mix_hge(0x810600394347424F90AC1429D6BFCC57) ==
181 : * 0x80000000000000000000000000000000 */
182 : #define mix_hge(X) (((uhge) (X) >> 7) ^ \
183 : ((uhge) (X) >> 13) ^ \
184 : ((uhge) (X) >> 21) ^ \
185 : ((uhge) (X) >> 31) ^ \
186 : ((uhge) (X) >> 38) ^ \
187 : ((uhge) (X) >> 46) ^ \
188 : ((uhge) (X) >> 56) ^ \
189 : ((uhge) (X) >> 65) ^ \
190 : ((uhge) (X) >> 70) ^ \
191 : ((uhge) (X) >> 78) ^ \
192 : ((uhge) (X) >> 85) ^ \
193 : ((uhge) (X) >> 90) ^ \
194 : ((uhge) (X) >> 98) ^ \
195 : ((uhge) (X) >> 107) ^ \
196 : ((uhge) (X) >> 116) ^ \
197 : (uhge) (X))
198 : #endif
199 : #define hash_loc(H,V) hash_any(H,V)
200 : #define hash_var(H,V) hash_any(H,V)
201 : #define hash_any(H,V) HASHbucket(H, ATOMhash((H)->type, (V)))
202 : #define hash_bte(H,V) (assert((H)->nbucket >= 256), (BUN) mix_bte(*(const unsigned char*) (V)))
203 : #define hash_sht(H,V) (assert((H)->nbucket >= 65536), (BUN) mix_sht(*(const unsigned short*) (V)))
204 : #define hash_int(H,V) HASHbucket(H, (BUN) mix_int(*(const unsigned int *) (V)))
205 : /* XXX return size_t-sized value for 8-byte oid? */
206 : #define hash_lng(H,V) HASHbucket(H, (BUN) mix_lng(*(const ulng *) (V)))
207 : #ifdef HAVE_HGE
208 : #define hash_hge(H,V) HASHbucket(H, (BUN) mix_hge(*(const uhge *) (V)))
209 : #endif
210 : #if SIZEOF_OID == SIZEOF_INT
211 : #define hash_oid(H,V) hash_int(H,V)
212 : #else
213 : #define hash_oid(H,V) hash_lng(H,V)
214 : #endif
215 :
216 : #define hash_flt(H,V) HASHbucket(H, ATOMhash(TYPE_flt, (V)))
217 : #define hash_dbl(H,V) HASHbucket(H, ATOMhash(TYPE_dbl, (V)))
218 :
219 : __attribute__((__pure__))
220 : static inline BUN
221 2166 : mix_uuid(const uuid *u)
222 : {
223 2166 : ulng u1, u2;
224 :
225 2166 : u1 = (ulng) (uint8_t) u->u[0] << 56 |
226 2166 : (ulng) (uint8_t) u->u[1] << 48 |
227 2166 : (ulng) (uint8_t) u->u[2] << 40 |
228 2166 : (ulng) (uint8_t) u->u[3] << 32 |
229 2166 : (ulng) (uint8_t) u->u[4] << 24 |
230 2166 : (ulng) (uint8_t) u->u[5] << 16 |
231 2166 : (ulng) (uint8_t) u->u[6] << 8 |
232 2166 : (ulng) (uint8_t) u->u[7];
233 2166 : u2 = (ulng) (uint8_t) u->u[8] << 56 |
234 2166 : (ulng) (uint8_t) u->u[9] << 48 |
235 2166 : (ulng) (uint8_t) u->u[10] << 40 |
236 2166 : (ulng) (uint8_t) u->u[11] << 32 |
237 2166 : (ulng) (uint8_t) u->u[12] << 24 |
238 2166 : (ulng) (uint8_t) u->u[13] << 16 |
239 2166 : (ulng) (uint8_t) u->u[14] << 8 |
240 2166 : (ulng) (uint8_t) u->u[15];
241 : /* we're not using mix_hge since this way we get the same result
242 : * on systems with and without 128 bit integer support */
243 2166 : return (BUN) (mix_lng(u1) ^ mix_lng(u2));
244 : }
245 : #define hash_uuid(H,V) HASHbucket(H, mix_uuid((const uuid *) (V)))
246 :
247 : /*
248 : * @- hash-table supported loop over BUNs The first parameter `bi' is
249 : * a BAT iterator, the second (`h') should point to the Hash
250 : * structure, and `v' a pointer to an atomic value (corresponding to
251 : * the head column of `b'). The 'hb' is an BUN index, pointing out the
252 : * `hb'-th BUN.
253 : */
254 : #define HASHloop(bi, h, hb, v) \
255 : for (hb = HASHget(h, HASHprobe(h, v)); \
256 : hb != BUN_NONE; \
257 : hb = HASHgetlink(h, hb)) \
258 : if (ATOMcmp(h->type, v, BUNtail(bi, hb)) == 0)
259 : #define HASHloop_str(bi, h, hb, v) \
260 : for (hb = HASHget(h, HASHbucket(h, strHash(v))); \
261 : hb != BUN_NONE; \
262 : hb = HASHgetlink(h, hb)) \
263 : if (strEQ(v, BUNtvar(bi, hb)))
264 :
265 : #define HASHlooploc(bi, h, hb, v) \
266 : for (hb = HASHget(h, HASHprobe(h, v)); \
267 : hb != BUN_NONE; \
268 : hb = HASHgetlink(h, hb)) \
269 : if (ATOMcmp(h->type, v, BUNtloc(bi, hb)) == 0)
270 : #define HASHloopvar(bi, h, hb, v) \
271 : for (hb = HASHget(h, HASHprobe(h, v)); \
272 : hb != BUN_NONE; \
273 : hb = HASHgetlink(h, hb)) \
274 : if (ATOMcmp(h->type, v, BUNtvar(bi, hb)) == 0)
275 :
276 : #define HASHloop_TYPE(bi, h, hb, v, TYPE) \
277 : for (hb = HASHget(h, hash_##TYPE(h, v)); \
278 : hb != BUN_NONE; \
279 : hb = HASHgetlink(h,hb)) \
280 : if (* (const TYPE *) (v) == * (const TYPE *) BUNtloc(bi, hb))
281 :
282 : /* need to take special care comparing nil floating point values */
283 : #define HASHloop_fTYPE(bi, h, hb, v, TYPE) \
284 : for (hb = HASHget(h, hash_##TYPE(h, v)); \
285 : hb != BUN_NONE; \
286 : hb = HASHgetlink(h,hb)) \
287 : if (is_##TYPE##_nil(* (const TYPE *) (v)) \
288 : ? is_##TYPE##_nil(* (const TYPE *) BUNtloc(bi, hb)) \
289 : : * (const TYPE *) (v) == * (const TYPE *) BUNtloc(bi, hb))
290 :
291 : #define HASHloop_bte(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, bte)
292 : #define HASHloop_sht(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, sht)
293 : #define HASHloop_int(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, int)
294 : #define HASHloop_lng(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, lng)
295 : #ifdef HAVE_HGE
296 : #define HASHloop_hge(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, hge)
297 : #endif
298 : #define HASHloop_flt(bi, h, hb, v) HASHloop_fTYPE(bi, h, hb, v, flt)
299 : #define HASHloop_dbl(bi, h, hb, v) HASHloop_fTYPE(bi, h, hb, v, dbl)
300 : #ifdef HAVE_HGE
301 : #define HASHloop_uuid(bi, hsh, hb, v) \
302 : for (hb = HASHget(hsh, hash_uuid(hsh, v)); \
303 : hb != BUN_NONE; \
304 : hb = HASHgetlink(hsh,hb)) \
305 : if (((const uuid *) (v))->h == ((const uuid *) BUNtloc(bi, hb))->h)
306 : #else
307 : #define HASHloop_uuid(bi, h, hb, v) \
308 : for (hb = HASHget(h, hash_uuid(h, v)); \
309 : hb != BUN_NONE; \
310 : hb = HASHgetlink(h,hb)) \
311 : if (memcmp((const uuid *) (v), (const uuid *) BUNtloc(bi, hb), 16) == 0)
312 : // if (((const uuid *) (v))->l[0] == ((const uuid *) BUNtloc(bi, hb))->l[0] && ((const uuid *) (v))->l[1] == ((const uuid *) BUNtloc(bi, hb))->l[1])
313 : #endif
314 :
315 : #endif /* _GDK_SEARCH_H_ */
|