Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #ifndef _GDK_SEARCH_H_
14 : #define _GDK_SEARCH_H_
15 :
16 : struct Hash {
17 : int type; /* type of index entity */
18 : uint8_t width; /* width of hash entries */
19 : BUN mask1; /* .mask1 < .nbucket <= .mask2 */
20 : BUN mask2; /* ... both are power-of-two minus one */
21 : BUN nbucket; /* number of valid hash buckets */
22 : BUN nunique; /* number of unique values */
23 : BUN nheads; /* number of chain heads */
24 : void *Bckt; /* hash buckets, points into .heapbckt */
25 : void *Link; /* collision list, points into .heaplink */
26 : Heap heaplink; /* heap where the hash links are stored */
27 : Heap heapbckt; /* heap where the hash buckets are stored */
28 : };
29 :
30 : static inline BUN
31 961526708 : HASHbucket(const Hash *h, BUN v)
32 : {
33 961526708 : return (v &= h->mask2) < h->nbucket ? v : v & h->mask1;
34 : }
35 :
36 : gdk_export gdk_return BAThash(BAT *b);
37 : gdk_export void HASHdestroy(BAT *b);
38 : gdk_export BUN HASHprobe(const Hash *h, const void *v);
39 : gdk_export BUN HASHlist(Hash *h, BUN i);
40 : gdk_export size_t HASHsize(BAT *b);
41 :
42 : #define BUN2 2
43 : #define BUN4 4
44 : #if SIZEOF_BUN == 8
45 : #define BUN8 8
46 : #endif
47 : #ifdef BUN2
48 : typedef uint16_t BUN2type;
49 : #endif
50 : typedef uint32_t BUN4type;
51 : #if SIZEOF_BUN > 4
52 : typedef uint64_t BUN8type;
53 : #endif
54 : #ifdef BUN2
55 : #define BUN2_NONE ((BUN2type) UINT16_C(0xFFFF))
56 : #endif
57 : #define BUN4_NONE ((BUN4type) UINT32_C(0xFFFFFFFF))
58 : #ifdef BUN8
59 : #define BUN8_NONE ((BUN8type) UINT64_C(0xFFFFFFFFFFFFFFFF))
60 : #endif
61 :
62 : /* play around with h->Bckt[i] and h->Link[j] */
63 :
64 : static inline void
65 286970359 : HASHput(Hash *h, BUN i, BUN v)
66 : {
67 : /* if v == BUN_NONE, assigning the value to a BUN2type
68 : * etc. automatically converts to BUN2_NONE etc. */
69 286970359 : switch (h->width) {
70 : #ifdef BUN2
71 176906754 : case BUN2:
72 176906754 : ((BUN2type *) h->Bckt)[i] = (BUN2type) v;
73 176906754 : break;
74 : #endif
75 110063605 : case BUN4:
76 110063605 : ((BUN4type *) h->Bckt)[i] = (BUN4type) v;
77 110063605 : break;
78 : #ifdef BUN8
79 0 : case BUN8:
80 0 : ((BUN8type *) h->Bckt)[i] = (BUN8type) v;
81 0 : break;
82 : #endif
83 : default:
84 0 : MT_UNREACHABLE();
85 : }
86 286970359 : }
87 :
88 : static inline void
89 287032563 : HASHputlink(Hash *h, BUN i, BUN v)
90 : {
91 : /* if v == BUN_NONE, assigning the value to a BUN2type
92 : * etc. automatically converts to BUN2_NONE etc. */
93 287032563 : switch (h->width) {
94 : #ifdef BUN2
95 176805805 : case BUN2:
96 176805805 : assert(v == BUN_NONE || v == BUN2_NONE || v < i);
97 176805805 : ((BUN2type *) h->Link)[i] = (BUN2type) v;
98 176805805 : break;
99 : #endif
100 110226758 : case BUN4:
101 110226758 : assert(v == BUN_NONE || v == BUN4_NONE || v < i);
102 110226758 : ((BUN4type *) h->Link)[i] = (BUN4type) v;
103 110226758 : break;
104 : #ifdef BUN8
105 0 : case BUN8:
106 0 : assert(v == BUN_NONE || v == BUN8_NONE || v < i);
107 0 : ((BUN8type *) h->Link)[i] = (BUN8type) v;
108 0 : break;
109 : #endif
110 : default:
111 0 : MT_UNREACHABLE();
112 : }
113 287032563 : }
114 :
115 : static inline BUN __attribute__((__pure__))
116 1156938702 : HASHget(const Hash *h, BUN i)
117 : {
118 1156938702 : switch (h->width) {
119 : #ifdef BUN2
120 683979515 : case BUN2:
121 683979515 : i = (BUN) ((BUN2type *) h->Bckt)[i];
122 683979515 : return i == BUN2_NONE ? BUN_NONE : i;
123 : #endif
124 472959187 : case BUN4:
125 472959187 : i = (BUN) ((BUN4type *) h->Bckt)[i];
126 472959187 : return i == BUN4_NONE ? BUN_NONE : i;
127 : #ifdef BUN8
128 0 : case BUN8:
129 0 : i = (BUN) ((BUN8type *) h->Bckt)[i];
130 0 : return i == BUN8_NONE ? BUN_NONE : i;
131 : #endif
132 : default:
133 0 : MT_UNREACHABLE();
134 : }
135 : }
136 :
137 : static inline BUN __attribute__((__pure__))
138 412574859 : HASHgetlink(const Hash *h, BUN i)
139 : {
140 412574859 : switch (h->width) {
141 : #ifdef BUN2
142 215672281 : case BUN2:
143 215672281 : i = (BUN) ((BUN2type *) h->Link)[i];
144 215672281 : return i == BUN2_NONE ? BUN_NONE : i;
145 : #endif
146 196902578 : case BUN4:
147 196902578 : i = (BUN) ((BUN4type *) h->Link)[i];
148 196902578 : return i == BUN4_NONE ? BUN_NONE : i;
149 : #ifdef BUN8
150 0 : case BUN8:
151 0 : i = (BUN) ((BUN8type *) h->Link)[i];
152 0 : return i == BUN8_NONE ? BUN_NONE : i;
153 : #endif
154 : default:
155 0 : MT_UNREACHABLE();
156 : }
157 : }
158 :
159 : /* mix_bte(0x80) == 0x80 */
160 : #define mix_bte(X) ((unsigned int) (unsigned char) (X))
161 : /* mix_sht(0x8000) == 0x8000 */
162 : #define mix_sht(X) ((unsigned int) (unsigned short) (X))
163 : /* mix_int(0x81060038) == 0x80000000 */
164 : #define mix_int(X) (((unsigned int) (X) >> 7) ^ \
165 : ((unsigned int) (X) >> 13) ^ \
166 : ((unsigned int) (X) >> 21) ^ \
167 : (unsigned int) (X))
168 : /* mix_lng(0x810600394347424F) == 0x8000000000000000 */
169 : #define mix_lng(X) (((ulng) (X) >> 7) ^ \
170 : ((ulng) (X) >> 13) ^ \
171 : ((ulng) (X) >> 21) ^ \
172 : ((ulng) (X) >> 31) ^ \
173 : ((ulng) (X) >> 38) ^ \
174 : ((ulng) (X) >> 46) ^ \
175 : ((ulng) (X) >> 56) ^ \
176 : (ulng) (X))
177 : #ifdef HAVE_HGE
178 : /* mix_hge(0x810600394347424F90AC1429D6BFCC57) ==
179 : * 0x80000000000000000000000000000000 */
180 : #define mix_hge(X) (((uhge) (X) >> 7) ^ \
181 : ((uhge) (X) >> 13) ^ \
182 : ((uhge) (X) >> 21) ^ \
183 : ((uhge) (X) >> 31) ^ \
184 : ((uhge) (X) >> 38) ^ \
185 : ((uhge) (X) >> 46) ^ \
186 : ((uhge) (X) >> 56) ^ \
187 : ((uhge) (X) >> 65) ^ \
188 : ((uhge) (X) >> 70) ^ \
189 : ((uhge) (X) >> 78) ^ \
190 : ((uhge) (X) >> 85) ^ \
191 : ((uhge) (X) >> 90) ^ \
192 : ((uhge) (X) >> 98) ^ \
193 : ((uhge) (X) >> 107) ^ \
194 : ((uhge) (X) >> 116) ^ \
195 : (uhge) (X))
196 : #endif
197 : #define hash_loc(H,V) hash_any(H,V)
198 : #define hash_var(H,V) hash_any(H,V)
199 : #define hash_any(H,V) HASHbucket(H, ATOMhash((H)->type, (V)))
200 : #define hash_bte(H,V) (assert((H)->nbucket >= 256), (BUN) mix_bte(*(const unsigned char*) (V)))
201 : #define hash_sht(H,V) (assert((H)->nbucket >= 65536), (BUN) mix_sht(*(const unsigned short*) (V)))
202 : #define hash_int(H,V) HASHbucket(H, (BUN) mix_int(*(const unsigned int *) (V)))
203 : /* XXX return size_t-sized value for 8-byte oid? */
204 : #define hash_lng(H,V) HASHbucket(H, (BUN) mix_lng(*(const ulng *) (V)))
205 : #ifdef HAVE_HGE
206 : #define hash_hge(H,V) HASHbucket(H, (BUN) mix_hge(*(const uhge *) (V)))
207 : #endif
208 : #if SIZEOF_OID == SIZEOF_INT
209 : #define hash_oid(H,V) hash_int(H,V)
210 : #else
211 : #define hash_oid(H,V) hash_lng(H,V)
212 : #endif
213 :
214 : #define hash_flt(H,V) HASHbucket(H, ATOMhash(TYPE_flt, (V)))
215 : #define hash_dbl(H,V) HASHbucket(H, ATOMhash(TYPE_dbl, (V)))
216 :
217 : static inline BUN __attribute__((__pure__))
218 2166 : mix_uuid(const uuid *u)
219 : {
220 2166 : ulng u1, u2;
221 :
222 2166 : u1 = (ulng) (uint8_t) u->u[0] << 56 |
223 2166 : (ulng) (uint8_t) u->u[1] << 48 |
224 2166 : (ulng) (uint8_t) u->u[2] << 40 |
225 2166 : (ulng) (uint8_t) u->u[3] << 32 |
226 2166 : (ulng) (uint8_t) u->u[4] << 24 |
227 2166 : (ulng) (uint8_t) u->u[5] << 16 |
228 2166 : (ulng) (uint8_t) u->u[6] << 8 |
229 2166 : (ulng) (uint8_t) u->u[7];
230 2166 : u2 = (ulng) (uint8_t) u->u[8] << 56 |
231 2166 : (ulng) (uint8_t) u->u[9] << 48 |
232 2166 : (ulng) (uint8_t) u->u[10] << 40 |
233 2166 : (ulng) (uint8_t) u->u[11] << 32 |
234 2166 : (ulng) (uint8_t) u->u[12] << 24 |
235 2166 : (ulng) (uint8_t) u->u[13] << 16 |
236 2166 : (ulng) (uint8_t) u->u[14] << 8 |
237 2166 : (ulng) (uint8_t) u->u[15];
238 : /* we're not using mix_hge since this way we get the same result
239 : * on systems with and without 128 bit integer support */
240 2166 : return (BUN) (mix_lng(u1) ^ mix_lng(u2));
241 : }
242 : #define hash_uuid(H,V) HASHbucket(H, mix_uuid((const uuid *) (V)))
243 :
244 : /*
245 : * @- hash-table supported loop over BUNs The first parameter `bi' is
246 : * a BAT iterator, the second (`h') should point to the Hash
247 : * structure, and `v' a pointer to an atomic value (corresponding to
248 : * the head column of `b'). The 'hb' is an BUN index, pointing out the
249 : * `hb'-th BUN.
250 : */
251 : #define HASHloop(bi, h, hb, v) \
252 : for (hb = HASHget(h, HASHprobe(h, v)); \
253 : hb != BUN_NONE; \
254 : hb = HASHgetlink(h, hb)) \
255 : if (ATOMcmp(h->type, v, BUNtail(bi, hb)) == 0)
256 : #define HASHloop_str(bi, h, hb, v) \
257 : for (hb = HASHget(h, HASHbucket(h, strHash(v))); \
258 : hb != BUN_NONE; \
259 : hb = HASHgetlink(h, hb)) \
260 : if (strEQ(v, BUNtvar(bi, hb)))
261 :
262 : #define HASHlooploc(bi, h, hb, v) \
263 : for (hb = HASHget(h, HASHprobe(h, v)); \
264 : hb != BUN_NONE; \
265 : hb = HASHgetlink(h, hb)) \
266 : if (ATOMcmp(h->type, v, BUNtloc(bi, hb)) == 0)
267 : #define HASHloopvar(bi, h, hb, v) \
268 : for (hb = HASHget(h, HASHprobe(h, v)); \
269 : hb != BUN_NONE; \
270 : hb = HASHgetlink(h, hb)) \
271 : if (ATOMcmp(h->type, v, BUNtvar(bi, hb)) == 0)
272 :
273 : #define HASHloop_TYPE(bi, h, hb, v, TYPE) \
274 : for (hb = HASHget(h, hash_##TYPE(h, v)); \
275 : hb != BUN_NONE; \
276 : hb = HASHgetlink(h,hb)) \
277 : if (* (const TYPE *) (v) == * (const TYPE *) BUNtloc(bi, hb))
278 :
279 : /* need to take special care comparing nil floating point values */
280 : #define HASHloop_fTYPE(bi, h, hb, v, TYPE) \
281 : for (hb = HASHget(h, hash_##TYPE(h, v)); \
282 : hb != BUN_NONE; \
283 : hb = HASHgetlink(h,hb)) \
284 : if (is_##TYPE##_nil(* (const TYPE *) (v)) \
285 : ? is_##TYPE##_nil(* (const TYPE *) BUNtloc(bi, hb)) \
286 : : * (const TYPE *) (v) == * (const TYPE *) BUNtloc(bi, hb))
287 :
288 : #define HASHloop_bte(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, bte)
289 : #define HASHloop_sht(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, sht)
290 : #define HASHloop_int(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, int)
291 : #define HASHloop_lng(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, lng)
292 : #ifdef HAVE_HGE
293 : #define HASHloop_hge(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, hge)
294 : #endif
295 : #define HASHloop_flt(bi, h, hb, v) HASHloop_fTYPE(bi, h, hb, v, flt)
296 : #define HASHloop_dbl(bi, h, hb, v) HASHloop_fTYPE(bi, h, hb, v, dbl)
297 : #ifdef HAVE_HGE
298 : #define HASHloop_uuid(bi, hsh, hb, v) \
299 : for (hb = HASHget(hsh, hash_uuid(hsh, v)); \
300 : hb != BUN_NONE; \
301 : hb = HASHgetlink(hsh,hb)) \
302 : if (((const uuid *) (v))->h == ((const uuid *) BUNtloc(bi, hb))->h)
303 : #else
304 : #define HASHloop_uuid(bi, h, hb, v) \
305 : for (hb = HASHget(h, hash_uuid(h, v)); \
306 : hb != BUN_NONE; \
307 : hb = HASHgetlink(h,hb)) \
308 : if (memcmp((const uuid *) (v), (const uuid *) BUNtloc(bi, hb), 16) == 0)
309 : // if (((const uuid *) (v))->l[0] == ((const uuid *) BUNtloc(bi, hb))->l[0] && ((const uuid *) (v))->l[1] == ((const uuid *) BUNtloc(bi, hb))->l[1])
310 : #endif
311 :
312 : #endif /* _GDK_SEARCH_H_ */
|