Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #ifndef _GDK_SEARCH_H_
14 : #define _GDK_SEARCH_H_
15 :
16 : struct Hash {
17 : int type; /* type of index entity */
18 : uint8_t width; /* width of hash entries */
19 : BUN mask1; /* .mask1 < .nbucket <= .mask2 */
20 : BUN mask2; /* ... both are power-of-two minus one */
21 : BUN nbucket; /* number of valid hash buckets */
22 : BUN nunique; /* number of unique values */
23 : BUN nheads; /* number of chain heads */
24 : void *Bckt; /* hash buckets, points into .heapbckt */
25 : void *Link; /* collision list, points into .heaplink */
26 : Heap heaplink; /* heap where the hash links are stored */
27 : Heap heapbckt; /* heap where the hash buckets are stored */
28 : };
29 :
30 : static inline BUN
31 989725476 : HASHbucket(const Hash *h, BUN v)
32 : {
33 989725476 : return (v &= h->mask2) < h->nbucket ? v : v & h->mask1;
34 : }
35 :
36 : gdk_export gdk_return BAThash(BAT *b);
37 : gdk_export void HASHdestroy(BAT *b);
38 : gdk_export BUN HASHprobe(const Hash *h, const void *v);
39 : gdk_export BUN HASHlist(Hash *h, BUN i);
40 :
41 : #define BUN2 2
42 : #define BUN4 4
43 : #if SIZEOF_BUN == 8
44 : #define BUN8 8
45 : #endif
46 : #ifdef BUN2
47 : typedef uint16_t BUN2type;
48 : #endif
49 : typedef uint32_t BUN4type;
50 : #if SIZEOF_BUN > 4
51 : typedef uint64_t BUN8type;
52 : #endif
53 : #ifdef BUN2
54 : #define BUN2_NONE ((BUN2type) UINT16_C(0xFFFF))
55 : #endif
56 : #define BUN4_NONE ((BUN4type) UINT32_C(0xFFFFFFFF))
57 : #ifdef BUN8
58 : #define BUN8_NONE ((BUN8type) UINT64_C(0xFFFFFFFFFFFFFFFF))
59 : #endif
60 :
61 : /* play around with h->Bckt[i] and h->Link[j] */
62 :
63 : static inline void
64 290028658 : HASHput(Hash *h, BUN i, BUN v)
65 : {
66 : /* if v == BUN_NONE, assigning the value to a BUN2type
67 : * etc. automatically converts to BUN2_NONE etc. */
68 290028658 : switch (h->width) {
69 : #ifdef BUN2
70 179136950 : case BUN2:
71 179136950 : ((BUN2type *) h->Bckt)[i] = (BUN2type) v;
72 179136950 : break;
73 : #endif
74 110891708 : case BUN4:
75 110891708 : ((BUN4type *) h->Bckt)[i] = (BUN4type) v;
76 110891708 : break;
77 : #ifdef BUN8
78 0 : case BUN8:
79 0 : ((BUN8type *) h->Bckt)[i] = (BUN8type) v;
80 0 : break;
81 : #endif
82 : default:
83 0 : MT_UNREACHABLE();
84 : }
85 290028658 : }
86 :
87 : static inline void
88 289905423 : HASHputlink(Hash *h, BUN i, BUN v)
89 : {
90 : /* if v == BUN_NONE, assigning the value to a BUN2type
91 : * etc. automatically converts to BUN2_NONE etc. */
92 289905423 : switch (h->width) {
93 : #ifdef BUN2
94 179024647 : case BUN2:
95 179024647 : assert(v == BUN_NONE || v == BUN2_NONE || v < i);
96 179024647 : ((BUN2type *) h->Link)[i] = (BUN2type) v;
97 179024647 : break;
98 : #endif
99 110880776 : case BUN4:
100 110880776 : assert(v == BUN_NONE || v == BUN4_NONE || v < i);
101 110880776 : ((BUN4type *) h->Link)[i] = (BUN4type) v;
102 110880776 : break;
103 : #ifdef BUN8
104 0 : case BUN8:
105 0 : assert(v == BUN_NONE || v == BUN8_NONE || v < i);
106 0 : ((BUN8type *) h->Link)[i] = (BUN8type) v;
107 0 : break;
108 : #endif
109 : default:
110 0 : MT_UNREACHABLE();
111 : }
112 289905423 : }
113 :
114 : static inline BUN __attribute__((__pure__))
115 1195638734 : HASHget(const Hash *h, BUN i)
116 : {
117 1195638734 : switch (h->width) {
118 : #ifdef BUN2
119 697366890 : case BUN2:
120 697366890 : i = (BUN) ((BUN2type *) h->Bckt)[i];
121 697366890 : return i == BUN2_NONE ? BUN_NONE : i;
122 : #endif
123 498271844 : case BUN4:
124 498271844 : i = (BUN) ((BUN4type *) h->Bckt)[i];
125 498271844 : return i == BUN4_NONE ? BUN_NONE : i;
126 : #ifdef BUN8
127 0 : case BUN8:
128 0 : i = (BUN) ((BUN8type *) h->Bckt)[i];
129 0 : return i == BUN8_NONE ? BUN_NONE : i;
130 : #endif
131 : default:
132 0 : MT_UNREACHABLE();
133 : }
134 : }
135 :
136 : static inline BUN __attribute__((__pure__))
137 308398714 : HASHgetlink(const Hash *h, BUN i)
138 : {
139 308398714 : switch (h->width) {
140 : #ifdef BUN2
141 103789627 : case BUN2:
142 103789627 : i = (BUN) ((BUN2type *) h->Link)[i];
143 103789627 : return i == BUN2_NONE ? BUN_NONE : i;
144 : #endif
145 204609087 : case BUN4:
146 204609087 : i = (BUN) ((BUN4type *) h->Link)[i];
147 204609087 : return i == BUN4_NONE ? BUN_NONE : i;
148 : #ifdef BUN8
149 0 : case BUN8:
150 0 : i = (BUN) ((BUN8type *) h->Link)[i];
151 0 : return i == BUN8_NONE ? BUN_NONE : i;
152 : #endif
153 : default:
154 0 : MT_UNREACHABLE();
155 : }
156 : }
157 :
158 : /* mix_bte(0x80) == 0x80 */
159 : #define mix_bte(X) ((unsigned int) (unsigned char) (X))
160 : /* mix_sht(0x8000) == 0x8000 */
161 : #define mix_sht(X) ((unsigned int) (unsigned short) (X))
162 : /* mix_int(0x81060038) == 0x80000000 */
163 : #define mix_int(X) (((unsigned int) (X) >> 7) ^ \
164 : ((unsigned int) (X) >> 13) ^ \
165 : ((unsigned int) (X) >> 21) ^ \
166 : (unsigned int) (X))
167 : /* mix_lng(0x810600394347424F) == 0x8000000000000000 */
168 : #define mix_lng(X) (((ulng) (X) >> 7) ^ \
169 : ((ulng) (X) >> 13) ^ \
170 : ((ulng) (X) >> 21) ^ \
171 : ((ulng) (X) >> 31) ^ \
172 : ((ulng) (X) >> 38) ^ \
173 : ((ulng) (X) >> 46) ^ \
174 : ((ulng) (X) >> 56) ^ \
175 : (ulng) (X))
176 : #ifdef HAVE_HGE
177 : /* mix_hge(0x810600394347424F90AC1429D6BFCC57) ==
178 : * 0x80000000000000000000000000000000 */
179 : #define mix_hge(X) (((uhge) (X) >> 7) ^ \
180 : ((uhge) (X) >> 13) ^ \
181 : ((uhge) (X) >> 21) ^ \
182 : ((uhge) (X) >> 31) ^ \
183 : ((uhge) (X) >> 38) ^ \
184 : ((uhge) (X) >> 46) ^ \
185 : ((uhge) (X) >> 56) ^ \
186 : ((uhge) (X) >> 65) ^ \
187 : ((uhge) (X) >> 70) ^ \
188 : ((uhge) (X) >> 78) ^ \
189 : ((uhge) (X) >> 85) ^ \
190 : ((uhge) (X) >> 90) ^ \
191 : ((uhge) (X) >> 98) ^ \
192 : ((uhge) (X) >> 107) ^ \
193 : ((uhge) (X) >> 116) ^ \
194 : (uhge) (X))
195 : #endif
196 : #define hash_loc(H,V) hash_any(H,V)
197 : #define hash_var(H,V) hash_any(H,V)
198 : #define hash_any(H,V) HASHbucket(H, ATOMhash((H)->type, (V)))
199 : #define hash_bte(H,V) (assert((H)->nbucket >= 256), (BUN) mix_bte(*(const unsigned char*) (V)))
200 : #define hash_sht(H,V) (assert((H)->nbucket >= 65536), (BUN) mix_sht(*(const unsigned short*) (V)))
201 : #define hash_int(H,V) HASHbucket(H, (BUN) mix_int(*(const unsigned int *) (V)))
202 : /* XXX return size_t-sized value for 8-byte oid? */
203 : #define hash_lng(H,V) HASHbucket(H, (BUN) mix_lng(*(const ulng *) (V)))
204 : #ifdef HAVE_HGE
205 : #define hash_hge(H,V) HASHbucket(H, (BUN) mix_hge(*(const uhge *) (V)))
206 : #endif
207 : #if SIZEOF_OID == SIZEOF_INT
208 : #define hash_oid(H,V) hash_int(H,V)
209 : #else
210 : #define hash_oid(H,V) hash_lng(H,V)
211 : #endif
212 :
213 : #define hash_flt(H,V) HASHbucket(H, ATOMhash(TYPE_flt, (V)))
214 : #define hash_dbl(H,V) HASHbucket(H, ATOMhash(TYPE_dbl, (V)))
215 :
216 : static inline BUN __attribute__((__pure__))
217 2166 : mix_uuid(const uuid *u)
218 : {
219 2166 : ulng u1, u2;
220 :
221 2166 : u1 = (ulng) (uint8_t) u->u[0] << 56 |
222 2166 : (ulng) (uint8_t) u->u[1] << 48 |
223 2166 : (ulng) (uint8_t) u->u[2] << 40 |
224 2166 : (ulng) (uint8_t) u->u[3] << 32 |
225 2166 : (ulng) (uint8_t) u->u[4] << 24 |
226 2166 : (ulng) (uint8_t) u->u[5] << 16 |
227 2166 : (ulng) (uint8_t) u->u[6] << 8 |
228 2166 : (ulng) (uint8_t) u->u[7];
229 2166 : u2 = (ulng) (uint8_t) u->u[8] << 56 |
230 2166 : (ulng) (uint8_t) u->u[9] << 48 |
231 2166 : (ulng) (uint8_t) u->u[10] << 40 |
232 2166 : (ulng) (uint8_t) u->u[11] << 32 |
233 2166 : (ulng) (uint8_t) u->u[12] << 24 |
234 2166 : (ulng) (uint8_t) u->u[13] << 16 |
235 2166 : (ulng) (uint8_t) u->u[14] << 8 |
236 2166 : (ulng) (uint8_t) u->u[15];
237 : /* we're not using mix_hge since this way we get the same result
238 : * on systems with and without 128 bit integer support */
239 2166 : return (BUN) (mix_lng(u1) ^ mix_lng(u2));
240 : }
241 : #define hash_uuid(H,V) HASHbucket(H, mix_uuid((const uuid *) (V)))
242 :
243 : /*
244 : * @- hash-table supported loop over BUNs The first parameter `bi' is
245 : * a BAT iterator, the second (`h') should point to the Hash
246 : * structure, and `v' a pointer to an atomic value (corresponding to
247 : * the head column of `b'). The 'hb' is an BUN index, pointing out the
248 : * `hb'-th BUN.
249 : */
250 : #define HASHloop(bi, h, hb, v) \
251 : for (hb = HASHget(h, HASHprobe(h, v)); \
252 : hb != BUN_NONE; \
253 : hb = HASHgetlink(h, hb)) \
254 : if (ATOMcmp(h->type, v, BUNtail(bi, hb)) == 0)
255 : #define HASHloop_str(bi, h, hb, v) \
256 : for (hb = HASHget(h, HASHbucket(h, strHash(v))); \
257 : hb != BUN_NONE; \
258 : hb = HASHgetlink(h, hb)) \
259 : if (strEQ(v, BUNtvar(bi, hb)))
260 :
261 : #define HASHlooploc(bi, h, hb, v) \
262 : for (hb = HASHget(h, HASHprobe(h, v)); \
263 : hb != BUN_NONE; \
264 : hb = HASHgetlink(h, hb)) \
265 : if (ATOMcmp(h->type, v, BUNtloc(bi, hb)) == 0)
266 : #define HASHloopvar(bi, h, hb, v) \
267 : for (hb = HASHget(h, HASHprobe(h, v)); \
268 : hb != BUN_NONE; \
269 : hb = HASHgetlink(h, hb)) \
270 : if (ATOMcmp(h->type, v, BUNtvar(bi, hb)) == 0)
271 :
272 : #define HASHloop_TYPE(bi, h, hb, v, TYPE) \
273 : for (hb = HASHget(h, hash_##TYPE(h, v)); \
274 : hb != BUN_NONE; \
275 : hb = HASHgetlink(h,hb)) \
276 : if (* (const TYPE *) (v) == * (const TYPE *) BUNtloc(bi, hb))
277 :
278 : /* need to take special care comparing nil floating point values */
279 : #define HASHloop_fTYPE(bi, h, hb, v, TYPE) \
280 : for (hb = HASHget(h, hash_##TYPE(h, v)); \
281 : hb != BUN_NONE; \
282 : hb = HASHgetlink(h,hb)) \
283 : if (is_##TYPE##_nil(* (const TYPE *) (v)) \
284 : ? is_##TYPE##_nil(* (const TYPE *) BUNtloc(bi, hb)) \
285 : : * (const TYPE *) (v) == * (const TYPE *) BUNtloc(bi, hb))
286 :
287 : #define HASHloop_bte(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, bte)
288 : #define HASHloop_sht(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, sht)
289 : #define HASHloop_int(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, int)
290 : #define HASHloop_lng(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, lng)
291 : #ifdef HAVE_HGE
292 : #define HASHloop_hge(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, hge)
293 : #endif
294 : #define HASHloop_flt(bi, h, hb, v) HASHloop_fTYPE(bi, h, hb, v, flt)
295 : #define HASHloop_dbl(bi, h, hb, v) HASHloop_fTYPE(bi, h, hb, v, dbl)
296 : #ifdef HAVE_HGE
297 : #define HASHloop_uuid(bi, hsh, hb, v) \
298 : for (hb = HASHget(hsh, hash_uuid(hsh, v)); \
299 : hb != BUN_NONE; \
300 : hb = HASHgetlink(hsh,hb)) \
301 : if (((const uuid *) (v))->h == ((const uuid *) BUNtloc(bi, hb))->h)
302 : #else
303 : #define HASHloop_uuid(bi, h, hb, v) \
304 : for (hb = HASHget(h, hash_uuid(h, v)); \
305 : hb != BUN_NONE; \
306 : hb = HASHgetlink(h,hb)) \
307 : if (memcmp((const uuid *) (v), (const uuid *) BUNtloc(bi, hb), 16) == 0)
308 : // if (((const uuid *) (v))->l[0] == ((const uuid *) BUNtloc(bi, hb))->l[0] && ((const uuid *) (v))->l[1] == ((const uuid *) BUNtloc(bi, hb))->l[1])
309 : #endif
310 :
311 : #endif /* _GDK_SEARCH_H_ */
|