Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : - * @- The Problem
15 : - * When creating a join, we want to make a unique key of the attributes on both
16 : - * sides and then join these keys. Consider the following BATs.
17 : - *
18 : - * @verbatim
19 : - * orders customer link
20 : - * ==================== ===================== ===========
21 : - * zipcode h_nr zipcode hnr oid cid
22 : - * o1 13 9 c1 11 10 o1 c5
23 : - * o2 11 10 c2 11 11 o2 c1
24 : - * o3 11 11 c3 12 2 o3 c2
25 : - * o4 12 5 c4 12 1 o4 nil
26 : - * o5 11 10 c5 13 9 o5 c1
27 : - * o6 12 2 c6 14 7 o6 c3
28 : - * o7 13 9 o7 c5
29 : - * o8 12 1 o8 c4
30 : - * o9 13 9 o9 c5
31 : - * @end verbatim
32 : - *
33 : - * The current approach is designed to take minimal memory, as our previous
34 : - * solutions to the problem did not scale well. In case of singular keys,
35 : - * the link is executed by a simple join. Before going into the join, we
36 : - * make sure the end result size is not too large, which is done by looking
37 : - * at relation sizes (if the other key is unique) or, if that is not possible,
38 : - * by computing the exact join size.
39 : - *
40 : - * The join algorithm was also improved to do dynamic sampling to determine
41 : - * with high accuracy the join size, so that we can alloc in one go a memory
42 : - * region of sufficient size. This also reduces the ds\_link memory requirements.
43 : - *
44 : - * For compound keys, those that consist of multiple attributes, we now compute
45 : - * a derived column that contains an integer hash value derived from all
46 : - * key columns.
47 : - * This is done by computing a hash value for each individual key column
48 : - * and combining those by bitwise XOR and left-rotation. That is, for each
49 : - * column,we rotate the working hash value by N bits and XOR the hash value
50 : - * of the column over it. The working hash value is initialized with zero,
51 : - * and after all columns are processed, this working value is used as output.
52 : - * Computing the hash value for all columns in the key for one table is done
53 : - * by the command hash(). Hence, we do hash on both sides, and join
54 : - * that together with a simple join:
55 : - *
56 : - * @code{join(hash(keys), hash(keys.reverse);}
57 : - *
58 : - * One complication of this procedure are nil values:
59 : - * @table
60 : - * @itemize
61 : - * @item
62 : - * it may happen that the final hash-value (an int formed by a
63 : - * random bit pattern) accidentally has the value of int(nil).
64 : - * Notice that join never matches nil values.
65 : - * Hence these accidental nils must be replaced by a begin value (currently: 0).
66 : - * @item
67 : - * in case any of the compound key values is nil, our nil semantics
68 : - * require us that those tuples may never match on a join. Consequently,
69 : - * during the hash() processing of all compound key columns for computing
70 : - * the hash value, we also maintain a bit-bat that records which tuples had
71 : - * a nil value. The bit-bat is initialized to false, and the results of the
72 : - * nil-check on each column is OR-ed to it.
73 : - * Afterwards, the hash-value of all tuples that have this nil-bit set to
74 : - * TRUE are forced to int(nil), which will exclude them from matching.
75 : - * @end itemize
76 : - *
77 : - * Joining on hash values produces a @emph{superset} of the join result:
78 : - * it may happen that two different key combinations hash on the same value,
79 : - * which will make them match on the join (false hits). The final part
80 : - * of the ds\_link therefore consists of filtering out the false hits.
81 : - * This is done incrementally by joining back the join result to the original
82 : - * columns, incrementally one by one for each pair of corresponding
83 : - * columns. These values are compared with each other and we AND the
84 : - * result of this comparison together for each pair of columns.
85 : - * The bat containing these bits is initialized to all TRUE and serves as
86 : - * final result after all column pairs have been compared.
87 : - * The initial join result is finally filtered with this bit-bat.
88 : - *
89 : - * Joining back from the initial join-result to the original columns on
90 : - * both sides takes quite a lot of memory. For this reason, the false
91 : - * hit-filtering is done in slices (not all tuples at one time).
92 : - * In this way the memory requirements of this phase are kept low.
93 : - * In fact, the most memory demanding part of the join is the int-join
94 : - * on hash number, which takes N*24 bytes (where N= |L| = |R|).
95 : - * In comparison, the previous CTmultigroup/CTmultiderive approach
96 : - * took N*48 bytes. Additionally, by making it possible to use merge-sort,
97 : - * it avoids severe performance degradation (memory thrashing) as produced
98 : - * by the old ds\_link when the inner join relation would be larger than memory.
99 : - *
100 : - * If ds\_link performance is still an issue, the sort-merge join used here
101 : - * could be replaced by partitioned hash-join with radix-cluster/decluster.
102 : - *
103 : - * @+ Implementation
104 : - */
105 :
106 : /*
107 : * (c) Peter Boncz, Stefan Manegold, Niels Nes
108 : *
109 : * new functionality for the low-resource-consumption. It will
110 : * first one by one create a hash value out of the multiple attributes.
111 : * This hash value is computed by xoring and rotating individual hash
112 : * values together. We create a hash and rotate command to do this.
113 : */
114 :
115 : #include "monetdb_config.h"
116 : #include "mal.h"
117 : #include "mal_interpreter.h"
118 : #include "mal_exception.h"
119 :
120 : #define MKEYHASH_bte(valp) ((lng) valp)
121 : #define MKEYHASH_sht(valp) ((lng) valp)
122 : #define MKEYHASH_int(valp) ((lng) valp)
123 : #define MKEYHASH_lng(valp) (valp)
124 : #ifdef HAVE_HGE
125 : #define MKEYHASH_hge(valp) ((lng) (valp >> 64) ^ (lng) (valp))
126 : #endif
127 : #if SIZEOF_OID == SIZEOF_INT
128 : #define MKEYHASH_oid(valp) MKEYHASH_int(valp)
129 : #else
130 : #define MKEYHASH_oid(valp) MKEYHASH_lng(valp)
131 : #endif
132 :
133 : static inline ulng
134 57457192 : GDK_ROTATE(ulng x, int y, int z)
135 : {
136 57457192 : return (x << y) | (x >> z);
137 : }
138 :
139 : static str
140 1038 : MKEYhash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
141 : {
142 1038 : lng *res = getArgReference_lng(stk, pci, 0);
143 1038 : ptr val = getArgReference(stk, pci, 1);
144 1042 : int tpe = getArgType(mb, pci, 1);
145 :
146 1042 : (void) cntxt;
147 1042 : switch (ATOMstorage(tpe)) {
148 0 : case TYPE_void:
149 0 : *res = lng_nil; /* It can be called from SQL */
150 0 : break;
151 : case TYPE_ptr:
152 : // illegal types, avoid falling into the default case.
153 0 : assert(0);
154 : case TYPE_bte:
155 9 : *res = MKEYHASH_bte((*(bte *) val));
156 9 : break;
157 0 : case TYPE_sht:
158 0 : *res = MKEYHASH_sht((*(sht *) val));
159 0 : break;
160 953 : case TYPE_int:
161 : case TYPE_flt:
162 953 : *res = MKEYHASH_int((*(int *) val));
163 953 : break;
164 9 : case TYPE_lng:
165 : case TYPE_dbl:
166 9 : *res = MKEYHASH_lng((*(lng *) val));
167 9 : break;
168 : #ifdef HAVE_HGE
169 0 : case TYPE_hge:
170 0 : *res = MKEYHASH_hge((*(hge *) val));
171 0 : break;
172 : #endif
173 71 : default:
174 71 : if (ATOMextern(tpe))
175 71 : *res = (lng) ATOMhash(tpe, *(ptr *) val);
176 : else
177 0 : *res = (lng) ATOMhash(tpe, val);
178 : break;
179 : }
180 1042 : return MAL_SUCCEED;
181 : }
182 :
183 : #define MKEYbathashloop(TPE) \
184 : do { \
185 : const TPE *restrict v = (const TPE *) bi.base; \
186 : if (ci.tpe == cand_dense) { \
187 : for (BUN i = 0; i < ci.ncand; i++) { \
188 : oid p = (canditer_next_dense(&ci) - off); \
189 : r[i] = (ulng) MKEYHASH_##TPE(v[p]); \
190 : } \
191 : } else { \
192 : for (BUN i = 0; i < ci.ncand; i++) { \
193 : oid p = (canditer_next(&ci) - off); \
194 : r[i] = (ulng) MKEYHASH_##TPE(v[p]); \
195 : } \
196 : } \
197 : } while (0)
198 :
199 : static str
200 14644 : MKEYbathash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
201 : {
202 14644 : bat *res = getArgReference_bat(stk, pci, 0),
203 14644 : *bid = getArgReference_bat(stk, pci, 1),
204 14644 : *sid1 = pci->argc == 3 ? getArgReference_bat(stk, pci, 2) : NULL;
205 14644 : BAT *bn = NULL, *b = NULL, *bs = NULL;
206 14644 : str msg = MAL_SUCCEED;
207 14644 : struct canditer ci = { 0 };
208 14644 : oid off;
209 14644 : ulng *restrict r;
210 14644 : BATiter bi = { 0 };
211 :
212 14644 : (void) cntxt;
213 14644 : (void) mb;
214 14644 : if (!(b = BATdescriptor(*bid))) {
215 0 : msg = createException(MAL, "batmkey.bathash",
216 : SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
217 0 : goto bailout;
218 : }
219 14678 : if (sid1 && !is_bat_nil(*sid1) && !(bs = BATdescriptor(*sid1))) {
220 0 : msg = createException(MAL, "batmkey.bathash",
221 : SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
222 0 : goto bailout;
223 : }
224 14678 : canditer_init(&ci, b, bs);
225 14657 : if (!(bn = COLnew(ci.hseq, TYPE_lng, ci.ncand, TRANSIENT))) {
226 0 : msg = createException(MAL, "batmkey.bathash",
227 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
228 0 : goto bailout;
229 : }
230 14704 : off = b->hseqbase;
231 14704 : r = (ulng *) Tloc(bn, 0);
232 :
233 14704 : bi = bat_iterator(b);
234 14599 : switch (ATOMstorage(b->ttype)) {
235 0 : case TYPE_void:{
236 0 : oid o = b->tseqbase;
237 0 : if (is_oid_nil(o)) {
238 0 : for (BUN i = 0; i < ci.ncand; i++) {
239 0 : r[i] = (ulng) lng_nil;
240 : }
241 0 : } else if (ci.tpe == cand_dense) {
242 0 : for (BUN i = 0; i < ci.ncand; i++) {
243 0 : oid p = (canditer_next_dense(&ci) - off);
244 0 : r[i] = o + p;
245 : }
246 : } else {
247 0 : for (BUN i = 0; i < ci.ncand; i++) {
248 0 : oid p = (canditer_next(&ci) - off);
249 0 : r[i] = o + p;
250 : }
251 : }
252 : break;
253 : }
254 34 : case TYPE_bte:
255 1870 : MKEYbathashloop(bte);
256 : break;
257 7 : case TYPE_sht:
258 31498 : MKEYbathashloop(sht);
259 : break;
260 12089 : case TYPE_int:
261 : case TYPE_flt:
262 36638960 : MKEYbathashloop(int);
263 : break;
264 258 : case TYPE_lng:
265 : case TYPE_dbl:
266 16334 : MKEYbathashloop(lng);
267 : break;
268 : #ifdef HAVE_HGE
269 14 : case TYPE_hge:
270 32 : MKEYbathashloop(hge);
271 : break;
272 : #endif
273 2197 : default:{
274 2197 : BUN (*hash)(const void *) = BATatoms[b->ttype].atomHash;
275 :
276 2197 : if (ci.tpe == cand_dense) {
277 576153 : for (BUN i = 0; i < ci.ncand; i++) {
278 573971 : oid p = (canditer_next_dense(&ci) - off);
279 573971 : r[i] = (ulng) hash(BUNtail(bi, p));
280 : }
281 : } else {
282 10 : for (BUN i = 0; i < ci.ncand; i++) {
283 10 : oid p = (canditer_next(&ci) - off);
284 0 : r[i] = (ulng) hash(BUNtail(bi, p));
285 : }
286 : }
287 : }
288 : }
289 14584 : bat_iterator_end(&bi);
290 :
291 14588 : bailout:
292 14588 : BBPreclaim(b);
293 14669 : BBPreclaim(bs);
294 14675 : if (bn) {
295 14675 : assert(msg == MAL_SUCCEED);
296 14675 : BATsetcount(bn, ci.ncand);
297 14699 : bn->tnonil = false;
298 14699 : bn->tnil = false;
299 14699 : bn->tkey = BATcount(bn) <= 1;
300 14699 : bn->tsorted = BATcount(bn) <= 1;
301 14699 : bn->trevsorted = BATcount(bn) <= 1;
302 14699 : *res = bn->batCacheid;
303 14699 : BBPkeepref(bn);
304 : }
305 14667 : return msg;
306 : }
307 :
308 : static str
309 1810 : MKEYrotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
310 : {
311 1810 : lng *res = getArgReference_lng(stk, pci, 0);
312 1810 : ulng h = *getArgReference_lng(stk, pci, 1), val;
313 1810 : int lbit = *getArgReference_int(stk, pci, 2),
314 1810 : rbit = (int) sizeof(lng) * 8 - lbit,
315 1810 : tpe = getArgType(mb, pci, 3);
316 1810 : ptr pval = getArgReference(stk, pci, 3);
317 :
318 1810 : (void) cntxt;
319 1810 : switch (ATOMstorage(tpe)) {
320 8 : case TYPE_bte:
321 8 : val = (ulng) MKEYHASH_bte((*(bte *) pval));
322 8 : break;
323 1 : case TYPE_sht:
324 1 : val = (ulng) MKEYHASH_sht((*(sht *) pval));
325 1 : break;
326 1541 : case TYPE_int:
327 : case TYPE_flt:
328 1541 : val = (ulng) MKEYHASH_int((*(int *) pval));
329 1541 : break;
330 45 : case TYPE_lng:
331 : case TYPE_dbl:
332 45 : val = (ulng) MKEYHASH_lng((*(lng *) pval));
333 45 : break;
334 : #ifdef HAVE_HGE
335 0 : case TYPE_hge:
336 0 : val = (ulng) MKEYHASH_hge((*(hge *) pval));
337 0 : break;
338 : #endif
339 215 : default:
340 215 : if (ATOMextern(tpe))
341 215 : val = (ulng) ATOMhash(tpe, *(ptr *) pval);
342 : else
343 0 : val = (ulng) ATOMhash(tpe, pval);
344 : break;
345 : }
346 1810 : *res = (lng) (GDK_ROTATE(h, lbit, rbit) ^ val);
347 1810 : return MAL_SUCCEED;
348 : }
349 :
350 : #define MKEYbulk_rotate_xor_hashloop(TPE) \
351 : do { \
352 : const ulng *restrict h = (const ulng *) hbi.base; \
353 : const TPE *restrict v = (const TPE *) bi.base; \
354 : if (ci1.tpe == cand_dense && ci2.tpe == cand_dense) { \
355 : for (BUN i = 0; i < ci1.ncand; i++) { \
356 : oid p1 = (canditer_next_dense(&ci1) - off1), p2 = (canditer_next_dense(&ci2) - off2); \
357 : r[i] = GDK_ROTATE(h[p1], lbit, rbit) ^ (ulng) MKEYHASH_##TPE(v[p2]); \
358 : } \
359 : } else { \
360 : for (BUN i = 0; i < ci1.ncand; i++) { \
361 : oid p1 = (canditer_next(&ci1) - off1), p2 = (canditer_next(&ci2) - off2); \
362 : r[i] = GDK_ROTATE(h[p1], lbit, rbit) ^ (ulng) MKEYHASH_##TPE(v[p2]); \
363 : } \
364 : } \
365 : } while (0)
366 :
367 : static str
368 15914 : MKEYbulk_rotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
369 : InstrPtr pci)
370 : {
371 15914 : bat *res = getArgReference_bat(stk, pci, 0),
372 15914 : *hid = getArgReference_bat(stk, pci, 1),
373 15914 : *bid = getArgReference_bat(stk, pci, 3),
374 15914 : *sid1 = pci->argc == 6 ? getArgReference_bat(stk, pci, 4) : NULL,
375 15914 : *sid2 = pci->argc == 6 ? getArgReference_bat(stk, pci, 5) : NULL;
376 15914 : BAT *hb = NULL, *b = NULL, *bn = NULL, *s1 = NULL, *s2 = NULL;
377 15914 : int lbit = *getArgReference_int(stk, pci, 2),
378 15914 : rbit = (int) sizeof(lng) * 8 - lbit;
379 15914 : str msg = MAL_SUCCEED;
380 15914 : struct canditer ci1 = { 0 }, ci2 = { 0 };
381 15914 : oid off1, off2;
382 15914 : ulng *restrict r;
383 15914 : BATiter hbi = { 0 }, bi = { 0 };
384 :
385 15914 : (void) cntxt;
386 15914 : (void) mb;
387 15914 : if (!(hb = BATdescriptor(*hid))) {
388 0 : msg = createException(MAL, "batmkey.rotate_xor_hash",
389 : SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
390 0 : goto bailout;
391 : }
392 15962 : if (!(b = BATdescriptor(*bid))) {
393 0 : msg = createException(MAL, "batmkey.rotate_xor_hash",
394 : SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
395 0 : goto bailout;
396 : }
397 15957 : if (b->ttype == TYPE_msk || mask_cand(b)) {
398 0 : BAT *ob = b;
399 0 : b = BATunmask(b);
400 0 : BBPunfix(ob->batCacheid);
401 0 : if (!b) {
402 0 : BBPunfix(hb->batCacheid);
403 0 : throw(MAL, "batmkey.rotate_xor_hash", GDK_EXCEPTION);
404 : }
405 : }
406 15957 : if ((sid1 && !is_bat_nil(*sid1) && !(s1 = BATdescriptor(*sid1)))
407 15957 : || (sid2 && !is_bat_nil(*sid2) && !(s2 = BATdescriptor(*sid2)))) {
408 0 : msg = createException(MAL, "batmkey.rotate_xor_hash",
409 : SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
410 0 : goto bailout;
411 : }
412 :
413 15957 : canditer_init(&ci1, hb, s1);
414 15961 : canditer_init(&ci2, b, s2);
415 15961 : if (ci2.ncand != ci1.ncand || ci1.hseq != ci2.hseq) {
416 45 : msg = createException(MAL, "batmkey.rotate_xor_hash",
417 : ILLEGAL_ARGUMENT
418 : " Requires bats of identical size");
419 0 : goto bailout;
420 : }
421 :
422 15916 : if (!(bn = COLnew(ci1.hseq, TYPE_lng, ci1.ncand, TRANSIENT))) {
423 0 : msg = createException(MAL, "batmkey.rotate_xor_hash",
424 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
425 0 : goto bailout;
426 : }
427 :
428 15951 : r = (ulng *) Tloc(bn, 0);
429 :
430 15951 : off1 = hb->hseqbase;
431 15951 : off2 = b->hseqbase;
432 :
433 15951 : hbi = bat_iterator(hb);
434 15959 : bi = bat_iterator(b);
435 15856 : if (complex_cand(b)) {
436 0 : const ulng *restrict h = (const ulng *) hbi.base;
437 0 : for (BUN i = 0; i < ci1.ncand; i++) {
438 0 : oid p1 = canditer_next(&ci1) - off1;
439 0 : oid p2 = canditer_next(&ci2) - off2;
440 0 : r[i] = GDK_ROTATE(h[p1], lbit,
441 0 : rbit) ^ MKEYHASH_oid(*(oid *) Tpos(&bi, p2));
442 : }
443 : } else {
444 15856 : switch (ATOMstorage(b->ttype)) {
445 202 : case TYPE_bte:
446 23540 : MKEYbulk_rotate_xor_hashloop(bte);
447 : break;
448 29 : case TYPE_sht:
449 2090 : MKEYbulk_rotate_xor_hashloop(sht);
450 : break;
451 9491 : case TYPE_int:
452 : case TYPE_flt:
453 56022672 : MKEYbulk_rotate_xor_hashloop(int);
454 : break;
455 807 : case TYPE_lng:{ /* hb and b areas may overlap, so for this case the 'restrict' keyword cannot be used */
456 807 : const ulng *h = (const ulng *) hbi.base;
457 807 : const lng *v = (const lng *) bi.base;
458 807 : if (ci1.tpe == cand_dense && ci2.tpe == cand_dense) {
459 306793 : for (BUN i = 0; i < ci1.ncand; i++) {
460 305986 : oid p1 = (canditer_next_dense(&ci1) - off1),
461 305986 : p2 = (canditer_next_dense(&ci2) - off2);
462 305986 : r[i] = GDK_ROTATE(h[p1], lbit,
463 305986 : rbit) ^ (ulng) MKEYHASH_lng(v[p2]);
464 : }
465 : } else {
466 0 : for (BUN i = 0; i < ci1.ncand; i++) {
467 0 : oid p1 = (canditer_next(&ci1) - off1),
468 0 : p2 = (canditer_next(&ci2) - off2);
469 0 : r[i] = GDK_ROTATE(h[p1], lbit,
470 0 : rbit) ^ (ulng) MKEYHASH_lng(v[p2]);
471 : }
472 : }
473 : break;
474 : }
475 54 : case TYPE_dbl:
476 262 : MKEYbulk_rotate_xor_hashloop(lng);
477 : break;
478 : #ifdef HAVE_HGE
479 4 : case TYPE_hge:
480 14 : MKEYbulk_rotate_xor_hashloop(hge);
481 : break;
482 : #endif
483 5269 : default:{
484 5269 : const ulng *restrict h = (const ulng *) hbi.base;
485 5269 : BUN (*hash)(const void *) = BATatoms[b->ttype].atomHash;
486 :
487 5269 : if (ci1.tpe == cand_dense && ci2.tpe == cand_dense) {
488 1115851 : for (BUN i = 0; i < ci1.ncand; i++) {
489 1110598 : oid p1 = (canditer_next_dense(&ci1) - off1),
490 1110598 : p2 = (canditer_next_dense(&ci2) - off2);
491 2221180 : r[i] = GDK_ROTATE(h[p1], lbit,
492 1110598 : rbit) ^ (ulng) hash(BUNtail(bi, p2));
493 : }
494 : } else {
495 0 : for (BUN i = 0; i < ci1.ncand; i++) {
496 0 : oid p1 = (canditer_next(&ci1) - off1),
497 0 : p2 = (canditer_next(&ci2) - off2);
498 0 : r[i] = GDK_ROTATE(h[p1], lbit,
499 0 : rbit) ^ (ulng) hash(BUNtail(bi, p2));
500 : }
501 : }
502 : break;
503 : }
504 : }
505 : }
506 15840 : bat_iterator_end(&hbi);
507 15886 : bat_iterator_end(&bi);
508 :
509 15956 : bailout:
510 15956 : BBPreclaim(b);
511 15938 : BBPreclaim(hb);
512 15912 : BBPreclaim(s1);
513 15907 : BBPreclaim(s2);
514 15924 : if (bn) {
515 15924 : assert(msg == MAL_SUCCEED);
516 15924 : BATsetcount(bn, ci1.ncand);
517 15958 : bn->tnonil = false;
518 15958 : bn->tnil = false;
519 15958 : bn->tkey = BATcount(bn) <= 1;
520 15958 : bn->tsorted = BATcount(bn) <= 1;
521 15958 : bn->trevsorted = BATcount(bn) <= 1;
522 15958 : *res = bn->batCacheid;
523 15958 : BBPkeepref(bn);
524 : }
525 : return msg;
526 : }
527 :
528 : static str
529 0 : MKEYbulkconst_rotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
530 : InstrPtr pci)
531 : {
532 0 : bat *res = getArgReference_bat(stk, pci, 0),
533 0 : *hid = getArgReference_bat(stk, pci, 1),
534 0 : *sid1 = pci->argc == 5 ? getArgReference_bat(stk, pci, 4) : NULL;
535 0 : int lbit = *getArgReference_int(stk, pci, 2),
536 0 : tpe = getArgType(mb, pci, 3), rbit = (int) sizeof(lng) * 8 - lbit;
537 0 : ptr pval = getArgReference(stk, pci, 3);
538 0 : BAT *hb = NULL, *bn = NULL, *bs = NULL;
539 0 : str msg = MAL_SUCCEED;
540 0 : struct canditer ci = { 0 };
541 0 : oid off;
542 0 : ulng *restrict r, val;
543 0 : const ulng *restrict h;
544 0 : BATiter hbi = { 0 };
545 :
546 0 : (void) cntxt;
547 0 : if (!(hb = BATdescriptor(*hid))) {
548 0 : msg = createException(MAL, "batmkey.rotate_xor_hash",
549 : SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
550 0 : goto bailout;
551 : }
552 0 : if (sid1 && !is_bat_nil(*sid1) && !(bs = BATdescriptor(*sid1))) {
553 0 : msg = createException(MAL, "batmkey.rotate_xor_hash",
554 : SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
555 0 : goto bailout;
556 : }
557 0 : canditer_init(&ci, hb, bs);
558 0 : if (!(bn = COLnew(ci.hseq, TYPE_lng, ci.ncand, TRANSIENT))) {
559 0 : msg = createException(MAL, "batmkey.rotate_xor_hash",
560 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
561 0 : goto bailout;
562 : }
563 0 : off = hb->hseqbase;
564 :
565 0 : switch (ATOMstorage(tpe)) {
566 0 : case TYPE_bte:
567 0 : val = (ulng) MKEYHASH_bte((*(bte *) pval));
568 0 : break;
569 0 : case TYPE_sht:
570 0 : val = (ulng) MKEYHASH_sht((*(sht *) pval));
571 0 : break;
572 0 : case TYPE_int:
573 : case TYPE_flt:
574 0 : val = (ulng) MKEYHASH_int((*(int *) pval));
575 0 : break;
576 0 : case TYPE_lng:
577 : case TYPE_dbl:
578 0 : val = (ulng) MKEYHASH_lng((*(lng *) pval));
579 0 : break;
580 : #ifdef HAVE_HGE
581 0 : case TYPE_hge:
582 0 : val = (ulng) MKEYHASH_hge((*(hge *) pval));
583 0 : break;
584 : #endif
585 0 : default:
586 0 : if (ATOMextern(tpe))
587 0 : val = (ulng) ATOMhash(tpe, *(ptr *) pval);
588 : else
589 0 : val = (ulng) ATOMhash(tpe, pval);
590 : break;
591 : }
592 :
593 0 : r = (ulng *) Tloc(bn, 0);
594 0 : hbi = bat_iterator(hb);
595 0 : h = (const ulng *) hbi.base;
596 0 : if (ci.tpe == cand_dense) {
597 0 : for (BUN i = 0; i < ci.ncand; i++) {
598 0 : oid p = (canditer_next_dense(&ci) - off);
599 0 : r[i] = GDK_ROTATE(h[p], lbit, rbit) ^ val;
600 : }
601 : } else {
602 0 : for (BUN i = 0; i < ci.ncand; i++) {
603 0 : oid p = (canditer_next(&ci) - off);
604 0 : r[i] = GDK_ROTATE(h[p], lbit, rbit) ^ val;
605 : }
606 : }
607 0 : bat_iterator_end(&hbi);
608 :
609 0 : bailout:
610 0 : BBPreclaim(hb);
611 0 : BBPreclaim(bs);
612 0 : if (bn) {
613 0 : assert(msg == MAL_SUCCEED);
614 0 : BATsetcount(bn, ci.ncand);
615 0 : bn->tnonil = false;
616 0 : bn->tnil = false;
617 0 : bn->tkey = BATcount(bn) <= 1;
618 0 : bn->tsorted = BATcount(bn) <= 1;
619 0 : bn->trevsorted = BATcount(bn) <= 1;
620 0 : *res = bn->batCacheid;
621 0 : BBPkeepref(bn);
622 : }
623 0 : return msg;
624 : }
625 :
626 : #define MKEYconstbulk_rotate_xor_hashloop(TPE) \
627 : do { \
628 : const TPE *restrict v = (const TPE *) bi.base; \
629 : if (ci.tpe == cand_dense) { \
630 : for (BUN i = 0; i < ci.ncand; i++) { \
631 : oid p = (canditer_next_dense(&ci) - off); \
632 : r[i] = h ^ (ulng) MKEYHASH_##TPE(v[p]); \
633 : } \
634 : } else { \
635 : for (BUN i = 0; i < ci.ncand; i++) { \
636 : oid p = (canditer_next(&ci) - off); \
637 : r[i] = h ^ (ulng) MKEYHASH_##TPE(v[p]); \
638 : } \
639 : } \
640 : } while (0)
641 :
642 : static str
643 0 : MKEYconstbulk_rotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
644 : InstrPtr pci)
645 : {
646 0 : bat *res = getArgReference_bat(stk, pci, 0),
647 0 : *bid = getArgReference_bat(stk, pci, 3),
648 0 : *sid1 = pci->argc == 5 ? getArgReference_bat(stk, pci, 4) : NULL;
649 0 : int lbit = *getArgReference_int(stk, pci, 2),
650 0 : rbit = (int) sizeof(lng) * 8 - lbit;
651 0 : BAT *b = NULL, *bn = NULL, *bs = NULL;
652 0 : str msg = MAL_SUCCEED;
653 0 : struct canditer ci = { 0 };
654 0 : oid off;
655 0 : ulng *restrict r,
656 0 : h = GDK_ROTATE((ulng) *getArgReference_lng(stk, pci, 1), lbit, rbit);
657 0 : BATiter bi = { 0 };
658 :
659 0 : (void) cntxt;
660 0 : (void) mb;
661 0 : if (!(b = BATdescriptor(*bid))) {
662 0 : msg = createException(MAL, "batmkey.rotate_xor_hash",
663 : SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
664 0 : goto bailout;
665 : }
666 0 : if (sid1 && !is_bat_nil(*sid1) && !(bs = BATdescriptor(*sid1))) {
667 0 : msg = createException(MAL, "batmkey.rotate_xor_hash",
668 : SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
669 0 : goto bailout;
670 : }
671 0 : canditer_init(&ci, b, bs);
672 0 : if (!(bn = COLnew(ci.hseq, TYPE_lng, ci.ncand, TRANSIENT))) {
673 0 : msg = createException(MAL, "batmkey.rotate_xor_hash",
674 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
675 0 : goto bailout;
676 : }
677 0 : off = b->hseqbase;
678 0 : r = (ulng *) Tloc(bn, 0);
679 :
680 0 : bi = bat_iterator(b);
681 0 : switch (ATOMstorage(b->ttype)) {
682 0 : case TYPE_bte:
683 0 : MKEYconstbulk_rotate_xor_hashloop(bte);
684 : break;
685 0 : case TYPE_sht:
686 0 : MKEYconstbulk_rotate_xor_hashloop(sht);
687 : break;
688 0 : case TYPE_int:
689 : case TYPE_flt:
690 0 : MKEYconstbulk_rotate_xor_hashloop(int);
691 : break;
692 0 : case TYPE_lng:
693 : case TYPE_dbl:
694 0 : MKEYconstbulk_rotate_xor_hashloop(lng);
695 : break;
696 : #ifdef HAVE_HGE
697 0 : case TYPE_hge:
698 0 : MKEYconstbulk_rotate_xor_hashloop(hge);
699 : break;
700 : #endif
701 0 : default:{
702 0 : BUN (*hash)(const void *) = BATatoms[b->ttype].atomHash;
703 :
704 0 : if (ci.tpe == cand_dense) {
705 0 : for (BUN i = 0; i < ci.ncand; i++) {
706 0 : oid p = (canditer_next_dense(&ci) - off);
707 0 : r[i] = h ^ (ulng) hash(BUNtail(bi, p));
708 : }
709 : } else {
710 0 : for (BUN i = 0; i < ci.ncand; i++) {
711 0 : oid p = (canditer_next(&ci) - off);
712 0 : r[i] = h ^ (ulng) hash(BUNtail(bi, p));
713 : }
714 : }
715 : break;
716 : }
717 : }
718 0 : bat_iterator_end(&bi);
719 :
720 0 : bailout:
721 0 : BBPreclaim(b);
722 0 : BBPreclaim(bs);
723 0 : if (bn) {
724 0 : assert(msg == MAL_SUCCEED);
725 0 : BATsetcount(bn, ci.ncand);
726 0 : bn->tnonil = false;
727 0 : bn->tnil = false;
728 0 : bn->tkey = BATcount(bn) <= 1;
729 0 : bn->tsorted = BATcount(bn) <= 1;
730 0 : bn->trevsorted = BATcount(bn) <= 1;
731 0 : *res = bn->batCacheid;
732 0 : BBPkeepref(bn);
733 : }
734 0 : return msg;
735 : }
736 :
737 : #include "mel.h"
738 : mel_func mkey_init_funcs[] = {
739 : pattern("mkey", "hash", MKEYhash, false, "calculate a hash value", args(1,2, arg("",lng),argany("v",0))),
740 : pattern("batmkey", "hash", MKEYbathash, false, "calculate a hash value", args(1,2, batarg("",lng),batargany("b",0))),
741 : pattern("batmkey", "hash", MKEYbathash, false, "calculate a hash value, with a candidate list", args(1,3, batarg("",lng),batargany("b",0),batarg("s",oid))),
742 : pattern("mkey", "rotate_xor_hash", MKEYrotate_xor_hash, false, "post: [:xor=]([:rotate=](h, nbits), [hash](b))", args(1,4, arg("",lng),arg("h",lng),arg("nbits",int),argany("v",0))),
743 : pattern("batmkey", "rotate_xor_hash", MKEYbulkconst_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b))", args(1,4, batarg("",lng),batarg("h",lng),arg("nbits",int),argany("v",0))),
744 : pattern("batmkey", "rotate_xor_hash", MKEYbulkconst_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b)), with a candidate list", args(1,5, batarg("",lng),batarg("h",lng),arg("nbits",int),argany("v",0),batarg("s",oid))),
745 : pattern("batmkey", "rotate_xor_hash", MKEYconstbulk_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b))", args(1,4, batarg("",lng),arg("h",lng),arg("nbits",int),batargany("b",0))),
746 : pattern("batmkey", "rotate_xor_hash", MKEYconstbulk_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b)), with a candidate list", args(1,5, batarg("",lng),arg("h",lng),arg("nbits",int),batargany("b",0),batarg("s",oid))),
747 : pattern("batmkey", "rotate_xor_hash", MKEYbulk_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b))", args(1,4, batarg("",lng),batarg("h",lng),arg("nbits",int),batargany("b",0))),
748 : pattern("batmkey", "rotate_xor_hash", MKEYbulk_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b)), with candidate lists", args(1,6, batarg("",lng),batarg("h",lng),arg("nbits",int),batargany("b",0),batarg("s1",oid),batarg("s2",oid))),
749 : { .imp=NULL }
750 : };
751 : #include "mal_import.h"
752 : #ifdef _MSC_VER
753 : #undef read
754 : #pragma section(".CRT$XCU",read)
755 : #endif
756 334 : LIB_STARTUP_FUNC(init_mkey_mal)
757 334 : { mal_module("mkey", NULL, mkey_init_funcs); }
|