Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : - * @- The Problem
15 : - * When creating a join, we want to make a unique key of the attributes on both
16 : - * sides and then join these keys. Consider the following BATs.
17 : - *
18 : - * @verbatim
19 : - * orders customer link
20 : - * ==================== ===================== ===========
21 : - * zipcode h_nr zipcode hnr oid cid
22 : - * o1 13 9 c1 11 10 o1 c5
23 : - * o2 11 10 c2 11 11 o2 c1
24 : - * o3 11 11 c3 12 2 o3 c2
25 : - * o4 12 5 c4 12 1 o4 nil
26 : - * o5 11 10 c5 13 9 o5 c1
27 : - * o6 12 2 c6 14 7 o6 c3
28 : - * o7 13 9 o7 c5
29 : - * o8 12 1 o8 c4
30 : - * o9 13 9 o9 c5
31 : - * @end verbatim
32 : - *
33 : - * The current approach is designed to take minimal memory, as our previous
34 : - * solutions to the problem did not scale well. In case of singular keys,
35 : - * the link is executed by a simple join. Before going into the join, we
36 : - * make sure the end result size is not too large, which is done by looking
37 : - * at relation sizes (if the other key is unique) or, if that is not possible,
38 : - * by computing the exact join size.
39 : - *
40 : - * The join algorithm was also improved to do dynamic sampling to determine
41 : - * with high accuracy the join size, so that we can alloc in one go a memory
42 : - * region of sufficient size. This also reduces the ds\_link memory requirements.
43 : - *
44 : - * For compound keys, those that consist of multiple attributes, we now compute
45 : - * a derived column that contains an integer hash value derived from all
46 : - * key columns.
47 : - * This is done by computing a hash value for each individual key column
48 : - * and combining those by bitwise XOR and left-rotation. That is, for each
49 : - * column,we rotate the working hash value by N bits and XOR the hash value
50 : - * of the column over it. The working hash value is initialized with zero,
51 : - * and after all columns are processed, this working value is used as output.
52 : - * Computing the hash value for all columns in the key for one table is done
53 : - * by the command hash(). Hence, we do hash on both sides, and join
54 : - * that together with a simple join:
55 : - *
56 : - * @code{join(hash(keys), hash(keys.reverse);}
57 : - *
58 : - * One complication of this procedure are nil values:
59 : - * @table
60 : - * @itemize
61 : - * @item
62 : - * it may happen that the final hash-value (an int formed by a
63 : - * random bit pattern) accidentally has the value of int(nil).
64 : - * Notice that join never matches nil values.
65 : - * Hence these accidental nils must be replaced by a begin value (currently: 0).
66 : - * @item
67 : - * in case any of the compound key values is nil, our nil semantics
68 : - * require us that those tuples may never match on a join. Consequently,
69 : - * during the hash() processing of all compound key columns for computing
70 : - * the hash value, we also maintain a bit-bat that records which tuples had
71 : - * a nil value. The bit-bat is initialized to false, and the results of the
72 : - * nil-check on each column is OR-ed to it.
73 : - * Afterwards, the hash-value of all tuples that have this nil-bit set to
74 : - * TRUE are forced to int(nil), which will exclude them from matching.
75 : - * @end itemize
76 : - *
77 : - * Joining on hash values produces a @emph{superset} of the join result:
78 : - * it may happen that two different key combinations hash on the same value,
79 : - * which will make them match on the join (false hits). The final part
80 : - * of the ds\_link therefore consists of filtering out the false hits.
81 : - * This is done incrementally by joining back the join result to the original
82 : - * columns, incrementally one by one for each pair of corresponding
83 : - * columns. These values are compared with each other and we AND the
84 : - * result of this comparison together for each pair of columns.
85 : - * The bat containing these bits is initialized to all TRUE and serves as
86 : - * final result after all column pairs have been compared.
87 : - * The initial join result is finally filtered with this bit-bat.
88 : - *
89 : - * Joining back from the initial join-result to the original columns on
90 : - * both sides takes quite a lot of memory. For this reason, the false
91 : - * hit-filtering is done in slices (not all tuples at one time).
92 : - * In this way the memory requirements of this phase are kept low.
93 : - * In fact, the most memory demanding part of the join is the int-join
94 : - * on hash number, which takes N*24 bytes (where N= |L| = |R|).
95 : - * In comparison, the previous CTmultigroup/CTmultiderive approach
96 : - * took N*48 bytes. Additionally, by making it possible to use merge-sort,
97 : - * it avoids severe performance degradation (memory thrashing) as produced
98 : - * by the old ds\_link when the inner join relation would be larger than memory.
99 : - *
100 : - * If ds\_link performance is still an issue, the sort-merge join used here
101 : - * could be replaced by partitioned hash-join with radix-cluster/decluster.
102 : - *
103 : - * @+ Implementation
104 : - */
105 :
106 : /*
107 : * (c) Peter Boncz, Stefan Manegold, Niels Nes
108 : *
109 : * new functionality for the low-resource-consumption. It will
110 : * first one by one create a hash value out of the multiple attributes.
111 : * This hash value is computed by xoring and rotating individual hash
112 : * values together. We create a hash and rotate command to do this.
113 : */
114 :
115 : #include "monetdb_config.h"
116 : #include "mal.h"
117 : #include "mal_interpreter.h"
118 : #include "mal_exception.h"
119 :
120 : #define MKEYHASH_bte(valp) ((lng) valp)
121 : #define MKEYHASH_sht(valp) ((lng) valp)
122 : #define MKEYHASH_int(valp) ((lng) valp)
123 : #define MKEYHASH_lng(valp) (valp)
124 : #ifdef HAVE_HGE
125 : #define MKEYHASH_hge(valp) ((lng) (valp >> 64) ^ (lng) (valp))
126 : #endif
127 : #if SIZEOF_OID == SIZEOF_INT
128 : #define MKEYHASH_oid(valp) MKEYHASH_int(valp)
129 : #else
130 : #define MKEYHASH_oid(valp) MKEYHASH_lng(valp)
131 : #endif
132 :
133 : static inline ulng
134 56938794 : GDK_ROTATE(ulng x, int y, int z)
135 : {
136 56938794 : return (x << y) | (x >> z);
137 : }
138 :
139 : static str
140 1042 : MKEYhash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
141 : {
142 1042 : lng *res = getArgReference_lng(stk, pci, 0);
143 1042 : ptr val = getArgReference(stk, pci, 1);
144 1042 : int tpe = getArgType(mb, pci, 1);
145 :
146 1042 : (void) cntxt;
147 1042 : switch (ATOMstorage(tpe)) {
148 0 : case TYPE_void:
149 0 : *res = lng_nil; /* It can be called from SQL */
150 0 : break;
151 : case TYPE_bat:
152 : case TYPE_ptr:
153 : // illegal types, avoid falling into the default case.
154 0 : assert(0);
155 : case TYPE_bte:
156 9 : *res = MKEYHASH_bte((*(bte *) val));
157 9 : break;
158 0 : case TYPE_sht:
159 0 : *res = MKEYHASH_sht((*(sht *) val));
160 0 : break;
161 953 : case TYPE_int:
162 : case TYPE_flt:
163 953 : *res = MKEYHASH_int((*(int *) val));
164 953 : break;
165 9 : case TYPE_lng:
166 : case TYPE_dbl:
167 9 : *res = MKEYHASH_lng((*(lng *) val));
168 9 : break;
169 : #ifdef HAVE_HGE
170 0 : case TYPE_hge:
171 0 : *res = MKEYHASH_hge((*(hge *) val));
172 0 : break;
173 : #endif
174 71 : default:
175 71 : if (ATOMextern(tpe))
176 71 : *res = (lng) ATOMhash(tpe, *(ptr *) val);
177 : else
178 0 : *res = (lng) ATOMhash(tpe, val);
179 : break;
180 : }
181 1042 : return MAL_SUCCEED;
182 : }
183 :
184 : #define MKEYbathashloop(TPE) \
185 : do { \
186 : const TPE *restrict v = (const TPE *) bi.base; \
187 : if (ci.tpe == cand_dense) { \
188 : for (BUN i = 0; i < ci.ncand; i++) { \
189 : oid p = (canditer_next_dense(&ci) - off); \
190 : r[i] = (ulng) MKEYHASH_##TPE(v[p]); \
191 : } \
192 : } else { \
193 : for (BUN i = 0; i < ci.ncand; i++) { \
194 : oid p = (canditer_next(&ci) - off); \
195 : r[i] = (ulng) MKEYHASH_##TPE(v[p]); \
196 : } \
197 : } \
198 : } while (0)
199 :
200 : static str
201 9175 : MKEYbathash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
202 : {
203 9175 : bat *res = getArgReference_bat(stk, pci, 0),
204 9175 : *bid = getArgReference_bat(stk, pci, 1),
205 9175 : *sid1 = pci->argc == 3 ? getArgReference_bat(stk, pci, 2) : NULL;
206 9175 : BAT *bn = NULL, *b = NULL, *bs = NULL;
207 9175 : str msg = MAL_SUCCEED;
208 9175 : struct canditer ci = { 0 };
209 9175 : oid off;
210 9175 : ulng *restrict r;
211 9175 : BATiter bi = { 0 };
212 :
213 9175 : (void) cntxt;
214 9175 : (void) mb;
215 9175 : if (!(b = BATdescriptor(*bid))) {
216 0 : msg = createException(MAL, "batmkey.bathash",
217 : SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
218 0 : goto bailout;
219 : }
220 9175 : if (sid1 && !is_bat_nil(*sid1) && !(bs = BATdescriptor(*sid1))) {
221 0 : msg = createException(MAL, "batmkey.bathash",
222 : SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
223 0 : goto bailout;
224 : }
225 9175 : canditer_init(&ci, b, bs);
226 9174 : if (!(bn = COLnew(ci.hseq, TYPE_lng, ci.ncand, TRANSIENT))) {
227 0 : msg = createException(MAL, "batmkey.bathash",
228 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
229 0 : goto bailout;
230 : }
231 9175 : off = b->hseqbase;
232 9175 : r = (ulng *) Tloc(bn, 0);
233 :
234 9175 : bi = bat_iterator(b);
235 9175 : switch (ATOMstorage(b->ttype)) {
236 0 : case TYPE_void:{
237 0 : oid o = b->tseqbase;
238 0 : if (is_oid_nil(o)) {
239 0 : for (BUN i = 0; i < ci.ncand; i++) {
240 0 : r[i] = (ulng) lng_nil;
241 : }
242 0 : } else if (ci.tpe == cand_dense) {
243 0 : for (BUN i = 0; i < ci.ncand; i++) {
244 0 : oid p = (canditer_next_dense(&ci) - off);
245 0 : r[i] = o + p;
246 : }
247 : } else {
248 0 : for (BUN i = 0; i < ci.ncand; i++) {
249 0 : oid p = (canditer_next(&ci) - off);
250 0 : r[i] = o + p;
251 : }
252 : }
253 : break;
254 : }
255 35 : case TYPE_bte:
256 1874 : MKEYbathashloop(bte);
257 : break;
258 7 : case TYPE_sht:
259 31498 : MKEYbathashloop(sht);
260 : break;
261 7273 : case TYPE_int:
262 : case TYPE_flt:
263 38249800 : MKEYbathashloop(int);
264 : break;
265 188 : case TYPE_lng:
266 : case TYPE_dbl:
267 16298 : MKEYbathashloop(lng);
268 : break;
269 : #ifdef HAVE_HGE
270 18 : case TYPE_hge:
271 76 : MKEYbathashloop(hge);
272 : break;
273 : #endif
274 1654 : default:{
275 1654 : BUN (*hash)(const void *) = BATatoms[b->ttype].atomHash;
276 :
277 1654 : if (ci.tpe == cand_dense) {
278 587140 : for (BUN i = 0; i < ci.ncand; i++) {
279 585486 : oid p = (canditer_next_dense(&ci) - off);
280 585486 : r[i] = (ulng) hash(BUNtail(bi, p));
281 : }
282 : } else {
283 0 : for (BUN i = 0; i < ci.ncand; i++) {
284 0 : oid p = (canditer_next(&ci) - off);
285 0 : r[i] = (ulng) hash(BUNtail(bi, p));
286 : }
287 : }
288 : }
289 : }
290 9175 : bat_iterator_end(&bi);
291 :
292 9175 : bailout:
293 9175 : BBPreclaim(b);
294 9174 : BBPreclaim(bs);
295 9174 : if (bn) {
296 9174 : assert(msg == MAL_SUCCEED);
297 9174 : BATsetcount(bn, ci.ncand);
298 9175 : bn->tnonil = false;
299 9175 : bn->tnil = false;
300 9175 : bn->tkey = BATcount(bn) <= 1;
301 9175 : bn->tsorted = BATcount(bn) <= 1;
302 9175 : bn->trevsorted = BATcount(bn) <= 1;
303 9175 : *res = bn->batCacheid;
304 9175 : BBPkeepref(bn);
305 : }
306 9175 : return msg;
307 : }
308 :
309 : static str
310 1803 : MKEYrotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
311 : {
312 1803 : lng *res = getArgReference_lng(stk, pci, 0);
313 1803 : ulng h = *getArgReference_lng(stk, pci, 1), val;
314 1803 : int lbit = *getArgReference_int(stk, pci, 2),
315 1803 : rbit = (int) sizeof(lng) * 8 - lbit,
316 1803 : tpe = getArgType(mb, pci, 3);
317 1803 : ptr pval = getArgReference(stk, pci, 3);
318 :
319 1803 : (void) cntxt;
320 1803 : switch (ATOMstorage(tpe)) {
321 10 : case TYPE_bte:
322 10 : val = (ulng) MKEYHASH_bte((*(bte *) pval));
323 10 : break;
324 1 : case TYPE_sht:
325 1 : val = (ulng) MKEYHASH_sht((*(sht *) pval));
326 1 : break;
327 1533 : case TYPE_int:
328 : case TYPE_flt:
329 1533 : val = (ulng) MKEYHASH_int((*(int *) pval));
330 1533 : break;
331 45 : case TYPE_lng:
332 : case TYPE_dbl:
333 45 : val = (ulng) MKEYHASH_lng((*(lng *) pval));
334 45 : break;
335 : #ifdef HAVE_HGE
336 0 : case TYPE_hge:
337 0 : val = (ulng) MKEYHASH_hge((*(hge *) pval));
338 0 : break;
339 : #endif
340 214 : default:
341 214 : if (ATOMextern(tpe))
342 214 : val = (ulng) ATOMhash(tpe, *(ptr *) pval);
343 : else
344 0 : val = (ulng) ATOMhash(tpe, pval);
345 : break;
346 : }
347 1803 : *res = (lng) (GDK_ROTATE(h, lbit, rbit) ^ val);
348 1803 : return MAL_SUCCEED;
349 : }
350 :
351 : #define MKEYbulk_rotate_xor_hashloop(TPE) \
352 : do { \
353 : const ulng *restrict h = (const ulng *) hbi.base; \
354 : const TPE *restrict v = (const TPE *) bi.base; \
355 : if (ci1.tpe == cand_dense && ci2.tpe == cand_dense) { \
356 : for (BUN i = 0; i < ci1.ncand; i++) { \
357 : oid p1 = (canditer_next_dense(&ci1) - off1), p2 = (canditer_next_dense(&ci2) - off2); \
358 : r[i] = GDK_ROTATE(h[p1], lbit, rbit) ^ (ulng) MKEYHASH_##TPE(v[p2]); \
359 : } \
360 : } else { \
361 : for (BUN i = 0; i < ci1.ncand; i++) { \
362 : oid p1 = (canditer_next(&ci1) - off1), p2 = (canditer_next(&ci2) - off2); \
363 : r[i] = GDK_ROTATE(h[p1], lbit, rbit) ^ (ulng) MKEYHASH_##TPE(v[p2]); \
364 : } \
365 : } \
366 : } while (0)
367 :
368 : static str
369 10268 : MKEYbulk_rotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
370 : InstrPtr pci)
371 : {
372 10268 : bat *res = getArgReference_bat(stk, pci, 0),
373 10268 : *hid = getArgReference_bat(stk, pci, 1),
374 10268 : *bid = getArgReference_bat(stk, pci, 3),
375 10268 : *sid1 = pci->argc == 6 ? getArgReference_bat(stk, pci, 4) : NULL,
376 10268 : *sid2 = pci->argc == 6 ? getArgReference_bat(stk, pci, 5) : NULL;
377 10268 : BAT *hb = NULL, *b = NULL, *bn = NULL, *s1 = NULL, *s2 = NULL;
378 10268 : int lbit = *getArgReference_int(stk, pci, 2),
379 10268 : rbit = (int) sizeof(lng) * 8 - lbit;
380 10268 : str msg = MAL_SUCCEED;
381 10268 : struct canditer ci1 = { 0 }, ci2 = { 0 };
382 10268 : oid off1, off2;
383 10268 : ulng *restrict r;
384 10268 : BATiter hbi = { 0 }, bi = { 0 };
385 :
386 10268 : (void) cntxt;
387 10268 : (void) mb;
388 10268 : if (!(hb = BATdescriptor(*hid))) {
389 0 : msg = createException(MAL, "batmkey.rotate_xor_hash",
390 : SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
391 0 : goto bailout;
392 : }
393 10268 : if (!(b = BATdescriptor(*bid))) {
394 0 : msg = createException(MAL, "batmkey.rotate_xor_hash",
395 : SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
396 0 : goto bailout;
397 : }
398 10267 : if (b->ttype == TYPE_msk || mask_cand(b)) {
399 0 : BAT *ob = b;
400 0 : b = BATunmask(b);
401 0 : BBPunfix(ob->batCacheid);
402 0 : if (!b) {
403 0 : BBPunfix(hb->batCacheid);
404 0 : throw(MAL, "batmkey.rotate_xor_hash", GDK_EXCEPTION);
405 : }
406 : }
407 10267 : if ((sid1 && !is_bat_nil(*sid1) && !(s1 = BATdescriptor(*sid1)))
408 10267 : || (sid2 && !is_bat_nil(*sid2) && !(s2 = BATdescriptor(*sid2)))) {
409 0 : msg = createException(MAL, "batmkey.rotate_xor_hash",
410 : SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
411 0 : goto bailout;
412 : }
413 :
414 10267 : canditer_init(&ci1, hb, s1);
415 10267 : canditer_init(&ci2, b, s2);
416 10268 : if (ci2.ncand != ci1.ncand || ci1.hseq != ci2.hseq) {
417 0 : msg = createException(MAL, "batmkey.rotate_xor_hash",
418 : ILLEGAL_ARGUMENT
419 : " Requires bats of identical size");
420 0 : goto bailout;
421 : }
422 :
423 10268 : if (!(bn = COLnew(ci1.hseq, TYPE_lng, ci1.ncand, TRANSIENT))) {
424 0 : msg = createException(MAL, "batmkey.rotate_xor_hash",
425 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
426 0 : goto bailout;
427 : }
428 :
429 10268 : r = (ulng *) Tloc(bn, 0);
430 :
431 10268 : off1 = hb->hseqbase;
432 10268 : off2 = b->hseqbase;
433 :
434 10268 : hbi = bat_iterator(hb);
435 10267 : bi = bat_iterator(b);
436 10267 : if (complex_cand(b)) {
437 0 : const ulng *restrict h = (const ulng *) hbi.base;
438 0 : for (BUN i = 0; i < ci1.ncand; i++) {
439 0 : oid p1 = canditer_next(&ci1) - off1;
440 0 : oid p2 = canditer_next(&ci2) - off2;
441 0 : r[i] = GDK_ROTATE(h[p1], lbit,
442 0 : rbit) ^ MKEYHASH_oid(*(oid *) Tpos(&bi, p2));
443 : }
444 : } else {
445 10267 : switch (ATOMstorage(b->ttype)) {
446 243 : case TYPE_bte:
447 23630 : MKEYbulk_rotate_xor_hashloop(bte);
448 : break;
449 94 : case TYPE_sht:
450 2549 : MKEYbulk_rotate_xor_hashloop(sht);
451 : break;
452 6180 : case TYPE_int:
453 : case TYPE_flt:
454 55120479 : MKEYbulk_rotate_xor_hashloop(int);
455 : break;
456 526 : case TYPE_lng:{ /* hb and b areas may overlap, so for this case the 'restrict' keyword cannot be used */
457 526 : const ulng *h = (const ulng *) hbi.base;
458 526 : const lng *v = (const lng *) bi.base;
459 526 : if (ci1.tpe == cand_dense && ci2.tpe == cand_dense) {
460 339355 : for (BUN i = 0; i < ci1.ncand; i++) {
461 338829 : oid p1 = (canditer_next_dense(&ci1) - off1),
462 338829 : p2 = (canditer_next_dense(&ci2) - off2);
463 338829 : r[i] = GDK_ROTATE(h[p1], lbit,
464 338829 : rbit) ^ (ulng) MKEYHASH_lng(v[p2]);
465 : }
466 : } else {
467 0 : for (BUN i = 0; i < ci1.ncand; i++) {
468 0 : oid p1 = (canditer_next(&ci1) - off1),
469 0 : p2 = (canditer_next(&ci2) - off2);
470 0 : r[i] = GDK_ROTATE(h[p1], lbit,
471 0 : rbit) ^ (ulng) MKEYHASH_lng(v[p2]);
472 : }
473 : }
474 : break;
475 : }
476 60 : case TYPE_dbl:
477 310 : MKEYbulk_rotate_xor_hashloop(lng);
478 : break;
479 : #ifdef HAVE_HGE
480 4 : case TYPE_hge:
481 14 : MKEYbulk_rotate_xor_hashloop(hge);
482 : break;
483 : #endif
484 3160 : default:{
485 3160 : const ulng *restrict h = (const ulng *) hbi.base;
486 3160 : BUN (*hash)(const void *) = BATatoms[b->ttype].atomHash;
487 :
488 3160 : if (ci1.tpe == cand_dense && ci2.tpe == cand_dense) {
489 1460919 : for (BUN i = 0; i < ci1.ncand; i++) {
490 1457761 : oid p1 = (canditer_next_dense(&ci1) - off1),
491 1457761 : p2 = (canditer_next_dense(&ci2) - off2);
492 2915520 : r[i] = GDK_ROTATE(h[p1], lbit,
493 1457761 : rbit) ^ (ulng) hash(BUNtail(bi, p2));
494 : }
495 : } else {
496 0 : for (BUN i = 0; i < ci1.ncand; i++) {
497 0 : oid p1 = (canditer_next(&ci1) - off1),
498 0 : p2 = (canditer_next(&ci2) - off2);
499 0 : r[i] = GDK_ROTATE(h[p1], lbit,
500 0 : rbit) ^ (ulng) hash(BUNtail(bi, p2));
501 : }
502 : }
503 : break;
504 : }
505 : }
506 : }
507 10265 : bat_iterator_end(&hbi);
508 10268 : bat_iterator_end(&bi);
509 :
510 10268 : bailout:
511 10268 : BBPreclaim(b);
512 10268 : BBPreclaim(hb);
513 10267 : BBPreclaim(s1);
514 10268 : BBPreclaim(s2);
515 10268 : if (bn) {
516 10268 : assert(msg == MAL_SUCCEED);
517 10268 : BATsetcount(bn, ci1.ncand);
518 10268 : bn->tnonil = false;
519 10268 : bn->tnil = false;
520 10268 : bn->tkey = BATcount(bn) <= 1;
521 10268 : bn->tsorted = BATcount(bn) <= 1;
522 10268 : bn->trevsorted = BATcount(bn) <= 1;
523 10268 : *res = bn->batCacheid;
524 10268 : BBPkeepref(bn);
525 : }
526 : return msg;
527 : }
528 :
529 : static str
530 0 : MKEYbulkconst_rotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
531 : InstrPtr pci)
532 : {
533 0 : bat *res = getArgReference_bat(stk, pci, 0),
534 0 : *hid = getArgReference_bat(stk, pci, 1),
535 0 : *sid1 = pci->argc == 5 ? getArgReference_bat(stk, pci, 4) : NULL;
536 0 : int lbit = *getArgReference_int(stk, pci, 2),
537 0 : tpe = getArgType(mb, pci, 3), rbit = (int) sizeof(lng) * 8 - lbit;
538 0 : ptr pval = getArgReference(stk, pci, 3);
539 0 : BAT *hb = NULL, *bn = NULL, *bs = NULL;
540 0 : str msg = MAL_SUCCEED;
541 0 : struct canditer ci = { 0 };
542 0 : oid off;
543 0 : ulng *restrict r, val;
544 0 : const ulng *restrict h;
545 0 : BATiter hbi = { 0 };
546 :
547 0 : (void) cntxt;
548 0 : if (!(hb = BATdescriptor(*hid))) {
549 0 : msg = createException(MAL, "batmkey.rotate_xor_hash",
550 : SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
551 0 : goto bailout;
552 : }
553 0 : if (sid1 && !is_bat_nil(*sid1) && !(bs = BATdescriptor(*sid1))) {
554 0 : msg = createException(MAL, "batmkey.rotate_xor_hash",
555 : SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
556 0 : goto bailout;
557 : }
558 0 : canditer_init(&ci, hb, bs);
559 0 : if (!(bn = COLnew(ci.hseq, TYPE_lng, ci.ncand, TRANSIENT))) {
560 0 : msg = createException(MAL, "batmkey.rotate_xor_hash",
561 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
562 0 : goto bailout;
563 : }
564 0 : off = hb->hseqbase;
565 :
566 0 : switch (ATOMstorage(tpe)) {
567 0 : case TYPE_bte:
568 0 : val = (ulng) MKEYHASH_bte((*(bte *) pval));
569 0 : break;
570 0 : case TYPE_sht:
571 0 : val = (ulng) MKEYHASH_sht((*(sht *) pval));
572 0 : break;
573 0 : case TYPE_int:
574 : case TYPE_flt:
575 0 : val = (ulng) MKEYHASH_int((*(int *) pval));
576 0 : break;
577 0 : case TYPE_lng:
578 : case TYPE_dbl:
579 0 : val = (ulng) MKEYHASH_lng((*(lng *) pval));
580 0 : break;
581 : #ifdef HAVE_HGE
582 0 : case TYPE_hge:
583 0 : val = (ulng) MKEYHASH_hge((*(hge *) pval));
584 0 : break;
585 : #endif
586 0 : default:
587 0 : if (ATOMextern(tpe))
588 0 : val = (ulng) ATOMhash(tpe, *(ptr *) pval);
589 : else
590 0 : val = (ulng) ATOMhash(tpe, pval);
591 : break;
592 : }
593 :
594 0 : r = (ulng *) Tloc(bn, 0);
595 0 : hbi = bat_iterator(hb);
596 0 : h = (const ulng *) hbi.base;
597 0 : if (ci.tpe == cand_dense) {
598 0 : for (BUN i = 0; i < ci.ncand; i++) {
599 0 : oid p = (canditer_next_dense(&ci) - off);
600 0 : r[i] = GDK_ROTATE(h[p], lbit, rbit) ^ val;
601 : }
602 : } else {
603 0 : for (BUN i = 0; i < ci.ncand; i++) {
604 0 : oid p = (canditer_next(&ci) - off);
605 0 : r[i] = GDK_ROTATE(h[p], lbit, rbit) ^ val;
606 : }
607 : }
608 0 : bat_iterator_end(&hbi);
609 :
610 0 : bailout:
611 0 : BBPreclaim(hb);
612 0 : BBPreclaim(bs);
613 0 : if (bn) {
614 0 : assert(msg == MAL_SUCCEED);
615 0 : BATsetcount(bn, ci.ncand);
616 0 : bn->tnonil = false;
617 0 : bn->tnil = false;
618 0 : bn->tkey = BATcount(bn) <= 1;
619 0 : bn->tsorted = BATcount(bn) <= 1;
620 0 : bn->trevsorted = BATcount(bn) <= 1;
621 0 : *res = bn->batCacheid;
622 0 : BBPkeepref(bn);
623 : }
624 0 : return msg;
625 : }
626 :
627 : #define MKEYconstbulk_rotate_xor_hashloop(TPE) \
628 : do { \
629 : const TPE *restrict v = (const TPE *) bi.base; \
630 : if (ci.tpe == cand_dense) { \
631 : for (BUN i = 0; i < ci.ncand; i++) { \
632 : oid p = (canditer_next_dense(&ci) - off); \
633 : r[i] = h ^ (ulng) MKEYHASH_##TPE(v[p]); \
634 : } \
635 : } else { \
636 : for (BUN i = 0; i < ci.ncand; i++) { \
637 : oid p = (canditer_next(&ci) - off); \
638 : r[i] = h ^ (ulng) MKEYHASH_##TPE(v[p]); \
639 : } \
640 : } \
641 : } while (0)
642 :
643 : static str
644 0 : MKEYconstbulk_rotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
645 : InstrPtr pci)
646 : {
647 0 : bat *res = getArgReference_bat(stk, pci, 0),
648 0 : *bid = getArgReference_bat(stk, pci, 3),
649 0 : *sid1 = pci->argc == 5 ? getArgReference_bat(stk, pci, 4) : NULL;
650 0 : int lbit = *getArgReference_int(stk, pci, 2),
651 0 : rbit = (int) sizeof(lng) * 8 - lbit;
652 0 : BAT *b = NULL, *bn = NULL, *bs = NULL;
653 0 : str msg = MAL_SUCCEED;
654 0 : struct canditer ci = { 0 };
655 0 : oid off;
656 0 : ulng *restrict r,
657 0 : h = GDK_ROTATE((ulng) *getArgReference_lng(stk, pci, 1), lbit, rbit);
658 0 : BATiter bi = { 0 };
659 :
660 0 : (void) cntxt;
661 0 : (void) mb;
662 0 : if (!(b = BATdescriptor(*bid))) {
663 0 : msg = createException(MAL, "batmkey.rotate_xor_hash",
664 : SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
665 0 : goto bailout;
666 : }
667 0 : if (sid1 && !is_bat_nil(*sid1) && !(bs = BATdescriptor(*sid1))) {
668 0 : msg = createException(MAL, "batmkey.rotate_xor_hash",
669 : SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
670 0 : goto bailout;
671 : }
672 0 : canditer_init(&ci, b, bs);
673 0 : if (!(bn = COLnew(ci.hseq, TYPE_lng, ci.ncand, TRANSIENT))) {
674 0 : msg = createException(MAL, "batmkey.rotate_xor_hash",
675 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
676 0 : goto bailout;
677 : }
678 0 : off = b->hseqbase;
679 0 : r = (ulng *) Tloc(bn, 0);
680 :
681 0 : bi = bat_iterator(b);
682 0 : switch (ATOMstorage(b->ttype)) {
683 0 : case TYPE_bte:
684 0 : MKEYconstbulk_rotate_xor_hashloop(bte);
685 : break;
686 0 : case TYPE_sht:
687 0 : MKEYconstbulk_rotate_xor_hashloop(sht);
688 : break;
689 0 : case TYPE_int:
690 : case TYPE_flt:
691 0 : MKEYconstbulk_rotate_xor_hashloop(int);
692 : break;
693 0 : case TYPE_lng:
694 : case TYPE_dbl:
695 0 : MKEYconstbulk_rotate_xor_hashloop(lng);
696 : break;
697 : #ifdef HAVE_HGE
698 0 : case TYPE_hge:
699 0 : MKEYconstbulk_rotate_xor_hashloop(hge);
700 : break;
701 : #endif
702 0 : default:{
703 0 : BUN (*hash)(const void *) = BATatoms[b->ttype].atomHash;
704 :
705 0 : if (ci.tpe == cand_dense) {
706 0 : for (BUN i = 0; i < ci.ncand; i++) {
707 0 : oid p = (canditer_next_dense(&ci) - off);
708 0 : r[i] = h ^ (ulng) hash(BUNtail(bi, p));
709 : }
710 : } else {
711 0 : for (BUN i = 0; i < ci.ncand; i++) {
712 0 : oid p = (canditer_next(&ci) - off);
713 0 : r[i] = h ^ (ulng) hash(BUNtail(bi, p));
714 : }
715 : }
716 : break;
717 : }
718 : }
719 0 : bat_iterator_end(&bi);
720 :
721 0 : bailout:
722 0 : BBPreclaim(b);
723 0 : BBPreclaim(bs);
724 0 : if (bn) {
725 0 : assert(msg == MAL_SUCCEED);
726 0 : BATsetcount(bn, ci.ncand);
727 0 : bn->tnonil = false;
728 0 : bn->tnil = false;
729 0 : bn->tkey = BATcount(bn) <= 1;
730 0 : bn->tsorted = BATcount(bn) <= 1;
731 0 : bn->trevsorted = BATcount(bn) <= 1;
732 0 : *res = bn->batCacheid;
733 0 : BBPkeepref(bn);
734 : }
735 0 : return msg;
736 : }
737 :
738 : #include "mel.h"
739 : mel_func mkey_init_funcs[] = {
740 : pattern("mkey", "hash", MKEYhash, false, "calculate a hash value", args(1,2, arg("",lng),argany("v",0))),
741 : pattern("batmkey", "hash", MKEYbathash, false, "calculate a hash value", args(1,2, batarg("",lng),batargany("b",0))),
742 : pattern("batmkey", "hash", MKEYbathash, false, "calculate a hash value, with a candidate list", args(1,3, batarg("",lng),batargany("b",0),batarg("s",oid))),
743 : pattern("mkey", "rotate_xor_hash", MKEYrotate_xor_hash, false, "post: [:xor=]([:rotate=](h, nbits), [hash](b))", args(1,4, arg("",lng),arg("h",lng),arg("nbits",int),argany("v",0))),
744 : pattern("batmkey", "rotate_xor_hash", MKEYbulkconst_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b))", args(1,4, batarg("",lng),batarg("h",lng),arg("nbits",int),argany("v",0))),
745 : pattern("batmkey", "rotate_xor_hash", MKEYbulkconst_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b)), with a candidate list", args(1,5, batarg("",lng),batarg("h",lng),arg("nbits",int),argany("v",0),batarg("s",oid))),
746 : pattern("batmkey", "rotate_xor_hash", MKEYconstbulk_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b))", args(1,4, batarg("",lng),arg("h",lng),arg("nbits",int),batargany("b",0))),
747 : pattern("batmkey", "rotate_xor_hash", MKEYconstbulk_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b)), with a candidate list", args(1,5, batarg("",lng),arg("h",lng),arg("nbits",int),batargany("b",0),batarg("s",oid))),
748 : pattern("batmkey", "rotate_xor_hash", MKEYbulk_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b))", args(1,4, batarg("",lng),batarg("h",lng),arg("nbits",int),batargany("b",0))),
749 : pattern("batmkey", "rotate_xor_hash", MKEYbulk_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b)), with candidate lists", args(1,6, batarg("",lng),batarg("h",lng),arg("nbits",int),batargany("b",0),batarg("s1",oid),batarg("s2",oid))),
750 : { .imp=NULL }
751 : };
752 : #include "mal_import.h"
753 : #ifdef _MSC_VER
754 : #undef read
755 : #pragma section(".CRT$XCU",read)
756 : #endif
757 329 : LIB_STARTUP_FUNC(init_mkey_mal)
758 329 : { mal_module("mkey", NULL, mkey_init_funcs); }
|