LCOV - code coverage report
Current view: top level - monetdb5/modules/mal - mkey.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 179 378 47.4 %
Date: 2024-04-26 00:35:57 Functions: 5 7 71.4 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : /*
      14             : - * @- The Problem
      15             : - * When creating a join, we want to make a unique key of the attributes on both
      16             : - * sides and then join these keys. Consider the following BATs.
      17             : - *
      18             : - * @verbatim
      19             : - * orders                  customer                link
      20             : - * ====================    =====================   ===========
      21             : - *         zipcode h_nr            zipcode hnr    oid     cid
      22             : - * o1      13      9       c1      11      10      o1      c5
      23             : - * o2      11      10      c2      11      11      o2      c1
      24             : - * o3      11      11      c3      12      2       o3      c2
      25             : - * o4      12      5       c4      12      1       o4      nil
      26             : - * o5      11      10      c5      13      9       o5      c1
      27             : - * o6      12      2       c6      14      7       o6      c3
      28             : - * o7      13      9                               o7      c5
      29             : - * o8      12      1                               o8      c4
      30             : - * o9      13      9                               o9      c5
      31             : - * @end verbatim
      32             : - *
      33             : - * The current approach is designed to take minimal memory, as our previous
      34             : - * solutions to the problem did not scale well. In case of singular keys,
      35             : - * the link is executed by a simple join. Before going into the join, we
      36             : - * make sure the end result size is not too large, which is done by looking
      37             : - * at relation sizes (if the other key is unique) or, if that is not possible,
      38             : - * by computing the exact join size.
      39             : - *
      40             : - * The join algorithm was also improved to do dynamic sampling to determine
      41             : - * with high accuracy the join size, so that we can alloc in one go a memory
      42             : - * region of sufficient size. This also reduces the ds\_link memory requirements.
      43             : - *
      44             : - * For compound keys, those that consist of multiple attributes, we now compute
      45             : - * a derived column that contains an integer hash value derived from all
      46             : - * key columns.
      47             : - * This is done by computing a hash value for each individual key column
      48             : - * and combining those by bitwise XOR and left-rotation. That is, for each
      49             : - * column,we rotate the working hash value by N bits and XOR the hash value
      50             : - * of the column over it. The working hash value is initialized with zero,
      51             : - * and after all columns are processed, this working value is used as output.
      52             : - * Computing the hash value for all columns in the key for one table is done
      53             : - * by the command hash(). Hence, we do hash on both sides, and join
      54             : - * that together with a simple join:
      55             : - *
      56             : - * @code{join(hash(keys), hash(keys.reverse);}
      57             : - *
      58             : - * One complication of this procedure are nil values:
      59             : - * @table
      60             : - * @itemize
      61             : - * @item
      62             : - * it may happen that the final hash-value (an int formed by a
      63             : - * random bit pattern) accidentally has the value of int(nil).
      64             : - * Notice that join never matches nil values.
      65             : - * Hence these accidental nils must be replaced by a begin value (currently: 0).
      66             : - * @item
      67             : - * in case any of the compound key values is nil, our nil semantics
      68             : - * require us that those tuples may never match on a join. Consequently,
      69             : - * during the hash() processing of all compound key columns for computing
      70             : - * the hash value, we also maintain a bit-bat that records which tuples had
      71             : - * a nil value. The bit-bat is initialized to false, and the results of the
      72             : - * nil-check on each column is OR-ed to it.
      73             : - * Afterwards, the hash-value of all tuples that have this nil-bit set to
      74             : - * TRUE are forced to int(nil), which will exclude them from matching.
      75             : - * @end itemize
      76             : - *
      77             : - * Joining on hash values produces a @emph{superset} of the join result:
      78             : - * it may happen that  two different key combinations hash on the same value,
      79             : - * which will make them match on the join (false hits). The final part
      80             : - * of the ds\_link therefore consists of filtering out the false hits.
      81             : - * This is done incrementally by joining back the join result to the original
      82             : - * columns, incrementally one by one for each pair of corresponding
      83             : - * columns. These values are compared with each other and we AND the
      84             : - * result of this comparison together for each pair of columns.
      85             : - * The bat containing these bits is initialized to all TRUE and serves as
      86             : - * final result after all column pairs have been compared.
      87             : - * The initial join result is finally filtered with this bit-bat.
      88             : - *
      89             : - * Joining back from the initial join-result to the original columns on
      90             : - * both sides takes quite a lot of memory. For this reason, the false
      91             : - * hit-filtering is done in slices (not all tuples at one time).
      92             : - * In this way the memory requirements of this phase are kept low.
      93             : - * In fact, the most memory demanding part of the join is the int-join
      94             : - * on hash number, which takes N*24 bytes (where N= |L| = |R|).
      95             : - * In comparison, the previous CTmultigroup/CTmultiderive approach
      96             : - * took N*48 bytes. Additionally, by making it possible to use merge-sort,
      97             : - * it avoids severe performance degradation (memory thrashing) as produced
      98             : - * by the old ds\_link when the inner join relation would be larger than memory.
      99             : - *
     100             : - * If ds\_link performance is still an issue, the sort-merge join used here
     101             : - * could be replaced by partitioned hash-join with radix-cluster/decluster.
     102             : - *
     103             : - * @+ Implementation
     104             : - */
     105             : 
     106             : /*
     107             :  * (c) Peter Boncz, Stefan Manegold, Niels Nes
     108             :  *
     109             :  * new functionality for the low-resource-consumption. It will
     110             :  * first one by one create a hash value out of the multiple attributes.
     111             :  * This hash value is computed by xoring and rotating individual hash
     112             :  * values together. We create a hash and rotate command to do this.
     113             :  */
     114             : 
     115             : #include "monetdb_config.h"
     116             : #include "mal.h"
     117             : #include "mal_interpreter.h"
     118             : #include "mal_exception.h"
     119             : 
     120             : #define MKEYHASH_bte(valp)      ((lng) valp)
     121             : #define MKEYHASH_sht(valp)      ((lng) valp)
     122             : #define MKEYHASH_int(valp)      ((lng) valp)
     123             : #define MKEYHASH_lng(valp)      (valp)
     124             : #ifdef HAVE_HGE
     125             : #define MKEYHASH_hge(valp)      ((lng) (valp >> 64) ^ (lng) (valp))
     126             : #endif
     127             : #if SIZEOF_OID == SIZEOF_INT
     128             : #define MKEYHASH_oid(valp)      MKEYHASH_int(valp)
     129             : #else
     130             : #define MKEYHASH_oid(valp)      MKEYHASH_lng(valp)
     131             : #endif
     132             : 
     133             : static inline ulng
     134    57457192 : GDK_ROTATE(ulng x, int y, int z)
     135             : {
     136    57457192 :         return (x << y) | (x >> z);
     137             : }
     138             : 
     139             : static str
     140        1038 : MKEYhash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
     141             : {
     142        1038 :         lng *res = getArgReference_lng(stk, pci, 0);
     143        1038 :         ptr val = getArgReference(stk, pci, 1);
     144        1042 :         int tpe = getArgType(mb, pci, 1);
     145             : 
     146        1042 :         (void) cntxt;
     147        1042 :         switch (ATOMstorage(tpe)) {
     148           0 :         case TYPE_void:
     149           0 :                 *res = lng_nil;                 /* It can be called from SQL */
     150           0 :                 break;
     151             :         case TYPE_ptr:
     152             :                 // illegal types, avoid falling into the default case.
     153           0 :                 assert(0);
     154             :         case TYPE_bte:
     155           9 :                 *res = MKEYHASH_bte((*(bte *) val));
     156           9 :                 break;
     157           0 :         case TYPE_sht:
     158           0 :                 *res = MKEYHASH_sht((*(sht *) val));
     159           0 :                 break;
     160         953 :         case TYPE_int:
     161             :         case TYPE_flt:
     162         953 :                 *res = MKEYHASH_int((*(int *) val));
     163         953 :                 break;
     164           9 :         case TYPE_lng:
     165             :         case TYPE_dbl:
     166           9 :                 *res = MKEYHASH_lng((*(lng *) val));
     167           9 :                 break;
     168             : #ifdef HAVE_HGE
     169           0 :         case TYPE_hge:
     170           0 :                 *res = MKEYHASH_hge((*(hge *) val));
     171           0 :                 break;
     172             : #endif
     173          71 :         default:
     174          71 :                 if (ATOMextern(tpe))
     175          71 :                         *res = (lng) ATOMhash(tpe, *(ptr *) val);
     176             :                 else
     177           0 :                         *res = (lng) ATOMhash(tpe, val);
     178             :                 break;
     179             :         }
     180        1042 :         return MAL_SUCCEED;
     181             : }
     182             : 
     183             : #define MKEYbathashloop(TPE) \
     184             :         do { \
     185             :                 const TPE *restrict v = (const TPE *) bi.base; \
     186             :                 if (ci.tpe == cand_dense) { \
     187             :                         for (BUN i = 0; i < ci.ncand; i++) { \
     188             :                                 oid p = (canditer_next_dense(&ci) - off); \
     189             :                                 r[i] = (ulng) MKEYHASH_##TPE(v[p]); \
     190             :                         } \
     191             :                 } else { \
     192             :                         for (BUN i = 0; i < ci.ncand; i++) { \
     193             :                                 oid p = (canditer_next(&ci) - off); \
     194             :                                 r[i] = (ulng) MKEYHASH_##TPE(v[p]); \
     195             :                         } \
     196             :                 } \
     197             :         } while (0)
     198             : 
     199             : static str
     200       14644 : MKEYbathash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
     201             : {
     202       14644 :         bat *res = getArgReference_bat(stk, pci, 0),
     203       14644 :                 *bid = getArgReference_bat(stk, pci, 1),
     204       14644 :                 *sid1 = pci->argc == 3 ? getArgReference_bat(stk, pci, 2) : NULL;
     205       14644 :         BAT *bn = NULL, *b = NULL, *bs = NULL;
     206       14644 :         str msg = MAL_SUCCEED;
     207       14644 :         struct canditer ci = { 0 };
     208       14644 :         oid off;
     209       14644 :         ulng *restrict r;
     210       14644 :         BATiter bi = { 0 };
     211             : 
     212       14644 :         (void) cntxt;
     213       14644 :         (void) mb;
     214       14644 :         if (!(b = BATdescriptor(*bid))) {
     215           0 :                 msg = createException(MAL, "batmkey.bathash",
     216             :                                                           SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
     217           0 :                 goto bailout;
     218             :         }
     219       14678 :         if (sid1 && !is_bat_nil(*sid1) && !(bs = BATdescriptor(*sid1))) {
     220           0 :                 msg = createException(MAL, "batmkey.bathash",
     221             :                                                           SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
     222           0 :                 goto bailout;
     223             :         }
     224       14678 :         canditer_init(&ci, b, bs);
     225       14657 :         if (!(bn = COLnew(ci.hseq, TYPE_lng, ci.ncand, TRANSIENT))) {
     226           0 :                 msg = createException(MAL, "batmkey.bathash",
     227             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     228           0 :                 goto bailout;
     229             :         }
     230       14704 :         off = b->hseqbase;
     231       14704 :         r = (ulng *) Tloc(bn, 0);
     232             : 
     233       14704 :         bi = bat_iterator(b);
     234       14599 :         switch (ATOMstorage(b->ttype)) {
     235           0 :         case TYPE_void:{
     236           0 :                 oid o = b->tseqbase;
     237           0 :                 if (is_oid_nil(o)) {
     238           0 :                         for (BUN i = 0; i < ci.ncand; i++) {
     239           0 :                                 r[i] = (ulng) lng_nil;
     240             :                         }
     241           0 :                 } else if (ci.tpe == cand_dense) {
     242           0 :                         for (BUN i = 0; i < ci.ncand; i++) {
     243           0 :                                 oid p = (canditer_next_dense(&ci) - off);
     244           0 :                                 r[i] = o + p;
     245             :                         }
     246             :                 } else {
     247           0 :                         for (BUN i = 0; i < ci.ncand; i++) {
     248           0 :                                 oid p = (canditer_next(&ci) - off);
     249           0 :                                 r[i] = o + p;
     250             :                         }
     251             :                 }
     252             :                 break;
     253             :         }
     254          34 :         case TYPE_bte:
     255        1870 :                 MKEYbathashloop(bte);
     256             :                 break;
     257           7 :         case TYPE_sht:
     258       31498 :                 MKEYbathashloop(sht);
     259             :                 break;
     260       12089 :         case TYPE_int:
     261             :         case TYPE_flt:
     262    36638960 :                 MKEYbathashloop(int);
     263             :                 break;
     264         258 :         case TYPE_lng:
     265             :         case TYPE_dbl:
     266       16334 :                 MKEYbathashloop(lng);
     267             :                 break;
     268             : #ifdef HAVE_HGE
     269          14 :         case TYPE_hge:
     270          32 :                 MKEYbathashloop(hge);
     271             :                 break;
     272             : #endif
     273        2197 :         default:{
     274        2197 :                 BUN (*hash)(const void *) = BATatoms[b->ttype].atomHash;
     275             : 
     276        2197 :                 if (ci.tpe == cand_dense) {
     277      576153 :                         for (BUN i = 0; i < ci.ncand; i++) {
     278      573971 :                                 oid p = (canditer_next_dense(&ci) - off);
     279      573971 :                                 r[i] = (ulng) hash(BUNtail(bi, p));
     280             :                         }
     281             :                 } else {
     282          10 :                         for (BUN i = 0; i < ci.ncand; i++) {
     283          10 :                                 oid p = (canditer_next(&ci) - off);
     284           0 :                                 r[i] = (ulng) hash(BUNtail(bi, p));
     285             :                         }
     286             :                 }
     287             :         }
     288             :         }
     289       14584 :         bat_iterator_end(&bi);
     290             : 
     291       14588 :   bailout:
     292       14588 :         BBPreclaim(b);
     293       14669 :         BBPreclaim(bs);
     294       14675 :         if (bn) {
     295       14675 :                 assert(msg == MAL_SUCCEED);
     296       14675 :                 BATsetcount(bn, ci.ncand);
     297       14699 :                 bn->tnonil = false;
     298       14699 :                 bn->tnil = false;
     299       14699 :                 bn->tkey = BATcount(bn) <= 1;
     300       14699 :                 bn->tsorted = BATcount(bn) <= 1;
     301       14699 :                 bn->trevsorted = BATcount(bn) <= 1;
     302       14699 :                 *res = bn->batCacheid;
     303       14699 :                 BBPkeepref(bn);
     304             :         }
     305       14667 :         return msg;
     306             : }
     307             : 
     308             : static str
     309        1810 : MKEYrotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
     310             : {
     311        1810 :         lng *res = getArgReference_lng(stk, pci, 0);
     312        1810 :         ulng h = *getArgReference_lng(stk, pci, 1), val;
     313        1810 :         int lbit = *getArgReference_int(stk, pci, 2),
     314        1810 :                 rbit = (int) sizeof(lng) * 8 - lbit,
     315        1810 :                 tpe = getArgType(mb, pci, 3);
     316        1810 :         ptr pval = getArgReference(stk, pci, 3);
     317             : 
     318        1810 :         (void) cntxt;
     319        1810 :         switch (ATOMstorage(tpe)) {
     320           8 :         case TYPE_bte:
     321           8 :                 val = (ulng) MKEYHASH_bte((*(bte *) pval));
     322           8 :                 break;
     323           1 :         case TYPE_sht:
     324           1 :                 val = (ulng) MKEYHASH_sht((*(sht *) pval));
     325           1 :                 break;
     326        1541 :         case TYPE_int:
     327             :         case TYPE_flt:
     328        1541 :                 val = (ulng) MKEYHASH_int((*(int *) pval));
     329        1541 :                 break;
     330          45 :         case TYPE_lng:
     331             :         case TYPE_dbl:
     332          45 :                 val = (ulng) MKEYHASH_lng((*(lng *) pval));
     333          45 :                 break;
     334             : #ifdef HAVE_HGE
     335           0 :         case TYPE_hge:
     336           0 :                 val = (ulng) MKEYHASH_hge((*(hge *) pval));
     337           0 :                 break;
     338             : #endif
     339         215 :         default:
     340         215 :                 if (ATOMextern(tpe))
     341         215 :                         val = (ulng) ATOMhash(tpe, *(ptr *) pval);
     342             :                 else
     343           0 :                         val = (ulng) ATOMhash(tpe, pval);
     344             :                 break;
     345             :         }
     346        1810 :         *res = (lng) (GDK_ROTATE(h, lbit, rbit) ^ val);
     347        1810 :         return MAL_SUCCEED;
     348             : }
     349             : 
     350             : #define MKEYbulk_rotate_xor_hashloop(TPE) \
     351             :         do { \
     352             :                 const ulng *restrict h = (const ulng *) hbi.base; \
     353             :                 const TPE *restrict v = (const TPE *) bi.base; \
     354             :                 if (ci1.tpe == cand_dense && ci2.tpe == cand_dense) { \
     355             :                         for (BUN i = 0; i < ci1.ncand; i++) { \
     356             :                                 oid p1 = (canditer_next_dense(&ci1) - off1), p2 = (canditer_next_dense(&ci2) - off2); \
     357             :                                 r[i] = GDK_ROTATE(h[p1], lbit, rbit) ^ (ulng) MKEYHASH_##TPE(v[p2]); \
     358             :                         } \
     359             :                 } else { \
     360             :                         for (BUN i = 0; i < ci1.ncand; i++) { \
     361             :                                 oid p1 = (canditer_next(&ci1) - off1), p2 = (canditer_next(&ci2) - off2); \
     362             :                                 r[i] = GDK_ROTATE(h[p1], lbit, rbit) ^ (ulng) MKEYHASH_##TPE(v[p2]); \
     363             :                         } \
     364             :                 } \
     365             :         } while (0)
     366             : 
     367             : static str
     368       15914 : MKEYbulk_rotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
     369             :                                                  InstrPtr pci)
     370             : {
     371       15914 :         bat *res = getArgReference_bat(stk, pci, 0),
     372       15914 :                 *hid = getArgReference_bat(stk, pci, 1),
     373       15914 :                 *bid = getArgReference_bat(stk, pci, 3),
     374       15914 :                 *sid1 = pci->argc == 6 ? getArgReference_bat(stk, pci, 4) : NULL,
     375       15914 :                 *sid2 = pci->argc == 6 ? getArgReference_bat(stk, pci, 5) : NULL;
     376       15914 :         BAT *hb = NULL, *b = NULL, *bn = NULL, *s1 = NULL, *s2 = NULL;
     377       15914 :         int lbit = *getArgReference_int(stk, pci, 2),
     378       15914 :                 rbit = (int) sizeof(lng) * 8 - lbit;
     379       15914 :         str msg = MAL_SUCCEED;
     380       15914 :         struct canditer ci1 = { 0 }, ci2 = { 0 };
     381       15914 :         oid off1, off2;
     382       15914 :         ulng *restrict r;
     383       15914 :         BATiter hbi = { 0 }, bi = { 0 };
     384             : 
     385       15914 :         (void) cntxt;
     386       15914 :         (void) mb;
     387       15914 :         if (!(hb = BATdescriptor(*hid))) {
     388           0 :                 msg = createException(MAL, "batmkey.rotate_xor_hash",
     389             :                                                           SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
     390           0 :                 goto bailout;
     391             :         }
     392       15962 :         if (!(b = BATdescriptor(*bid))) {
     393           0 :                 msg = createException(MAL, "batmkey.rotate_xor_hash",
     394             :                                                           SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
     395           0 :                 goto bailout;
     396             :         }
     397       15957 :         if (b->ttype == TYPE_msk || mask_cand(b)) {
     398           0 :                 BAT *ob = b;
     399           0 :                 b = BATunmask(b);
     400           0 :                 BBPunfix(ob->batCacheid);
     401           0 :                 if (!b) {
     402           0 :                         BBPunfix(hb->batCacheid);
     403           0 :                         throw(MAL, "batmkey.rotate_xor_hash", GDK_EXCEPTION);
     404             :                 }
     405             :         }
     406       15957 :         if ((sid1 && !is_bat_nil(*sid1) && !(s1 = BATdescriptor(*sid1)))
     407       15957 :                 || (sid2 && !is_bat_nil(*sid2) && !(s2 = BATdescriptor(*sid2)))) {
     408           0 :                 msg = createException(MAL, "batmkey.rotate_xor_hash",
     409             :                                                           SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
     410           0 :                 goto bailout;
     411             :         }
     412             : 
     413       15957 :         canditer_init(&ci1, hb, s1);
     414       15961 :         canditer_init(&ci2, b, s2);
     415       15961 :         if (ci2.ncand != ci1.ncand || ci1.hseq != ci2.hseq) {
     416          45 :                 msg = createException(MAL, "batmkey.rotate_xor_hash",
     417             :                                                           ILLEGAL_ARGUMENT
     418             :                                                           " Requires bats of identical size");
     419           0 :                 goto bailout;
     420             :         }
     421             : 
     422       15916 :         if (!(bn = COLnew(ci1.hseq, TYPE_lng, ci1.ncand, TRANSIENT))) {
     423           0 :                 msg = createException(MAL, "batmkey.rotate_xor_hash",
     424             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     425           0 :                 goto bailout;
     426             :         }
     427             : 
     428       15951 :         r = (ulng *) Tloc(bn, 0);
     429             : 
     430       15951 :         off1 = hb->hseqbase;
     431       15951 :         off2 = b->hseqbase;
     432             : 
     433       15951 :         hbi = bat_iterator(hb);
     434       15959 :         bi = bat_iterator(b);
     435       15856 :         if (complex_cand(b)) {
     436           0 :                 const ulng *restrict h = (const ulng *) hbi.base;
     437           0 :                 for (BUN i = 0; i < ci1.ncand; i++) {
     438           0 :                         oid p1 = canditer_next(&ci1) - off1;
     439           0 :                         oid p2 = canditer_next(&ci2) - off2;
     440           0 :                         r[i] = GDK_ROTATE(h[p1], lbit,
     441           0 :                                                           rbit) ^ MKEYHASH_oid(*(oid *) Tpos(&bi, p2));
     442             :                 }
     443             :         } else {
     444       15856 :                 switch (ATOMstorage(b->ttype)) {
     445         202 :                 case TYPE_bte:
     446       23540 :                         MKEYbulk_rotate_xor_hashloop(bte);
     447             :                         break;
     448          29 :                 case TYPE_sht:
     449        2090 :                         MKEYbulk_rotate_xor_hashloop(sht);
     450             :                         break;
     451        9491 :                 case TYPE_int:
     452             :                 case TYPE_flt:
     453    56022672 :                         MKEYbulk_rotate_xor_hashloop(int);
     454             :                         break;
     455         807 :                 case TYPE_lng:{         /* hb and b areas may overlap, so for this case the 'restrict' keyword cannot be used */
     456         807 :                         const ulng *h = (const ulng *) hbi.base;
     457         807 :                         const lng *v = (const lng *) bi.base;
     458         807 :                         if (ci1.tpe == cand_dense && ci2.tpe == cand_dense) {
     459      306793 :                                 for (BUN i = 0; i < ci1.ncand; i++) {
     460      305986 :                                         oid p1 = (canditer_next_dense(&ci1) - off1),
     461      305986 :                                                 p2 = (canditer_next_dense(&ci2) - off2);
     462      305986 :                                         r[i] = GDK_ROTATE(h[p1], lbit,
     463      305986 :                                                                           rbit) ^ (ulng) MKEYHASH_lng(v[p2]);
     464             :                                 }
     465             :                         } else {
     466           0 :                                 for (BUN i = 0; i < ci1.ncand; i++) {
     467           0 :                                         oid p1 = (canditer_next(&ci1) - off1),
     468           0 :                                                 p2 = (canditer_next(&ci2) - off2);
     469           0 :                                         r[i] = GDK_ROTATE(h[p1], lbit,
     470           0 :                                                                           rbit) ^ (ulng) MKEYHASH_lng(v[p2]);
     471             :                                 }
     472             :                         }
     473             :                         break;
     474             :                 }
     475          54 :                 case TYPE_dbl:
     476         262 :                         MKEYbulk_rotate_xor_hashloop(lng);
     477             :                         break;
     478             : #ifdef HAVE_HGE
     479           4 :                 case TYPE_hge:
     480          14 :                         MKEYbulk_rotate_xor_hashloop(hge);
     481             :                         break;
     482             : #endif
     483        5269 :                 default:{
     484        5269 :                         const ulng *restrict h = (const ulng *) hbi.base;
     485        5269 :                         BUN (*hash)(const void *) = BATatoms[b->ttype].atomHash;
     486             : 
     487        5269 :                         if (ci1.tpe == cand_dense && ci2.tpe == cand_dense) {
     488     1115851 :                                 for (BUN i = 0; i < ci1.ncand; i++) {
     489     1110598 :                                         oid p1 = (canditer_next_dense(&ci1) - off1),
     490     1110598 :                                                 p2 = (canditer_next_dense(&ci2) - off2);
     491     2221180 :                                         r[i] = GDK_ROTATE(h[p1], lbit,
     492     1110598 :                                                                           rbit) ^ (ulng) hash(BUNtail(bi, p2));
     493             :                                 }
     494             :                         } else {
     495           0 :                                 for (BUN i = 0; i < ci1.ncand; i++) {
     496           0 :                                         oid p1 = (canditer_next(&ci1) - off1),
     497           0 :                                                 p2 = (canditer_next(&ci2) - off2);
     498           0 :                                         r[i] = GDK_ROTATE(h[p1], lbit,
     499           0 :                                                                           rbit) ^ (ulng) hash(BUNtail(bi, p2));
     500             :                                 }
     501             :                         }
     502             :                         break;
     503             :                 }
     504             :                 }
     505             :         }
     506       15840 :         bat_iterator_end(&hbi);
     507       15886 :         bat_iterator_end(&bi);
     508             : 
     509       15956 :   bailout:
     510       15956 :         BBPreclaim(b);
     511       15938 :         BBPreclaim(hb);
     512       15912 :         BBPreclaim(s1);
     513       15907 :         BBPreclaim(s2);
     514       15924 :         if (bn) {
     515       15924 :                 assert(msg == MAL_SUCCEED);
     516       15924 :                 BATsetcount(bn, ci1.ncand);
     517       15958 :                 bn->tnonil = false;
     518       15958 :                 bn->tnil = false;
     519       15958 :                 bn->tkey = BATcount(bn) <= 1;
     520       15958 :                 bn->tsorted = BATcount(bn) <= 1;
     521       15958 :                 bn->trevsorted = BATcount(bn) <= 1;
     522       15958 :                 *res = bn->batCacheid;
     523       15958 :                 BBPkeepref(bn);
     524             :         }
     525             :         return msg;
     526             : }
     527             : 
     528             : static str
     529           0 : MKEYbulkconst_rotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
     530             :                                                           InstrPtr pci)
     531             : {
     532           0 :         bat *res = getArgReference_bat(stk, pci, 0),
     533           0 :                 *hid = getArgReference_bat(stk, pci, 1),
     534           0 :                 *sid1 = pci->argc == 5 ? getArgReference_bat(stk, pci, 4) : NULL;
     535           0 :         int lbit = *getArgReference_int(stk, pci, 2),
     536           0 :                 tpe = getArgType(mb, pci, 3), rbit = (int) sizeof(lng) * 8 - lbit;
     537           0 :         ptr pval = getArgReference(stk, pci, 3);
     538           0 :         BAT *hb = NULL, *bn = NULL, *bs = NULL;
     539           0 :         str msg = MAL_SUCCEED;
     540           0 :         struct canditer ci = { 0 };
     541           0 :         oid off;
     542           0 :         ulng *restrict r, val;
     543           0 :         const ulng *restrict h;
     544           0 :         BATiter hbi = { 0 };
     545             : 
     546           0 :         (void) cntxt;
     547           0 :         if (!(hb = BATdescriptor(*hid))) {
     548           0 :                 msg = createException(MAL, "batmkey.rotate_xor_hash",
     549             :                                                           SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
     550           0 :                 goto bailout;
     551             :         }
     552           0 :         if (sid1 && !is_bat_nil(*sid1) && !(bs = BATdescriptor(*sid1))) {
     553           0 :                 msg = createException(MAL, "batmkey.rotate_xor_hash",
     554             :                                                           SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
     555           0 :                 goto bailout;
     556             :         }
     557           0 :         canditer_init(&ci, hb, bs);
     558           0 :         if (!(bn = COLnew(ci.hseq, TYPE_lng, ci.ncand, TRANSIENT))) {
     559           0 :                 msg = createException(MAL, "batmkey.rotate_xor_hash",
     560             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     561           0 :                 goto bailout;
     562             :         }
     563           0 :         off = hb->hseqbase;
     564             : 
     565           0 :         switch (ATOMstorage(tpe)) {
     566           0 :         case TYPE_bte:
     567           0 :                 val = (ulng) MKEYHASH_bte((*(bte *) pval));
     568           0 :                 break;
     569           0 :         case TYPE_sht:
     570           0 :                 val = (ulng) MKEYHASH_sht((*(sht *) pval));
     571           0 :                 break;
     572           0 :         case TYPE_int:
     573             :         case TYPE_flt:
     574           0 :                 val = (ulng) MKEYHASH_int((*(int *) pval));
     575           0 :                 break;
     576           0 :         case TYPE_lng:
     577             :         case TYPE_dbl:
     578           0 :                 val = (ulng) MKEYHASH_lng((*(lng *) pval));
     579           0 :                 break;
     580             : #ifdef HAVE_HGE
     581           0 :         case TYPE_hge:
     582           0 :                 val = (ulng) MKEYHASH_hge((*(hge *) pval));
     583           0 :                 break;
     584             : #endif
     585           0 :         default:
     586           0 :                 if (ATOMextern(tpe))
     587           0 :                         val = (ulng) ATOMhash(tpe, *(ptr *) pval);
     588             :                 else
     589           0 :                         val = (ulng) ATOMhash(tpe, pval);
     590             :                 break;
     591             :         }
     592             : 
     593           0 :         r = (ulng *) Tloc(bn, 0);
     594           0 :         hbi = bat_iterator(hb);
     595           0 :         h = (const ulng *) hbi.base;
     596           0 :         if (ci.tpe == cand_dense) {
     597           0 :                 for (BUN i = 0; i < ci.ncand; i++) {
     598           0 :                         oid p = (canditer_next_dense(&ci) - off);
     599           0 :                         r[i] = GDK_ROTATE(h[p], lbit, rbit) ^ val;
     600             :                 }
     601             :         } else {
     602           0 :                 for (BUN i = 0; i < ci.ncand; i++) {
     603           0 :                         oid p = (canditer_next(&ci) - off);
     604           0 :                         r[i] = GDK_ROTATE(h[p], lbit, rbit) ^ val;
     605             :                 }
     606             :         }
     607           0 :         bat_iterator_end(&hbi);
     608             : 
     609           0 :   bailout:
     610           0 :         BBPreclaim(hb);
     611           0 :         BBPreclaim(bs);
     612           0 :         if (bn) {
     613           0 :                 assert(msg == MAL_SUCCEED);
     614           0 :                 BATsetcount(bn, ci.ncand);
     615           0 :                 bn->tnonil = false;
     616           0 :                 bn->tnil = false;
     617           0 :                 bn->tkey = BATcount(bn) <= 1;
     618           0 :                 bn->tsorted = BATcount(bn) <= 1;
     619           0 :                 bn->trevsorted = BATcount(bn) <= 1;
     620           0 :                 *res = bn->batCacheid;
     621           0 :                 BBPkeepref(bn);
     622             :         }
     623           0 :         return msg;
     624             : }
     625             : 
     626             : #define MKEYconstbulk_rotate_xor_hashloop(TPE) \
     627             :         do { \
     628             :                 const TPE *restrict v = (const TPE *) bi.base; \
     629             :                 if (ci.tpe == cand_dense) { \
     630             :                         for (BUN i = 0; i < ci.ncand; i++) { \
     631             :                                 oid p = (canditer_next_dense(&ci) - off); \
     632             :                                 r[i] = h ^ (ulng) MKEYHASH_##TPE(v[p]); \
     633             :                         } \
     634             :                 } else { \
     635             :                         for (BUN i = 0; i < ci.ncand; i++) { \
     636             :                                 oid p = (canditer_next(&ci) - off); \
     637             :                                 r[i] = h ^ (ulng) MKEYHASH_##TPE(v[p]); \
     638             :                         } \
     639             :                 } \
     640             :         } while (0)
     641             : 
     642             : static str
     643           0 : MKEYconstbulk_rotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
     644             :                                                           InstrPtr pci)
     645             : {
     646           0 :         bat *res = getArgReference_bat(stk, pci, 0),
     647           0 :                 *bid = getArgReference_bat(stk, pci, 3),
     648           0 :                 *sid1 = pci->argc == 5 ? getArgReference_bat(stk, pci, 4) : NULL;
     649           0 :         int lbit = *getArgReference_int(stk, pci, 2),
     650           0 :                 rbit = (int) sizeof(lng) * 8 - lbit;
     651           0 :         BAT *b = NULL, *bn = NULL, *bs = NULL;
     652           0 :         str msg = MAL_SUCCEED;
     653           0 :         struct canditer ci = { 0 };
     654           0 :         oid off;
     655           0 :         ulng *restrict r,
     656           0 :                 h = GDK_ROTATE((ulng) *getArgReference_lng(stk, pci, 1), lbit, rbit);
     657           0 :         BATiter bi = { 0 };
     658             : 
     659           0 :         (void) cntxt;
     660           0 :         (void) mb;
     661           0 :         if (!(b = BATdescriptor(*bid))) {
     662           0 :                 msg = createException(MAL, "batmkey.rotate_xor_hash",
     663             :                                                           SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
     664           0 :                 goto bailout;
     665             :         }
     666           0 :         if (sid1 && !is_bat_nil(*sid1) && !(bs = BATdescriptor(*sid1))) {
     667           0 :                 msg = createException(MAL, "batmkey.rotate_xor_hash",
     668             :                                                           SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
     669           0 :                 goto bailout;
     670             :         }
     671           0 :         canditer_init(&ci, b, bs);
     672           0 :         if (!(bn = COLnew(ci.hseq, TYPE_lng, ci.ncand, TRANSIENT))) {
     673           0 :                 msg = createException(MAL, "batmkey.rotate_xor_hash",
     674             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     675           0 :                 goto bailout;
     676             :         }
     677           0 :         off = b->hseqbase;
     678           0 :         r = (ulng *) Tloc(bn, 0);
     679             : 
     680           0 :         bi = bat_iterator(b);
     681           0 :         switch (ATOMstorage(b->ttype)) {
     682           0 :         case TYPE_bte:
     683           0 :                 MKEYconstbulk_rotate_xor_hashloop(bte);
     684             :                 break;
     685           0 :         case TYPE_sht:
     686           0 :                 MKEYconstbulk_rotate_xor_hashloop(sht);
     687             :                 break;
     688           0 :         case TYPE_int:
     689             :         case TYPE_flt:
     690           0 :                 MKEYconstbulk_rotate_xor_hashloop(int);
     691             :                 break;
     692           0 :         case TYPE_lng:
     693             :         case TYPE_dbl:
     694           0 :                 MKEYconstbulk_rotate_xor_hashloop(lng);
     695             :                 break;
     696             : #ifdef HAVE_HGE
     697           0 :         case TYPE_hge:
     698           0 :                 MKEYconstbulk_rotate_xor_hashloop(hge);
     699             :                 break;
     700             : #endif
     701           0 :         default:{
     702           0 :                 BUN (*hash)(const void *) = BATatoms[b->ttype].atomHash;
     703             : 
     704           0 :                 if (ci.tpe == cand_dense) {
     705           0 :                         for (BUN i = 0; i < ci.ncand; i++) {
     706           0 :                                 oid p = (canditer_next_dense(&ci) - off);
     707           0 :                                 r[i] = h ^ (ulng) hash(BUNtail(bi, p));
     708             :                         }
     709             :                 } else {
     710           0 :                         for (BUN i = 0; i < ci.ncand; i++) {
     711           0 :                                 oid p = (canditer_next(&ci) - off);
     712           0 :                                 r[i] = h ^ (ulng) hash(BUNtail(bi, p));
     713             :                         }
     714             :                 }
     715             :                 break;
     716             :         }
     717             :         }
     718           0 :         bat_iterator_end(&bi);
     719             : 
     720           0 :   bailout:
     721           0 :         BBPreclaim(b);
     722           0 :         BBPreclaim(bs);
     723           0 :         if (bn) {
     724           0 :                 assert(msg == MAL_SUCCEED);
     725           0 :                 BATsetcount(bn, ci.ncand);
     726           0 :                 bn->tnonil = false;
     727           0 :                 bn->tnil = false;
     728           0 :                 bn->tkey = BATcount(bn) <= 1;
     729           0 :                 bn->tsorted = BATcount(bn) <= 1;
     730           0 :                 bn->trevsorted = BATcount(bn) <= 1;
     731           0 :                 *res = bn->batCacheid;
     732           0 :                 BBPkeepref(bn);
     733             :         }
     734           0 :         return msg;
     735             : }
     736             : 
     737             : #include "mel.h"
     738             : mel_func mkey_init_funcs[] = {
     739             :  pattern("mkey", "hash", MKEYhash, false, "calculate a hash value", args(1,2, arg("",lng),argany("v",0))),
     740             :  pattern("batmkey", "hash", MKEYbathash, false, "calculate a hash value", args(1,2, batarg("",lng),batargany("b",0))),
     741             :  pattern("batmkey", "hash", MKEYbathash, false, "calculate a hash value, with a candidate list", args(1,3, batarg("",lng),batargany("b",0),batarg("s",oid))),
     742             :  pattern("mkey", "rotate_xor_hash", MKEYrotate_xor_hash, false, "post: [:xor=]([:rotate=](h, nbits), [hash](b))", args(1,4, arg("",lng),arg("h",lng),arg("nbits",int),argany("v",0))),
     743             :  pattern("batmkey", "rotate_xor_hash", MKEYbulkconst_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b))", args(1,4, batarg("",lng),batarg("h",lng),arg("nbits",int),argany("v",0))),
     744             :  pattern("batmkey", "rotate_xor_hash", MKEYbulkconst_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b)), with a candidate list", args(1,5, batarg("",lng),batarg("h",lng),arg("nbits",int),argany("v",0),batarg("s",oid))),
     745             :  pattern("batmkey", "rotate_xor_hash", MKEYconstbulk_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b))", args(1,4, batarg("",lng),arg("h",lng),arg("nbits",int),batargany("b",0))),
     746             :  pattern("batmkey", "rotate_xor_hash", MKEYconstbulk_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b)), with a candidate list", args(1,5, batarg("",lng),arg("h",lng),arg("nbits",int),batargany("b",0),batarg("s",oid))),
     747             :  pattern("batmkey", "rotate_xor_hash", MKEYbulk_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b))", args(1,4, batarg("",lng),batarg("h",lng),arg("nbits",int),batargany("b",0))),
     748             :  pattern("batmkey", "rotate_xor_hash", MKEYbulk_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b)), with candidate lists", args(1,6, batarg("",lng),batarg("h",lng),arg("nbits",int),batargany("b",0),batarg("s1",oid),batarg("s2",oid))),
     749             :  { .imp=NULL }
     750             : };
     751             : #include "mal_import.h"
     752             : #ifdef _MSC_VER
     753             : #undef read
     754             : #pragma section(".CRT$XCU",read)
     755             : #endif
     756         334 : LIB_STARTUP_FUNC(init_mkey_mal)
     757         334 : { mal_module("mkey", NULL, mkey_init_funcs); }

Generated by: LCOV version 1.14