LCOV - code coverage report
Current view: top level - monetdb5/modules/mal - mkey.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 176 378 46.6 %
Date: 2024-04-25 20:03:45 Functions: 5 7 71.4 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : /*
      14             : - * @- The Problem
      15             : - * When creating a join, we want to make a unique key of the attributes on both
      16             : - * sides and then join these keys. Consider the following BATs.
      17             : - *
      18             : - * @verbatim
      19             : - * orders                  customer                link
      20             : - * ====================    =====================   ===========
      21             : - *         zipcode h_nr            zipcode hnr    oid     cid
      22             : - * o1      13      9       c1      11      10      o1      c5
      23             : - * o2      11      10      c2      11      11      o2      c1
      24             : - * o3      11      11      c3      12      2       o3      c2
      25             : - * o4      12      5       c4      12      1       o4      nil
      26             : - * o5      11      10      c5      13      9       o5      c1
      27             : - * o6      12      2       c6      14      7       o6      c3
      28             : - * o7      13      9                               o7      c5
      29             : - * o8      12      1                               o8      c4
      30             : - * o9      13      9                               o9      c5
      31             : - * @end verbatim
      32             : - *
      33             : - * The current approach is designed to take minimal memory, as our previous
      34             : - * solutions to the problem did not scale well. In case of singular keys,
      35             : - * the link is executed by a simple join. Before going into the join, we
      36             : - * make sure the end result size is not too large, which is done by looking
      37             : - * at relation sizes (if the other key is unique) or, if that is not possible,
      38             : - * by computing the exact join size.
      39             : - *
      40             : - * The join algorithm was also improved to do dynamic sampling to determine
      41             : - * with high accuracy the join size, so that we can alloc in one go a memory
      42             : - * region of sufficient size. This also reduces the ds\_link memory requirements.
      43             : - *
      44             : - * For compound keys, those that consist of multiple attributes, we now compute
      45             : - * a derived column that contains an integer hash value derived from all
      46             : - * key columns.
      47             : - * This is done by computing a hash value for each individual key column
      48             : - * and combining those by bitwise XOR and left-rotation. That is, for each
      49             : - * column,we rotate the working hash value by N bits and XOR the hash value
      50             : - * of the column over it. The working hash value is initialized with zero,
      51             : - * and after all columns are processed, this working value is used as output.
      52             : - * Computing the hash value for all columns in the key for one table is done
      53             : - * by the command hash(). Hence, we do hash on both sides, and join
      54             : - * that together with a simple join:
      55             : - *
      56             : - * @code{join(hash(keys), hash(keys.reverse);}
      57             : - *
      58             : - * One complication of this procedure are nil values:
      59             : - * @table
      60             : - * @itemize
      61             : - * @item
      62             : - * it may happen that the final hash-value (an int formed by a
      63             : - * random bit pattern) accidentally has the value of int(nil).
      64             : - * Notice that join never matches nil values.
      65             : - * Hence these accidental nils must be replaced by a begin value (currently: 0).
      66             : - * @item
      67             : - * in case any of the compound key values is nil, our nil semantics
      68             : - * require us that those tuples may never match on a join. Consequently,
      69             : - * during the hash() processing of all compound key columns for computing
      70             : - * the hash value, we also maintain a bit-bat that records which tuples had
      71             : - * a nil value. The bit-bat is initialized to false, and the results of the
      72             : - * nil-check on each column is OR-ed to it.
      73             : - * Afterwards, the hash-value of all tuples that have this nil-bit set to
      74             : - * TRUE are forced to int(nil), which will exclude them from matching.
      75             : - * @end itemize
      76             : - *
      77             : - * Joining on hash values produces a @emph{superset} of the join result:
      78             : - * it may happen that  two different key combinations hash on the same value,
      79             : - * which will make them match on the join (false hits). The final part
      80             : - * of the ds\_link therefore consists of filtering out the false hits.
      81             : - * This is done incrementally by joining back the join result to the original
      82             : - * columns, incrementally one by one for each pair of corresponding
      83             : - * columns. These values are compared with each other and we AND the
      84             : - * result of this comparison together for each pair of columns.
      85             : - * The bat containing these bits is initialized to all TRUE and serves as
      86             : - * final result after all column pairs have been compared.
      87             : - * The initial join result is finally filtered with this bit-bat.
      88             : - *
      89             : - * Joining back from the initial join-result to the original columns on
      90             : - * both sides takes quite a lot of memory. For this reason, the false
      91             : - * hit-filtering is done in slices (not all tuples at one time).
      92             : - * In this way the memory requirements of this phase are kept low.
      93             : - * In fact, the most memory demanding part of the join is the int-join
      94             : - * on hash number, which takes N*24 bytes (where N= |L| = |R|).
      95             : - * In comparison, the previous CTmultigroup/CTmultiderive approach
      96             : - * took N*48 bytes. Additionally, by making it possible to use merge-sort,
      97             : - * it avoids severe performance degradation (memory thrashing) as produced
      98             : - * by the old ds\_link when the inner join relation would be larger than memory.
      99             : - *
     100             : - * If ds\_link performance is still an issue, the sort-merge join used here
     101             : - * could be replaced by partitioned hash-join with radix-cluster/decluster.
     102             : - *
     103             : - * @+ Implementation
     104             : - */
     105             : 
     106             : /*
     107             :  * (c) Peter Boncz, Stefan Manegold, Niels Nes
     108             :  *
     109             :  * new functionality for the low-resource-consumption. It will
     110             :  * first one by one create a hash value out of the multiple attributes.
     111             :  * This hash value is computed by xoring and rotating individual hash
     112             :  * values together. We create a hash and rotate command to do this.
     113             :  */
     114             : 
     115             : #include "monetdb_config.h"
     116             : #include "mal.h"
     117             : #include "mal_interpreter.h"
     118             : #include "mal_exception.h"
     119             : 
     120             : #define MKEYHASH_bte(valp)      ((lng) valp)
     121             : #define MKEYHASH_sht(valp)      ((lng) valp)
     122             : #define MKEYHASH_int(valp)      ((lng) valp)
     123             : #define MKEYHASH_lng(valp)      (valp)
     124             : #ifdef HAVE_HGE
     125             : #define MKEYHASH_hge(valp)      ((lng) (valp >> 64) ^ (lng) (valp))
     126             : #endif
     127             : #if SIZEOF_OID == SIZEOF_INT
     128             : #define MKEYHASH_oid(valp)      MKEYHASH_int(valp)
     129             : #else
     130             : #define MKEYHASH_oid(valp)      MKEYHASH_lng(valp)
     131             : #endif
     132             : 
     133             : static inline ulng
     134    56938794 : GDK_ROTATE(ulng x, int y, int z)
     135             : {
     136    56938794 :         return (x << y) | (x >> z);
     137             : }
     138             : 
     139             : static str
     140        1042 : MKEYhash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
     141             : {
     142        1042 :         lng *res = getArgReference_lng(stk, pci, 0);
     143        1042 :         ptr val = getArgReference(stk, pci, 1);
     144        1042 :         int tpe = getArgType(mb, pci, 1);
     145             : 
     146        1042 :         (void) cntxt;
     147        1042 :         switch (ATOMstorage(tpe)) {
     148           0 :         case TYPE_void:
     149           0 :                 *res = lng_nil;                 /* It can be called from SQL */
     150           0 :                 break;
     151             :         case TYPE_bat:
     152             :         case TYPE_ptr:
     153             :                 // illegal types, avoid falling into the default case.
     154           0 :                 assert(0);
     155             :         case TYPE_bte:
     156           9 :                 *res = MKEYHASH_bte((*(bte *) val));
     157           9 :                 break;
     158           0 :         case TYPE_sht:
     159           0 :                 *res = MKEYHASH_sht((*(sht *) val));
     160           0 :                 break;
     161         953 :         case TYPE_int:
     162             :         case TYPE_flt:
     163         953 :                 *res = MKEYHASH_int((*(int *) val));
     164         953 :                 break;
     165           9 :         case TYPE_lng:
     166             :         case TYPE_dbl:
     167           9 :                 *res = MKEYHASH_lng((*(lng *) val));
     168           9 :                 break;
     169             : #ifdef HAVE_HGE
     170           0 :         case TYPE_hge:
     171           0 :                 *res = MKEYHASH_hge((*(hge *) val));
     172           0 :                 break;
     173             : #endif
     174          71 :         default:
     175          71 :                 if (ATOMextern(tpe))
     176          71 :                         *res = (lng) ATOMhash(tpe, *(ptr *) val);
     177             :                 else
     178           0 :                         *res = (lng) ATOMhash(tpe, val);
     179             :                 break;
     180             :         }
     181        1042 :         return MAL_SUCCEED;
     182             : }
     183             : 
     184             : #define MKEYbathashloop(TPE) \
     185             :         do { \
     186             :                 const TPE *restrict v = (const TPE *) bi.base; \
     187             :                 if (ci.tpe == cand_dense) { \
     188             :                         for (BUN i = 0; i < ci.ncand; i++) { \
     189             :                                 oid p = (canditer_next_dense(&ci) - off); \
     190             :                                 r[i] = (ulng) MKEYHASH_##TPE(v[p]); \
     191             :                         } \
     192             :                 } else { \
     193             :                         for (BUN i = 0; i < ci.ncand; i++) { \
     194             :                                 oid p = (canditer_next(&ci) - off); \
     195             :                                 r[i] = (ulng) MKEYHASH_##TPE(v[p]); \
     196             :                         } \
     197             :                 } \
     198             :         } while (0)
     199             : 
     200             : static str
     201        9175 : MKEYbathash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
     202             : {
     203        9175 :         bat *res = getArgReference_bat(stk, pci, 0),
     204        9175 :                 *bid = getArgReference_bat(stk, pci, 1),
     205        9175 :                 *sid1 = pci->argc == 3 ? getArgReference_bat(stk, pci, 2) : NULL;
     206        9175 :         BAT *bn = NULL, *b = NULL, *bs = NULL;
     207        9175 :         str msg = MAL_SUCCEED;
     208        9175 :         struct canditer ci = { 0 };
     209        9175 :         oid off;
     210        9175 :         ulng *restrict r;
     211        9175 :         BATiter bi = { 0 };
     212             : 
     213        9175 :         (void) cntxt;
     214        9175 :         (void) mb;
     215        9175 :         if (!(b = BATdescriptor(*bid))) {
     216           0 :                 msg = createException(MAL, "batmkey.bathash",
     217             :                                                           SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
     218           0 :                 goto bailout;
     219             :         }
     220        9175 :         if (sid1 && !is_bat_nil(*sid1) && !(bs = BATdescriptor(*sid1))) {
     221           0 :                 msg = createException(MAL, "batmkey.bathash",
     222             :                                                           SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
     223           0 :                 goto bailout;
     224             :         }
     225        9175 :         canditer_init(&ci, b, bs);
     226        9174 :         if (!(bn = COLnew(ci.hseq, TYPE_lng, ci.ncand, TRANSIENT))) {
     227           0 :                 msg = createException(MAL, "batmkey.bathash",
     228             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     229           0 :                 goto bailout;
     230             :         }
     231        9175 :         off = b->hseqbase;
     232        9175 :         r = (ulng *) Tloc(bn, 0);
     233             : 
     234        9175 :         bi = bat_iterator(b);
     235        9175 :         switch (ATOMstorage(b->ttype)) {
     236           0 :         case TYPE_void:{
     237           0 :                 oid o = b->tseqbase;
     238           0 :                 if (is_oid_nil(o)) {
     239           0 :                         for (BUN i = 0; i < ci.ncand; i++) {
     240           0 :                                 r[i] = (ulng) lng_nil;
     241             :                         }
     242           0 :                 } else if (ci.tpe == cand_dense) {
     243           0 :                         for (BUN i = 0; i < ci.ncand; i++) {
     244           0 :                                 oid p = (canditer_next_dense(&ci) - off);
     245           0 :                                 r[i] = o + p;
     246             :                         }
     247             :                 } else {
     248           0 :                         for (BUN i = 0; i < ci.ncand; i++) {
     249           0 :                                 oid p = (canditer_next(&ci) - off);
     250           0 :                                 r[i] = o + p;
     251             :                         }
     252             :                 }
     253             :                 break;
     254             :         }
     255          35 :         case TYPE_bte:
     256        1874 :                 MKEYbathashloop(bte);
     257             :                 break;
     258           7 :         case TYPE_sht:
     259       31498 :                 MKEYbathashloop(sht);
     260             :                 break;
     261        7273 :         case TYPE_int:
     262             :         case TYPE_flt:
     263    38249800 :                 MKEYbathashloop(int);
     264             :                 break;
     265         188 :         case TYPE_lng:
     266             :         case TYPE_dbl:
     267       16298 :                 MKEYbathashloop(lng);
     268             :                 break;
     269             : #ifdef HAVE_HGE
     270          18 :         case TYPE_hge:
     271          76 :                 MKEYbathashloop(hge);
     272             :                 break;
     273             : #endif
     274        1654 :         default:{
     275        1654 :                 BUN (*hash)(const void *) = BATatoms[b->ttype].atomHash;
     276             : 
     277        1654 :                 if (ci.tpe == cand_dense) {
     278      587140 :                         for (BUN i = 0; i < ci.ncand; i++) {
     279      585486 :                                 oid p = (canditer_next_dense(&ci) - off);
     280      585486 :                                 r[i] = (ulng) hash(BUNtail(bi, p));
     281             :                         }
     282             :                 } else {
     283           0 :                         for (BUN i = 0; i < ci.ncand; i++) {
     284           0 :                                 oid p = (canditer_next(&ci) - off);
     285           0 :                                 r[i] = (ulng) hash(BUNtail(bi, p));
     286             :                         }
     287             :                 }
     288             :         }
     289             :         }
     290        9175 :         bat_iterator_end(&bi);
     291             : 
     292        9175 :   bailout:
     293        9175 :         BBPreclaim(b);
     294        9174 :         BBPreclaim(bs);
     295        9174 :         if (bn) {
     296        9174 :                 assert(msg == MAL_SUCCEED);
     297        9174 :                 BATsetcount(bn, ci.ncand);
     298        9175 :                 bn->tnonil = false;
     299        9175 :                 bn->tnil = false;
     300        9175 :                 bn->tkey = BATcount(bn) <= 1;
     301        9175 :                 bn->tsorted = BATcount(bn) <= 1;
     302        9175 :                 bn->trevsorted = BATcount(bn) <= 1;
     303        9175 :                 *res = bn->batCacheid;
     304        9175 :                 BBPkeepref(bn);
     305             :         }
     306        9175 :         return msg;
     307             : }
     308             : 
     309             : static str
     310        1803 : MKEYrotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
     311             : {
     312        1803 :         lng *res = getArgReference_lng(stk, pci, 0);
     313        1803 :         ulng h = *getArgReference_lng(stk, pci, 1), val;
     314        1803 :         int lbit = *getArgReference_int(stk, pci, 2),
     315        1803 :                 rbit = (int) sizeof(lng) * 8 - lbit,
     316        1803 :                 tpe = getArgType(mb, pci, 3);
     317        1803 :         ptr pval = getArgReference(stk, pci, 3);
     318             : 
     319        1803 :         (void) cntxt;
     320        1803 :         switch (ATOMstorage(tpe)) {
     321          10 :         case TYPE_bte:
     322          10 :                 val = (ulng) MKEYHASH_bte((*(bte *) pval));
     323          10 :                 break;
     324           1 :         case TYPE_sht:
     325           1 :                 val = (ulng) MKEYHASH_sht((*(sht *) pval));
     326           1 :                 break;
     327        1533 :         case TYPE_int:
     328             :         case TYPE_flt:
     329        1533 :                 val = (ulng) MKEYHASH_int((*(int *) pval));
     330        1533 :                 break;
     331          45 :         case TYPE_lng:
     332             :         case TYPE_dbl:
     333          45 :                 val = (ulng) MKEYHASH_lng((*(lng *) pval));
     334          45 :                 break;
     335             : #ifdef HAVE_HGE
     336           0 :         case TYPE_hge:
     337           0 :                 val = (ulng) MKEYHASH_hge((*(hge *) pval));
     338           0 :                 break;
     339             : #endif
     340         214 :         default:
     341         214 :                 if (ATOMextern(tpe))
     342         214 :                         val = (ulng) ATOMhash(tpe, *(ptr *) pval);
     343             :                 else
     344           0 :                         val = (ulng) ATOMhash(tpe, pval);
     345             :                 break;
     346             :         }
     347        1803 :         *res = (lng) (GDK_ROTATE(h, lbit, rbit) ^ val);
     348        1803 :         return MAL_SUCCEED;
     349             : }
     350             : 
     351             : #define MKEYbulk_rotate_xor_hashloop(TPE) \
     352             :         do { \
     353             :                 const ulng *restrict h = (const ulng *) hbi.base; \
     354             :                 const TPE *restrict v = (const TPE *) bi.base; \
     355             :                 if (ci1.tpe == cand_dense && ci2.tpe == cand_dense) { \
     356             :                         for (BUN i = 0; i < ci1.ncand; i++) { \
     357             :                                 oid p1 = (canditer_next_dense(&ci1) - off1), p2 = (canditer_next_dense(&ci2) - off2); \
     358             :                                 r[i] = GDK_ROTATE(h[p1], lbit, rbit) ^ (ulng) MKEYHASH_##TPE(v[p2]); \
     359             :                         } \
     360             :                 } else { \
     361             :                         for (BUN i = 0; i < ci1.ncand; i++) { \
     362             :                                 oid p1 = (canditer_next(&ci1) - off1), p2 = (canditer_next(&ci2) - off2); \
     363             :                                 r[i] = GDK_ROTATE(h[p1], lbit, rbit) ^ (ulng) MKEYHASH_##TPE(v[p2]); \
     364             :                         } \
     365             :                 } \
     366             :         } while (0)
     367             : 
     368             : static str
     369       10268 : MKEYbulk_rotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
     370             :                                                  InstrPtr pci)
     371             : {
     372       10268 :         bat *res = getArgReference_bat(stk, pci, 0),
     373       10268 :                 *hid = getArgReference_bat(stk, pci, 1),
     374       10268 :                 *bid = getArgReference_bat(stk, pci, 3),
     375       10268 :                 *sid1 = pci->argc == 6 ? getArgReference_bat(stk, pci, 4) : NULL,
     376       10268 :                 *sid2 = pci->argc == 6 ? getArgReference_bat(stk, pci, 5) : NULL;
     377       10268 :         BAT *hb = NULL, *b = NULL, *bn = NULL, *s1 = NULL, *s2 = NULL;
     378       10268 :         int lbit = *getArgReference_int(stk, pci, 2),
     379       10268 :                 rbit = (int) sizeof(lng) * 8 - lbit;
     380       10268 :         str msg = MAL_SUCCEED;
     381       10268 :         struct canditer ci1 = { 0 }, ci2 = { 0 };
     382       10268 :         oid off1, off2;
     383       10268 :         ulng *restrict r;
     384       10268 :         BATiter hbi = { 0 }, bi = { 0 };
     385             : 
     386       10268 :         (void) cntxt;
     387       10268 :         (void) mb;
     388       10268 :         if (!(hb = BATdescriptor(*hid))) {
     389           0 :                 msg = createException(MAL, "batmkey.rotate_xor_hash",
     390             :                                                           SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
     391           0 :                 goto bailout;
     392             :         }
     393       10268 :         if (!(b = BATdescriptor(*bid))) {
     394           0 :                 msg = createException(MAL, "batmkey.rotate_xor_hash",
     395             :                                                           SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
     396           0 :                 goto bailout;
     397             :         }
     398       10267 :         if (b->ttype == TYPE_msk || mask_cand(b)) {
     399           0 :                 BAT *ob = b;
     400           0 :                 b = BATunmask(b);
     401           0 :                 BBPunfix(ob->batCacheid);
     402           0 :                 if (!b) {
     403           0 :                         BBPunfix(hb->batCacheid);
     404           0 :                         throw(MAL, "batmkey.rotate_xor_hash", GDK_EXCEPTION);
     405             :                 }
     406             :         }
     407       10267 :         if ((sid1 && !is_bat_nil(*sid1) && !(s1 = BATdescriptor(*sid1)))
     408       10267 :                 || (sid2 && !is_bat_nil(*sid2) && !(s2 = BATdescriptor(*sid2)))) {
     409           0 :                 msg = createException(MAL, "batmkey.rotate_xor_hash",
     410             :                                                           SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
     411           0 :                 goto bailout;
     412             :         }
     413             : 
     414       10267 :         canditer_init(&ci1, hb, s1);
     415       10267 :         canditer_init(&ci2, b, s2);
     416       10268 :         if (ci2.ncand != ci1.ncand || ci1.hseq != ci2.hseq) {
     417           0 :                 msg = createException(MAL, "batmkey.rotate_xor_hash",
     418             :                                                           ILLEGAL_ARGUMENT
     419             :                                                           " Requires bats of identical size");
     420           0 :                 goto bailout;
     421             :         }
     422             : 
     423       10268 :         if (!(bn = COLnew(ci1.hseq, TYPE_lng, ci1.ncand, TRANSIENT))) {
     424           0 :                 msg = createException(MAL, "batmkey.rotate_xor_hash",
     425             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     426           0 :                 goto bailout;
     427             :         }
     428             : 
     429       10268 :         r = (ulng *) Tloc(bn, 0);
     430             : 
     431       10268 :         off1 = hb->hseqbase;
     432       10268 :         off2 = b->hseqbase;
     433             : 
     434       10268 :         hbi = bat_iterator(hb);
     435       10267 :         bi = bat_iterator(b);
     436       10267 :         if (complex_cand(b)) {
     437           0 :                 const ulng *restrict h = (const ulng *) hbi.base;
     438           0 :                 for (BUN i = 0; i < ci1.ncand; i++) {
     439           0 :                         oid p1 = canditer_next(&ci1) - off1;
     440           0 :                         oid p2 = canditer_next(&ci2) - off2;
     441           0 :                         r[i] = GDK_ROTATE(h[p1], lbit,
     442           0 :                                                           rbit) ^ MKEYHASH_oid(*(oid *) Tpos(&bi, p2));
     443             :                 }
     444             :         } else {
     445       10267 :                 switch (ATOMstorage(b->ttype)) {
     446         243 :                 case TYPE_bte:
     447       23630 :                         MKEYbulk_rotate_xor_hashloop(bte);
     448             :                         break;
     449          94 :                 case TYPE_sht:
     450        2549 :                         MKEYbulk_rotate_xor_hashloop(sht);
     451             :                         break;
     452        6180 :                 case TYPE_int:
     453             :                 case TYPE_flt:
     454    55120479 :                         MKEYbulk_rotate_xor_hashloop(int);
     455             :                         break;
     456         526 :                 case TYPE_lng:{         /* hb and b areas may overlap, so for this case the 'restrict' keyword cannot be used */
     457         526 :                         const ulng *h = (const ulng *) hbi.base;
     458         526 :                         const lng *v = (const lng *) bi.base;
     459         526 :                         if (ci1.tpe == cand_dense && ci2.tpe == cand_dense) {
     460      339355 :                                 for (BUN i = 0; i < ci1.ncand; i++) {
     461      338829 :                                         oid p1 = (canditer_next_dense(&ci1) - off1),
     462      338829 :                                                 p2 = (canditer_next_dense(&ci2) - off2);
     463      338829 :                                         r[i] = GDK_ROTATE(h[p1], lbit,
     464      338829 :                                                                           rbit) ^ (ulng) MKEYHASH_lng(v[p2]);
     465             :                                 }
     466             :                         } else {
     467           0 :                                 for (BUN i = 0; i < ci1.ncand; i++) {
     468           0 :                                         oid p1 = (canditer_next(&ci1) - off1),
     469           0 :                                                 p2 = (canditer_next(&ci2) - off2);
     470           0 :                                         r[i] = GDK_ROTATE(h[p1], lbit,
     471           0 :                                                                           rbit) ^ (ulng) MKEYHASH_lng(v[p2]);
     472             :                                 }
     473             :                         }
     474             :                         break;
     475             :                 }
     476          60 :                 case TYPE_dbl:
     477         310 :                         MKEYbulk_rotate_xor_hashloop(lng);
     478             :                         break;
     479             : #ifdef HAVE_HGE
     480           4 :                 case TYPE_hge:
     481          14 :                         MKEYbulk_rotate_xor_hashloop(hge);
     482             :                         break;
     483             : #endif
     484        3160 :                 default:{
     485        3160 :                         const ulng *restrict h = (const ulng *) hbi.base;
     486        3160 :                         BUN (*hash)(const void *) = BATatoms[b->ttype].atomHash;
     487             : 
     488        3160 :                         if (ci1.tpe == cand_dense && ci2.tpe == cand_dense) {
     489     1460919 :                                 for (BUN i = 0; i < ci1.ncand; i++) {
     490     1457761 :                                         oid p1 = (canditer_next_dense(&ci1) - off1),
     491     1457761 :                                                 p2 = (canditer_next_dense(&ci2) - off2);
     492     2915520 :                                         r[i] = GDK_ROTATE(h[p1], lbit,
     493     1457761 :                                                                           rbit) ^ (ulng) hash(BUNtail(bi, p2));
     494             :                                 }
     495             :                         } else {
     496           0 :                                 for (BUN i = 0; i < ci1.ncand; i++) {
     497           0 :                                         oid p1 = (canditer_next(&ci1) - off1),
     498           0 :                                                 p2 = (canditer_next(&ci2) - off2);
     499           0 :                                         r[i] = GDK_ROTATE(h[p1], lbit,
     500           0 :                                                                           rbit) ^ (ulng) hash(BUNtail(bi, p2));
     501             :                                 }
     502             :                         }
     503             :                         break;
     504             :                 }
     505             :                 }
     506             :         }
     507       10265 :         bat_iterator_end(&hbi);
     508       10268 :         bat_iterator_end(&bi);
     509             : 
     510       10268 :   bailout:
     511       10268 :         BBPreclaim(b);
     512       10268 :         BBPreclaim(hb);
     513       10267 :         BBPreclaim(s1);
     514       10268 :         BBPreclaim(s2);
     515       10268 :         if (bn) {
     516       10268 :                 assert(msg == MAL_SUCCEED);
     517       10268 :                 BATsetcount(bn, ci1.ncand);
     518       10268 :                 bn->tnonil = false;
     519       10268 :                 bn->tnil = false;
     520       10268 :                 bn->tkey = BATcount(bn) <= 1;
     521       10268 :                 bn->tsorted = BATcount(bn) <= 1;
     522       10268 :                 bn->trevsorted = BATcount(bn) <= 1;
     523       10268 :                 *res = bn->batCacheid;
     524       10268 :                 BBPkeepref(bn);
     525             :         }
     526             :         return msg;
     527             : }
     528             : 
     529             : static str
     530           0 : MKEYbulkconst_rotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
     531             :                                                           InstrPtr pci)
     532             : {
     533           0 :         bat *res = getArgReference_bat(stk, pci, 0),
     534           0 :                 *hid = getArgReference_bat(stk, pci, 1),
     535           0 :                 *sid1 = pci->argc == 5 ? getArgReference_bat(stk, pci, 4) : NULL;
     536           0 :         int lbit = *getArgReference_int(stk, pci, 2),
     537           0 :                 tpe = getArgType(mb, pci, 3), rbit = (int) sizeof(lng) * 8 - lbit;
     538           0 :         ptr pval = getArgReference(stk, pci, 3);
     539           0 :         BAT *hb = NULL, *bn = NULL, *bs = NULL;
     540           0 :         str msg = MAL_SUCCEED;
     541           0 :         struct canditer ci = { 0 };
     542           0 :         oid off;
     543           0 :         ulng *restrict r, val;
     544           0 :         const ulng *restrict h;
     545           0 :         BATiter hbi = { 0 };
     546             : 
     547           0 :         (void) cntxt;
     548           0 :         if (!(hb = BATdescriptor(*hid))) {
     549           0 :                 msg = createException(MAL, "batmkey.rotate_xor_hash",
     550             :                                                           SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
     551           0 :                 goto bailout;
     552             :         }
     553           0 :         if (sid1 && !is_bat_nil(*sid1) && !(bs = BATdescriptor(*sid1))) {
     554           0 :                 msg = createException(MAL, "batmkey.rotate_xor_hash",
     555             :                                                           SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
     556           0 :                 goto bailout;
     557             :         }
     558           0 :         canditer_init(&ci, hb, bs);
     559           0 :         if (!(bn = COLnew(ci.hseq, TYPE_lng, ci.ncand, TRANSIENT))) {
     560           0 :                 msg = createException(MAL, "batmkey.rotate_xor_hash",
     561             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     562           0 :                 goto bailout;
     563             :         }
     564           0 :         off = hb->hseqbase;
     565             : 
     566           0 :         switch (ATOMstorage(tpe)) {
     567           0 :         case TYPE_bte:
     568           0 :                 val = (ulng) MKEYHASH_bte((*(bte *) pval));
     569           0 :                 break;
     570           0 :         case TYPE_sht:
     571           0 :                 val = (ulng) MKEYHASH_sht((*(sht *) pval));
     572           0 :                 break;
     573           0 :         case TYPE_int:
     574             :         case TYPE_flt:
     575           0 :                 val = (ulng) MKEYHASH_int((*(int *) pval));
     576           0 :                 break;
     577           0 :         case TYPE_lng:
     578             :         case TYPE_dbl:
     579           0 :                 val = (ulng) MKEYHASH_lng((*(lng *) pval));
     580           0 :                 break;
     581             : #ifdef HAVE_HGE
     582           0 :         case TYPE_hge:
     583           0 :                 val = (ulng) MKEYHASH_hge((*(hge *) pval));
     584           0 :                 break;
     585             : #endif
     586           0 :         default:
     587           0 :                 if (ATOMextern(tpe))
     588           0 :                         val = (ulng) ATOMhash(tpe, *(ptr *) pval);
     589             :                 else
     590           0 :                         val = (ulng) ATOMhash(tpe, pval);
     591             :                 break;
     592             :         }
     593             : 
     594           0 :         r = (ulng *) Tloc(bn, 0);
     595           0 :         hbi = bat_iterator(hb);
     596           0 :         h = (const ulng *) hbi.base;
     597           0 :         if (ci.tpe == cand_dense) {
     598           0 :                 for (BUN i = 0; i < ci.ncand; i++) {
     599           0 :                         oid p = (canditer_next_dense(&ci) - off);
     600           0 :                         r[i] = GDK_ROTATE(h[p], lbit, rbit) ^ val;
     601             :                 }
     602             :         } else {
     603           0 :                 for (BUN i = 0; i < ci.ncand; i++) {
     604           0 :                         oid p = (canditer_next(&ci) - off);
     605           0 :                         r[i] = GDK_ROTATE(h[p], lbit, rbit) ^ val;
     606             :                 }
     607             :         }
     608           0 :         bat_iterator_end(&hbi);
     609             : 
     610           0 :   bailout:
     611           0 :         BBPreclaim(hb);
     612           0 :         BBPreclaim(bs);
     613           0 :         if (bn) {
     614           0 :                 assert(msg == MAL_SUCCEED);
     615           0 :                 BATsetcount(bn, ci.ncand);
     616           0 :                 bn->tnonil = false;
     617           0 :                 bn->tnil = false;
     618           0 :                 bn->tkey = BATcount(bn) <= 1;
     619           0 :                 bn->tsorted = BATcount(bn) <= 1;
     620           0 :                 bn->trevsorted = BATcount(bn) <= 1;
     621           0 :                 *res = bn->batCacheid;
     622           0 :                 BBPkeepref(bn);
     623             :         }
     624           0 :         return msg;
     625             : }
     626             : 
     627             : #define MKEYconstbulk_rotate_xor_hashloop(TPE) \
     628             :         do { \
     629             :                 const TPE *restrict v = (const TPE *) bi.base; \
     630             :                 if (ci.tpe == cand_dense) { \
     631             :                         for (BUN i = 0; i < ci.ncand; i++) { \
     632             :                                 oid p = (canditer_next_dense(&ci) - off); \
     633             :                                 r[i] = h ^ (ulng) MKEYHASH_##TPE(v[p]); \
     634             :                         } \
     635             :                 } else { \
     636             :                         for (BUN i = 0; i < ci.ncand; i++) { \
     637             :                                 oid p = (canditer_next(&ci) - off); \
     638             :                                 r[i] = h ^ (ulng) MKEYHASH_##TPE(v[p]); \
     639             :                         } \
     640             :                 } \
     641             :         } while (0)
     642             : 
     643             : static str
     644           0 : MKEYconstbulk_rotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
     645             :                                                           InstrPtr pci)
     646             : {
     647           0 :         bat *res = getArgReference_bat(stk, pci, 0),
     648           0 :                 *bid = getArgReference_bat(stk, pci, 3),
     649           0 :                 *sid1 = pci->argc == 5 ? getArgReference_bat(stk, pci, 4) : NULL;
     650           0 :         int lbit = *getArgReference_int(stk, pci, 2),
     651           0 :                 rbit = (int) sizeof(lng) * 8 - lbit;
     652           0 :         BAT *b = NULL, *bn = NULL, *bs = NULL;
     653           0 :         str msg = MAL_SUCCEED;
     654           0 :         struct canditer ci = { 0 };
     655           0 :         oid off;
     656           0 :         ulng *restrict r,
     657           0 :                 h = GDK_ROTATE((ulng) *getArgReference_lng(stk, pci, 1), lbit, rbit);
     658           0 :         BATiter bi = { 0 };
     659             : 
     660           0 :         (void) cntxt;
     661           0 :         (void) mb;
     662           0 :         if (!(b = BATdescriptor(*bid))) {
     663           0 :                 msg = createException(MAL, "batmkey.rotate_xor_hash",
     664             :                                                           SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
     665           0 :                 goto bailout;
     666             :         }
     667           0 :         if (sid1 && !is_bat_nil(*sid1) && !(bs = BATdescriptor(*sid1))) {
     668           0 :                 msg = createException(MAL, "batmkey.rotate_xor_hash",
     669             :                                                           SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
     670           0 :                 goto bailout;
     671             :         }
     672           0 :         canditer_init(&ci, b, bs);
     673           0 :         if (!(bn = COLnew(ci.hseq, TYPE_lng, ci.ncand, TRANSIENT))) {
     674           0 :                 msg = createException(MAL, "batmkey.rotate_xor_hash",
     675             :                                                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     676           0 :                 goto bailout;
     677             :         }
     678           0 :         off = b->hseqbase;
     679           0 :         r = (ulng *) Tloc(bn, 0);
     680             : 
     681           0 :         bi = bat_iterator(b);
     682           0 :         switch (ATOMstorage(b->ttype)) {
     683           0 :         case TYPE_bte:
     684           0 :                 MKEYconstbulk_rotate_xor_hashloop(bte);
     685             :                 break;
     686           0 :         case TYPE_sht:
     687           0 :                 MKEYconstbulk_rotate_xor_hashloop(sht);
     688             :                 break;
     689           0 :         case TYPE_int:
     690             :         case TYPE_flt:
     691           0 :                 MKEYconstbulk_rotate_xor_hashloop(int);
     692             :                 break;
     693           0 :         case TYPE_lng:
     694             :         case TYPE_dbl:
     695           0 :                 MKEYconstbulk_rotate_xor_hashloop(lng);
     696             :                 break;
     697             : #ifdef HAVE_HGE
     698           0 :         case TYPE_hge:
     699           0 :                 MKEYconstbulk_rotate_xor_hashloop(hge);
     700             :                 break;
     701             : #endif
     702           0 :         default:{
     703           0 :                 BUN (*hash)(const void *) = BATatoms[b->ttype].atomHash;
     704             : 
     705           0 :                 if (ci.tpe == cand_dense) {
     706           0 :                         for (BUN i = 0; i < ci.ncand; i++) {
     707           0 :                                 oid p = (canditer_next_dense(&ci) - off);
     708           0 :                                 r[i] = h ^ (ulng) hash(BUNtail(bi, p));
     709             :                         }
     710             :                 } else {
     711           0 :                         for (BUN i = 0; i < ci.ncand; i++) {
     712           0 :                                 oid p = (canditer_next(&ci) - off);
     713           0 :                                 r[i] = h ^ (ulng) hash(BUNtail(bi, p));
     714             :                         }
     715             :                 }
     716             :                 break;
     717             :         }
     718             :         }
     719           0 :         bat_iterator_end(&bi);
     720             : 
     721           0 :   bailout:
     722           0 :         BBPreclaim(b);
     723           0 :         BBPreclaim(bs);
     724           0 :         if (bn) {
     725           0 :                 assert(msg == MAL_SUCCEED);
     726           0 :                 BATsetcount(bn, ci.ncand);
     727           0 :                 bn->tnonil = false;
     728           0 :                 bn->tnil = false;
     729           0 :                 bn->tkey = BATcount(bn) <= 1;
     730           0 :                 bn->tsorted = BATcount(bn) <= 1;
     731           0 :                 bn->trevsorted = BATcount(bn) <= 1;
     732           0 :                 *res = bn->batCacheid;
     733           0 :                 BBPkeepref(bn);
     734             :         }
     735           0 :         return msg;
     736             : }
     737             : 
     738             : #include "mel.h"
     739             : mel_func mkey_init_funcs[] = {
     740             :  pattern("mkey", "hash", MKEYhash, false, "calculate a hash value", args(1,2, arg("",lng),argany("v",0))),
     741             :  pattern("batmkey", "hash", MKEYbathash, false, "calculate a hash value", args(1,2, batarg("",lng),batargany("b",0))),
     742             :  pattern("batmkey", "hash", MKEYbathash, false, "calculate a hash value, with a candidate list", args(1,3, batarg("",lng),batargany("b",0),batarg("s",oid))),
     743             :  pattern("mkey", "rotate_xor_hash", MKEYrotate_xor_hash, false, "post: [:xor=]([:rotate=](h, nbits), [hash](b))", args(1,4, arg("",lng),arg("h",lng),arg("nbits",int),argany("v",0))),
     744             :  pattern("batmkey", "rotate_xor_hash", MKEYbulkconst_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b))", args(1,4, batarg("",lng),batarg("h",lng),arg("nbits",int),argany("v",0))),
     745             :  pattern("batmkey", "rotate_xor_hash", MKEYbulkconst_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b)), with a candidate list", args(1,5, batarg("",lng),batarg("h",lng),arg("nbits",int),argany("v",0),batarg("s",oid))),
     746             :  pattern("batmkey", "rotate_xor_hash", MKEYconstbulk_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b))", args(1,4, batarg("",lng),arg("h",lng),arg("nbits",int),batargany("b",0))),
     747             :  pattern("batmkey", "rotate_xor_hash", MKEYconstbulk_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b)), with a candidate list", args(1,5, batarg("",lng),arg("h",lng),arg("nbits",int),batargany("b",0),batarg("s",oid))),
     748             :  pattern("batmkey", "rotate_xor_hash", MKEYbulk_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b))", args(1,4, batarg("",lng),batarg("h",lng),arg("nbits",int),batargany("b",0))),
     749             :  pattern("batmkey", "rotate_xor_hash", MKEYbulk_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b)), with candidate lists", args(1,6, batarg("",lng),batarg("h",lng),arg("nbits",int),batargany("b",0),batarg("s1",oid),batarg("s2",oid))),
     750             :  { .imp=NULL }
     751             : };
     752             : #include "mal_import.h"
     753             : #ifdef _MSC_VER
     754             : #undef read
     755             : #pragma section(".CRT$XCU",read)
     756             : #endif
     757         329 : LIB_STARTUP_FUNC(init_mkey_mal)
     758         329 : { mal_module("mkey", NULL, mkey_init_funcs); }

Generated by: LCOV version 1.14