LCOV - code coverage report
Current view: top level - gdk - gdk_hash.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 560 780 71.8 %
Date: 2024-12-20 21:24:02 Functions: 20 24 83.3 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : /*
      14             :  * - Hash Table Creation
      15             :  * The hash indexing scheme for BATs reserves a block of memory to
      16             :  * maintain the hash table and a collision list. A one-to-one mapping
      17             :  * exists between the BAT and the collision list using the BUN
      18             :  * index. NOTE: we alloc the link list as a parallel array to the BUN
      19             :  * array; hence the hash link array has the same size as
      20             :  * BATcapacity(b) (not BATcount(b)). This allows us in the BUN insert
      21             :  * and delete to assume that there is hash space if there is BUN
      22             :  * space.
      23             :  *
      24             :  * The hash mask size is a power of two, so we can do bitwise AND on
      25             :  * the hash (integer) number to quickly find the head of the bucket
      26             :  * chain.  Clearly, the hash mask size is a crucial parameter. If we
      27             :  * know that the column is unique (tkey), we use direct hashing (mask
      28             :  * size ~= BATcount). Otherwise we dynamically determine the mask size
      29             :  * by starting out with mask size = BATcount/64 (just 1.5% of memory
      30             :  * storage overhead). Then we start building the hash table on the
      31             :  * first 25% of the BAT. As we aim for max-collisions list length of
      32             :  * 4, the list on 25% should not exceed length 1. So, if a small
      33             :  * number of collisssions occurs (mask/2) then we abandon the attempt
      34             :  * and restart with a mask that is 4 times larger. This converges
      35             :  * after three cycles to direct hashing.
      36             :  */
      37             : 
      38             : #include "monetdb_config.h"
      39             : #include "gdk.h"
      40             : #include "gdk_private.h"
      41             : 
      42             : __attribute__((__const__))
      43             : static inline uint8_t
      44     1353708 : HASHwidth(BUN hashsize)
      45             : {
      46     1353708 :         (void) hashsize;
      47             : #ifdef BUN2
      48     1353708 :         if (hashsize <= (BUN) BUN2_NONE)
      49             :                 return BUN2;
      50             : #endif
      51             : #ifdef BUN8
      52         764 :         if (hashsize > (BUN) BUN4_NONE)
      53           0 :                 return BUN8;
      54             : #endif
      55             :         return BUN4;
      56             : }
      57             : 
      58             : __attribute__((__const__))
      59             : static inline BUN
      60        8458 : hashmask(BUN m)
      61             : {
      62        8458 :         m |= m >> 1;
      63        8458 :         m |= m >> 2;
      64        8458 :         m |= m >> 4;
      65        8458 :         m |= m >> 8;
      66        8458 :         m |= m >> 16;
      67             : #if SIZEOF_BUN == 8
      68        8458 :         m |= m >> 32;
      69             : #endif
      70        8458 :         return m;
      71             : }
      72             : 
      73             : static inline void
      74      859434 : HASHclear(Hash *h)
      75             : {
      76             :         /* since BUN2_NONE, BUN4_NONE, BUN8_NONE
      77             :          * are all equal to ~0, i.e., have all bits set,
      78             :          * we can use a simple memset() to clear the Hash,
      79             :          * rather than iteratively assigning individual
      80             :          * BUNi_NONE values in a for-loop
      81             :          */
      82      859434 :         memset(h->Bckt, 0xFF, h->nbucket * h->width);
      83      859434 : }
      84             : 
      85             : #define HASH_VERSION            6
      86             : #define HASH_HEADER_SIZE        7       /* nr of size_t fields in header */
      87             : 
      88             : void
      89    23858134 : doHASHdestroy(BAT *b, Hash *hs)
      90             : {
      91    23858134 :         if (hs == (Hash *) 1) {
      92          55 :                 GDKunlink(BBPselectfarm(b->batRole, b->ttype, hashheap),
      93             :                           BATDIR,
      94          55 :                           BBP_physical(b->batCacheid),
      95             :                           "thashl");
      96          55 :                 GDKunlink(BBPselectfarm(b->batRole, b->ttype, hashheap),
      97             :                           BATDIR,
      98          55 :                           BBP_physical(b->batCacheid),
      99             :                           "thashb");
     100    23858079 :         } else if (hs) {
     101       11851 :                 TRC_DEBUG(ACCELERATOR, ALGOBATFMT ": removing%s hash\n", ALGOBATPAR(b), *(size_t *) hs->heapbckt.base & (1 << 24) ? " persisted" : "");
     102       11851 :                 HEAPfree(&hs->heapbckt, true);
     103       11844 :                 HEAPfree(&hs->heaplink, true);
     104       11853 :                 GDKfree(hs);
     105             :         }
     106    23858142 : }
     107             : 
     108             : gdk_return
     109      859350 : HASHnew(Hash *h, int tpe, BUN size, BUN mask, BUN count, bool bcktonly)
     110             : {
     111      859350 :         if (h->width == 0)
     112      846466 :                 h->width = HASHwidth(size);
     113             : 
     114      859350 :         if (!bcktonly) {
     115      845753 :                 if (HEAPalloc(&h->heaplink, size, h->width) != GDK_SUCCEED)
     116             :                         return GDK_FAIL;
     117      845802 :                 h->heaplink.free = size * h->width;
     118      845802 :                 h->heaplink.dirty = true;
     119      845802 :                 h->Link = h->heaplink.base;
     120             :         }
     121      859399 :         if (HEAPalloc(&h->heapbckt, mask + HASH_HEADER_SIZE * SIZEOF_SIZE_T / h->width, h->width) != GDK_SUCCEED) {
     122           0 :                 if (!bcktonly) {
     123           0 :                         HEAPfree(&h->heaplink, true);
     124           0 :                         h->heaplink.free = 0;
     125           0 :                         h->Link = NULL;
     126             :                 }
     127           0 :                 return GDK_FAIL;
     128             :         }
     129      859450 :         h->heapbckt.free = mask * h->width + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     130      859450 :         h->heapbckt.dirty = true;
     131      859450 :         h->nbucket = mask;
     132      859450 :         if (mask & (mask - 1)) {
     133        8269 :                 h->mask2 = hashmask(mask);
     134        8269 :                 h->mask1 = h->mask2 >> 1;
     135             :         } else {
     136             :                 /* mask is a power of two */
     137      851181 :                 h->mask1 = mask - 1;
     138      851181 :                 h->mask2 = h->mask1 << 1 | 1;
     139             :         }
     140      859450 :         h->Bckt = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     141      859450 :         h->type = tpe;
     142      859450 :         HASHclear(h);           /* zero the mask */
     143      859423 :         ((size_t *) h->heapbckt.base)[0] = (size_t) HASH_VERSION;
     144      859423 :         ((size_t *) h->heapbckt.base)[1] = (size_t) size;
     145      859423 :         ((size_t *) h->heapbckt.base)[2] = (size_t) h->nbucket;
     146      859423 :         ((size_t *) h->heapbckt.base)[3] = (size_t) h->width;
     147      859423 :         ((size_t *) h->heapbckt.base)[4] = (size_t) count;
     148      859423 :         ((size_t *) h->heapbckt.base)[5] = (size_t) h->nunique;
     149      859423 :         ((size_t *) h->heapbckt.base)[6] = (size_t) h->nheads;
     150      859423 :         TRC_DEBUG(ACCELERATOR,
     151             :                   "create hash(size " BUNFMT ", mask " BUNFMT ", width %d, total " BUNFMT " bytes);\n", size, mask, h->width, (size + mask) * h->width);
     152             :         return GDK_SUCCEED;
     153             : }
     154             : 
     155             : /* collect HASH statistics for analysis */
     156             : static void
     157           0 : HASHcollisions(BAT *b, Hash *h, const char *func)
     158             : {
     159           0 :         lng cnt, entries = 0, max = 0;
     160           0 :         double total = 0;
     161           0 :         BUN p, i, j;
     162             : 
     163           0 :         if (b == 0 || h == 0)
     164             :                 return;
     165           0 :         for (i = 0, j = h->nbucket; i < j; i++)
     166           0 :                 if ((p = HASHget(h, i)) != BUN_NONE) {
     167           0 :                         entries++;
     168           0 :                         cnt = 0;
     169           0 :                         for (; p != BUN_NONE; p = HASHgetlink(h, p))
     170           0 :                                 cnt++;
     171           0 :                         if (cnt > max)
     172             :                                 max = cnt;
     173           0 :                         total += cnt;
     174             :                 }
     175           0 :         TRC_DEBUG_ENDIF(ACCELERATOR,
     176             :                         "%s(" ALGOBATFMT "): statistics " BUNFMT ", "
     177             :                         "entries " LLFMT ", nunique " BUNFMT ", "
     178             :                         "nbucket " BUNFMT ", max " LLFMT ", avg %2.6f;\n",
     179             :                         func, ALGOBATPAR(b), BATcount(b), entries,
     180             :                         h->nunique, h->nbucket, max,
     181             :                         entries == 0 ? 0 : total / entries);
     182             : }
     183             : 
     184             : static gdk_return
     185           0 : HASHupgradehashheap(BAT *b)
     186             : {
     187             : #if defined(BUN2) || defined(BUN8)
     188           0 :         Hash *h = b->thash;
     189           0 :         int nwidth = h->width << 1;
     190           0 :         BUN i;
     191             : 
     192           0 :         assert(nwidth <= SIZEOF_BUN);
     193           0 :         assert((nwidth & (nwidth - 1)) == 0);
     194             : 
     195           0 :         if (HEAPextend(&h->heaplink, h->heaplink.size * nwidth / h->width, true) != GDK_SUCCEED ||
     196           0 :             HEAPextend(&h->heapbckt, (h->heapbckt.size - HASH_HEADER_SIZE * SIZEOF_SIZE_T) * nwidth / h->width + HASH_HEADER_SIZE * SIZEOF_SIZE_T, true) != GDK_SUCCEED) {
     197           0 :                 b->thash = NULL;
     198           0 :                 doHASHdestroy(b, h);
     199           0 :                 return GDK_FAIL;
     200             :         }
     201           0 :         h->Link = h->heaplink.base;
     202           0 :         h->Bckt = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     203           0 :         switch (nwidth) {
     204           0 :         case BUN4:
     205             : #ifdef BUN2
     206           0 :                 switch (h->width) {
     207           0 :                 case BUN2:
     208           0 :                         i = h->heaplink.free / h->width;
     209           0 :                         h->heaplink.free = i * nwidth;
     210           0 :                         while (i > 0) {
     211           0 :                                 i--;
     212           0 :                                 BUN2type v = ((BUN2type *) h->Link)[i];
     213           0 :                                 ((BUN4type *) h->Link)[i] = v == BUN2_NONE ? BUN4_NONE : v;
     214             :                         }
     215           0 :                         i = (h->heapbckt.free - HASH_HEADER_SIZE * SIZEOF_SIZE_T) / h->width;
     216           0 :                         h->heapbckt.free = HASH_HEADER_SIZE * SIZEOF_SIZE_T + i * nwidth;
     217           0 :                         while (i > 0) {
     218           0 :                                 i--;
     219           0 :                                 BUN2type v = ((BUN2type *) h->Bckt)[i];
     220           0 :                                 ((BUN4type *) h->Bckt)[i] = v == BUN2_NONE ? BUN4_NONE : v;
     221             :                         }
     222           0 :                         h->heapbckt.dirty = true;
     223           0 :                         h->heaplink.dirty = true;
     224           0 :                         break;
     225             :                 }
     226             : #endif
     227             :                 break;
     228             : #ifdef BUN8
     229           0 :         case BUN8:
     230           0 :                 switch (h->width) {
     231             : #ifdef BUN2
     232           0 :                 case BUN2:
     233           0 :                         i = h->heaplink.free / h->width;
     234           0 :                         h->heaplink.free = i * nwidth;
     235           0 :                         while (i > 0) {
     236           0 :                                 i--;
     237           0 :                                 BUN2type v = ((BUN2type *) h->Link)[i];
     238           0 :                                 ((BUN8type *) h->Link)[i] = v == BUN2_NONE ? BUN8_NONE : v;
     239             :                         }
     240           0 :                         i = (h->heapbckt.free - HASH_HEADER_SIZE * SIZEOF_SIZE_T) / h->width;
     241           0 :                         h->heapbckt.free = HASH_HEADER_SIZE * SIZEOF_SIZE_T + i * nwidth;
     242           0 :                         while (i > 0) {
     243           0 :                                 i--;
     244           0 :                                 BUN2type v = ((BUN2type *) h->Bckt)[i];
     245           0 :                                 ((BUN8type *) h->Bckt)[i] = v == BUN2_NONE ? BUN8_NONE : v;
     246             :                         }
     247           0 :                         h->heapbckt.dirty = true;
     248           0 :                         h->heaplink.dirty = true;
     249           0 :                         break;
     250             : #endif
     251           0 :                 case BUN4:
     252           0 :                         i = h->heaplink.free / h->width;
     253           0 :                         h->heaplink.free = i * nwidth;
     254           0 :                         while (i > 0) {
     255           0 :                                 i--;
     256           0 :                                 BUN4type v = ((BUN4type *) h->Link)[i];
     257           0 :                                 ((BUN8type *) h->Link)[i] = v == BUN4_NONE ? BUN8_NONE : v;
     258             :                         }
     259           0 :                         i = (h->heapbckt.free - HASH_HEADER_SIZE * SIZEOF_SIZE_T) / h->width;
     260           0 :                         h->heapbckt.free = HASH_HEADER_SIZE * SIZEOF_SIZE_T + i * nwidth;
     261           0 :                         while (i > 0) {
     262           0 :                                 i--;
     263           0 :                                 BUN4type v = ((BUN4type *) h->Bckt)[i];
     264           0 :                                 ((BUN8type *) h->Bckt)[i] = v == BUN4_NONE ? BUN8_NONE : v;
     265             :                         }
     266           0 :                         h->heapbckt.dirty = true;
     267           0 :                         h->heaplink.dirty = true;
     268           0 :                         break;
     269             :                 }
     270             :                 break;
     271             : #endif
     272             :         }
     273           0 :         h->width = nwidth;
     274             : #else
     275             :         (void) b;
     276             : #endif
     277           0 :         return GDK_SUCCEED;
     278             : }
     279             : 
     280             : /* write/remove the bit into/from the hash file that indicates the hash
     281             :  * is good to go; the bit is the last part to be written and the first
     282             :  * to be removed */
     283             : static inline gdk_return
     284      558707 : HASHfix(Hash *h, bool save, bool dosync)
     285             : {
     286      558707 :         if (!h->heapbckt.dirty && !h->heaplink.dirty) {
     287       24349 :                 const size_t mask = (size_t) 1 << 24;
     288       24349 :                 if (((size_t *) h->heapbckt.base)[0] & mask) {
     289       10718 :                         if (save)
     290             :                                 return GDK_SUCCEED;
     291       10718 :                         ((size_t *) h->heapbckt.base)[0] &= ~mask;
     292             :                 } else {
     293       13631 :                         if (!save)
     294             :                                 return GDK_SUCCEED;
     295       13631 :                         ((size_t *) h->heapbckt.base)[0] |= mask;
     296             :                 }
     297       24349 :                 if (h->heapbckt.storage == STORE_MEM) {
     298       24322 :                         gdk_return rc = GDK_FAIL;
     299       24322 :                         int fd = GDKfdlocate(h->heapbckt.farmid, h->heapbckt.filename, "rb+", NULL);
     300       24322 :                         if (fd >= 0) {
     301       24322 :                                 if (write(fd, h->heapbckt.base, SIZEOF_SIZE_T) == SIZEOF_SIZE_T) {
     302       24322 :                                         if (dosync &&
     303       24322 :                                             !(ATOMIC_GET(&GDKdebug) & NOSYNCMASK)) {
     304             : #if defined(NATIVE_WIN32)
     305             :                                                 _commit(fd);
     306             : #elif defined(HAVE_FDATASYNC)
     307          13 :                                                 fdatasync(fd);
     308             : #elif defined(HAVE_FSYNC)
     309             :                                                 fsync(fd);
     310             : #endif
     311             :                                         }
     312             :                                         rc = GDK_SUCCEED;
     313             :                                 }
     314       24322 :                                 close(fd);
     315             :                         }
     316       24322 :                         if (rc != GDK_SUCCEED)
     317           0 :                                 ((size_t *) h->heapbckt.base)[0] &= ~mask;
     318       24322 :                         return rc;
     319             :                 } else {
     320          27 :                         if (dosync &&
     321          27 :                             !(ATOMIC_GET(&GDKdebug) & NOSYNCMASK) &&
     322           0 :                             MT_msync(h->heapbckt.base, SIZEOF_SIZE_T) < 0) {
     323           0 :                                 ((size_t *) h->heapbckt.base)[0] &= ~mask;
     324           0 :                                 return GDK_FAIL;
     325             :                         }
     326             :                 }
     327             :         }
     328             :         return GDK_SUCCEED;
     329             : }
     330             : 
     331             : static gdk_return
     332      494289 : HASHgrowbucket(BAT *b)
     333             : {
     334      494289 :         Hash *h = b->thash;
     335      494289 :         BUN nbucket;
     336      494289 :         BUN onbucket = h->nbucket;
     337      494289 :         lng t0 = 0;
     338             : 
     339      494289 :         TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec();
     340             : 
     341             :         /* only needed to fix hash tables built before this fix was
     342             :          * introduced */
     343      494289 :         if (h->width < SIZEOF_BUN &&
     344      494289 :             ((BUN) 1 << (h->width * 8)) - 1 <= h->mask2 &&
     345           0 :             HASHupgradehashheap(b) != GDK_SUCCEED)
     346             :                 return GDK_FAIL;
     347             : 
     348      494289 :         h->heapbckt.dirty = true;
     349      494289 :         h->heaplink.dirty = true;
     350      750650 :         while (h->nunique >= (nbucket = h->nbucket) * 7 / 8) {
     351      256361 :                 BUN new = h->nbucket;
     352      256361 :                 BUN old = new & h->mask1;
     353      256361 :                 BUN mask = h->mask1 + 1; /* == h->mask2 - h->mask1 */
     354             : 
     355      256361 :                 assert(h->heapbckt.free == nbucket * h->width + HASH_HEADER_SIZE * SIZEOF_SIZE_T);
     356      256361 :                 if (h->heapbckt.free + h->width > h->heapbckt.size) {
     357        4075 :                         if (HEAPextend(&h->heapbckt,
     358             :                                        h->heapbckt.size + GDK_mmap_pagesize,
     359             :                                        true) != GDK_SUCCEED) {
     360           0 :                                 return GDK_FAIL;
     361             :                         }
     362        4075 :                         h->Bckt = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     363             :                 }
     364      256361 :                 assert(h->heapbckt.free + h->width <= h->heapbckt.size);
     365      256361 :                 if (h->nbucket == h->mask2) {
     366         290 :                         h->mask1 = h->mask2;
     367         290 :                         h->mask2 |= h->mask2 << 1;
     368         290 :                         if (h->width < SIZEOF_BUN &&
     369         290 :                             h->mask2 == ((BUN) 1 << (h->width * 8)) - 1) {
     370             :                                 /* time to widen the hash table */
     371           0 :                                 if (HASHupgradehashheap(b) != GDK_SUCCEED)
     372             :                                         return GDK_FAIL;
     373             :                         }
     374             :                 }
     375      256361 :                 h->nbucket++;
     376      256361 :                 h->heapbckt.free += h->width;
     377      256361 :                 BUN lold, lnew, hb;
     378      256361 :                 lold = lnew = BUN_NONE;
     379      256361 :                 BATiter bi = bat_iterator(b);
     380      256361 :                 if ((hb = HASHget(h, old)) != BUN_NONE) {
     381      207199 :                         h->nheads--;
     382      332710 :                         do {
     383      332710 :                                 const void *v = BUNtail(bi, hb);
     384      332710 :                                 BUN hsh = ATOMhash(h->type, v);
     385      332710 :                                 assert((hsh & (mask - 1)) == old);
     386      332710 :                                 if (hsh & mask) {
     387             :                                         /* move to new list */
     388      121089 :                                         if (lnew == BUN_NONE) {
     389      106026 :                                                 HASHput(h, new, hb);
     390      106026 :                                                 h->nheads++;
     391             :                                         } else
     392       15063 :                                                 HASHputlink(h, lnew, hb);
     393             :                                         lnew = hb;
     394             :                                 } else {
     395             :                                         /* move to old list */
     396      211621 :                                         if (lold == BUN_NONE) {
     397      160231 :                                                 h->nheads++;
     398      160231 :                                                 HASHput(h, old, hb);
     399             :                                         } else
     400       51390 :                                                 HASHputlink(h, lold, hb);
     401             :                                         lold = hb;
     402             :                                 }
     403      332710 :                                 hb = HASHgetlink(h, hb);
     404      332710 :                         } while (hb != BUN_NONE);
     405             :                 }
     406      256361 :                 bat_iterator_end(&bi);
     407      256361 :                 if (lnew == BUN_NONE)
     408      150335 :                         HASHput(h, new, BUN_NONE);
     409             :                 else
     410      106026 :                         HASHputlink(h, lnew, BUN_NONE);
     411      256361 :                 if (lold == BUN_NONE)
     412       96130 :                         HASHput(h, old, BUN_NONE);
     413             :                 else
     414      160231 :                         HASHputlink(h, lold, BUN_NONE);
     415             :         }
     416      494289 :         TRC_DEBUG_IF(ACCELERATOR) if (h->nbucket > onbucket) {
     417           0 :                 TRC_DEBUG_ENDIF(ACCELERATOR, ALGOBATFMT " " BUNFMT
     418             :                         " -> " BUNFMT " buckets (" LLFMT " usec)\n",
     419             :                         ALGOBATPAR(b),
     420             :                         onbucket, h->nbucket, GDKusec() - t0);
     421           0 :                 HASHcollisions(b, h, __func__);
     422             :         }
     423             :         return GDK_SUCCEED;
     424             : }
     425             : 
     426             : /* Return TRUE if we have a hash on the tail, even if we need to read
     427             :  * one from disk.
     428             :  *
     429             :  * Note that the b->thash pointer can be NULL, meaning there is no
     430             :  * hash; (Hash *) 1, meaning there is no hash loaded, but it may exist
     431             :  * on disk; or a valid pointer to a loaded hash.  These values are
     432             :  * maintained here, in the HASHdestroy and HASHfree functions, and in
     433             :  * BBPdiskscan during initialization. */
     434             : bool
     435    62985526 : BATcheckhash(BAT *b)
     436             : {
     437    62985526 :         lng t = 0;
     438    62985526 :         Hash *h;
     439             : 
     440    62985526 :         MT_rwlock_rdlock(&b->thashlock);
     441    63063988 :         h = b->thash;
     442    63063988 :         MT_rwlock_rdunlock(&b->thashlock);
     443    63049479 :         if (h == (Hash *) 1) {
     444             :                 /* but when we want to change it, we need the lock */
     445         419 :                 TRC_DEBUG_IF(ACCELERATOR) t = GDKusec();
     446         419 :                 MT_rwlock_wrlock(&b->thashlock);
     447         419 :                 TRC_DEBUG_IF(ACCELERATOR) t = GDKusec() - t;
     448             :                 /* if still 1 now that we have the lock, we can update */
     449         419 :                 if (b->thash == (Hash *) 1) {
     450         419 :                         int fd;
     451             : 
     452         419 :                         assert(!GDKinmemory(b->theap->farmid));
     453         419 :                         b->thash = NULL;
     454         419 :                         if ((h = GDKzalloc(sizeof(*h))) != NULL &&
     455         419 :                             (h->heaplink.farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) >= 0 &&
     456         419 :                             (h->heapbckt.farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) >= 0) {
     457         419 :                                 const char *nme = BBP_physical(b->batCacheid);
     458         419 :                                 strconcat_len(h->heaplink.filename,
     459             :                                               sizeof(h->heaplink.filename),
     460             :                                               nme, ".thashl", NULL);
     461         419 :                                 strconcat_len(h->heapbckt.filename,
     462             :                                               sizeof(h->heapbckt.filename),
     463             :                                               nme, ".thashb", NULL);
     464         419 :                                 h->heaplink.storage = STORE_INVALID;
     465         419 :                                 h->heaplink.newstorage = STORE_INVALID;
     466         419 :                                 h->heapbckt.storage = STORE_INVALID;
     467         419 :                                 h->heapbckt.newstorage = STORE_INVALID;
     468             : 
     469             :                                 /* check whether a persisted hash can be found */
     470         419 :                                 if ((fd = GDKfdlocate(h->heapbckt.farmid, nme, "rb+", "thashb")) >= 0) {
     471         419 :                                         size_t hdata[HASH_HEADER_SIZE];
     472         419 :                                         struct stat st;
     473             : 
     474         419 :                                         if (read(fd, hdata, sizeof(hdata)) == sizeof(hdata) &&
     475         419 :                                             (hdata[0] == (((size_t) 1 << 24) | HASH_VERSION)) &&
     476         360 :                                             hdata[1] > 0 &&
     477             :                                             (
     478             : #ifdef BUN2
     479         360 :                                                     hdata[3] == BUN2 ||
     480             : #endif
     481             :                                                     hdata[3] == BUN4
     482             : #ifdef BUN8
     483             :                                                     || hdata[3] == BUN8
     484             : #endif
     485         360 :                                                     ) &&
     486         720 :                                             hdata[4] == (size_t) BATcount(b) &&
     487         360 :                                             fstat(fd, &st) == 0 &&
     488         720 :                                             st.st_size >= (off_t) (h->heapbckt.size = h->heapbckt.free = (h->nbucket = (BUN) hdata[2]) * (BUN) (h->width = (uint8_t) hdata[3]) + HASH_HEADER_SIZE * SIZEOF_SIZE_T) &&
     489         360 :                                             close(fd) == 0 &&
     490         720 :                                             (fd = GDKfdlocate(h->heaplink.farmid, nme, "rb+", "thashl")) >= 0 &&
     491         360 :                                             fstat(fd, &st) == 0 &&
     492         360 :                                             st.st_size > 0 &&
     493         720 :                                             st.st_size >= (off_t) (h->heaplink.size = h->heaplink.free = hdata[1] * h->width) &&
     494         360 :                                             HEAPload(&h->heaplink, nme, "thashl", false) == GDK_SUCCEED) {
     495         360 :                                                 if (HEAPload(&h->heapbckt, nme, "thashb", false) == GDK_SUCCEED) {
     496         360 :                                                         if (h->nbucket & (h->nbucket - 1)) {
     497         190 :                                                                 h->mask2 = hashmask(h->nbucket);
     498         190 :                                                                 h->mask1 = h->mask2 >> 1;
     499             :                                                         } else {
     500         170 :                                                                 h->mask1 = h->nbucket - 1;
     501         170 :                                                                 h->mask2 = h->mask1 << 1 | 1;
     502             :                                                         }
     503         360 :                                                         h->nunique = hdata[5];
     504         360 :                                                         h->nheads = hdata[6];
     505         360 :                                                         h->type = ATOMtype(b->ttype);
     506         360 :                                                         if (h->width < SIZEOF_BUN &&
     507         360 :                                                             ((BUN) 1 << (8 * h->width)) - 1 > h->nbucket) {
     508         360 :                                                                 close(fd);
     509         360 :                                                                 h->Link = h->heaplink.base;
     510         360 :                                                                 h->Bckt = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     511         360 :                                                                 h->heaplink.parentid = b->batCacheid;
     512         360 :                                                                 h->heapbckt.parentid = b->batCacheid;
     513         360 :                                                                 h->heaplink.dirty = false;
     514         360 :                                                                 h->heapbckt.dirty = false;
     515         360 :                                                                 b->thash = h;
     516         360 :                                                                 h->heapbckt.hasfile = true;
     517         360 :                                                                 h->heaplink.hasfile = true;
     518         360 :                                                                 TRC_DEBUG(ACCELERATOR,
     519             :                                                                           ALGOBATFMT ": reusing persisted hash\n", ALGOBATPAR(b));
     520         360 :                                                                 MT_rwlock_wrunlock(&b->thashlock);
     521         360 :                                                                 return true;
     522             :                                                         }
     523             :                                                         /* if h->nbucket
     524             :                                                          * equals the
     525             :                                                          * BUN_NONE
     526             :                                                          * representation
     527             :                                                          * for the
     528             :                                                          * current hash
     529             :                                                          * width (was
     530             :                                                          * possible in
     531             :                                                          * previous
     532             :                                                          * iterations of
     533             :                                                          * the code),
     534             :                                                          * then we can't
     535             :                                                          * use the hash
     536             :                                                          * since we
     537             :                                                          * can't
     538             :                                                          * distinguish
     539             :                                                          * between
     540             :                                                          * end-of-list
     541             :                                                          * and a valid
     542             :                                                          * link */
     543           0 :                                                         HEAPfree(&h->heapbckt, false);
     544             :                                                 }
     545           0 :                                                 HEAPfree(&h->heaplink, false);
     546             :                                         }
     547          59 :                                         close(fd);
     548             :                                         /* unlink unusable file */
     549          59 :                                         GDKunlink(h->heaplink.farmid, BATDIR, nme, "thashl");
     550          59 :                                         GDKunlink(h->heapbckt.farmid, BATDIR, nme, "thashb");
     551          59 :                                         h->heapbckt.hasfile = false;
     552          59 :                                         h->heaplink.hasfile = false;
     553             :                                 }
     554             :                         }
     555          59 :                         GDKfree(h);
     556          59 :                         GDKclrerr();    /* we're not currently interested in errors */
     557             :                 }
     558          59 :                 h = b->thash;
     559          59 :                 MT_rwlock_wrunlock(&b->thashlock);
     560             :         }
     561    63049119 :         if (h != NULL) {
     562     3784669 :                 TRC_DEBUG(ACCELERATOR, ALGOBATFMT ": already has hash, waited " LLFMT " usec\n", ALGOBATPAR(b), t);
     563             :         }
     564    63049119 :         return h != NULL;
     565             : }
     566             : 
     567             : /* figure out size of the hash (sum of the sizes of the two hash files)
     568             :  * without loading them */
     569             : size_t
     570       91013 : HASHsize(BAT *b)
     571             : {
     572       91013 :         size_t sz = 0;
     573       91013 :         MT_rwlock_rdlock(&b->thashlock);
     574       91013 :         if (b->thash == NULL) {
     575             :                 sz = 0;
     576        5186 :         } else if (b->thash != (Hash *) 1) {
     577        4368 :                 sz = b->thash->heaplink.size + b->thash->heapbckt.size;
     578             :         } else {
     579         818 :                 int farmid = BBPselectfarm(b->batRole, b->ttype, hashheap);
     580         818 :                 if (farmid >= 0) {
     581         818 :                         const char *nme = BBP_physical(b->batCacheid);
     582         818 :                         char fname[MAXPATH];
     583             : 
     584         818 :                         if (GDKfilepath(fname, sizeof(fname), farmid, BATDIR, nme, "thashb") == GDK_SUCCEED) {
     585         818 :                                 struct stat st;
     586         818 :                                 if (stat(fname, &st) == 0) {
     587         818 :                                         sz = (size_t) st.st_size;
     588         818 :                                         fname[strlen(fname) - 1] = 'l';
     589         818 :                                         if (stat(fname, &st) == 0)
     590         818 :                                                 sz += (size_t) st.st_size;
     591             :                                         else
     592             :                                                 sz = 0;
     593             :                                 }
     594             :                         }
     595             :                 }
     596             :         }
     597       91013 :         MT_rwlock_rdunlock(&b->thashlock);
     598       91013 :         return sz;
     599             : }
     600             : 
     601             : static void
     602       14277 : BAThashsave_intern(BAT *b, bool dosync)
     603             : {
     604       14277 :         Hash *h;
     605       14277 :         lng t0 = 0;
     606             : 
     607       14277 :         TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec();
     608             : 
     609       14277 :         if ((h = b->thash) != NULL) {
     610             :                 /* only persist if parent BAT hasn't changed in the
     611             :                  * mean time */
     612       14277 :                 if (!b->theap->dirty &&
     613       13631 :                     ((size_t *) h->heapbckt.base)[1] == BATcount(b) &&
     614       27262 :                     ((size_t *) h->heapbckt.base)[4] == BATcount(b) &&
     615       27262 :                     HEAPsave(&h->heaplink, h->heaplink.filename, NULL, dosync, h->heaplink.free, NULL) == GDK_SUCCEED &&
     616       13631 :                     HEAPsave(&h->heapbckt, h->heapbckt.filename, NULL, dosync, h->heapbckt.free, NULL) == GDK_SUCCEED) {
     617       13631 :                         h->heaplink.dirty = false;
     618       13631 :                         h->heapbckt.dirty = false;
     619       13631 :                         h->heaplink.hasfile = true;
     620       13631 :                         h->heapbckt.hasfile = true;
     621       13631 :                         gdk_return rc = HASHfix(h, true, dosync);
     622       13631 :                         TRC_DEBUG(ACCELERATOR,
     623             :                                   ALGOBATFMT ": persisting hash %s%s (" LLFMT " usec)%s\n", ALGOBATPAR(b), h->heapbckt.filename, dosync ? "" : " no sync", GDKusec() - t0, rc == GDK_SUCCEED ? "" : " failed");
     624             :                 }
     625       14277 :                 GDKclrerr();
     626             :         }
     627       14277 : }
     628             : 
     629             : void
     630       14277 : BAThashsave(BAT *b, bool dosync)
     631             : {
     632       14277 :         Hash *h = b->thash;
     633       14277 :         if (h == NULL)
     634             :                 return;
     635       14277 :         ((size_t *) h->heapbckt.base)[0] = (size_t) HASH_VERSION;
     636       14277 :         ((size_t *) h->heapbckt.base)[1] = (size_t) (h->heaplink.free / h->width);
     637       14277 :         ((size_t *) h->heapbckt.base)[2] = (size_t) h->nbucket;
     638       14277 :         ((size_t *) h->heapbckt.base)[3] = (size_t) h->width;
     639       14277 :         ((size_t *) h->heapbckt.base)[4] = (size_t) BATcount(b);
     640       14277 :         ((size_t *) h->heapbckt.base)[5] = (size_t) h->nunique;
     641       14277 :         ((size_t *) h->heapbckt.base)[6] = (size_t) h->nheads;
     642       14277 :         BAThashsave_intern(b, dosync);
     643             : }
     644             : 
     645             : #define EQbte(a, b)     ((a) == (b))
     646             : #define EQsht(a, b)     ((a) == (b))
     647             : #define EQint(a, b)     ((a) == (b))
     648             : #define EQlng(a, b)     ((a) == (b))
     649             : #ifdef HAVE_HGE
     650             : #define EQhge(a, b)     ((a) == (b))
     651             : #endif
     652             : #define EQflt(a, b)     (is_flt_nil(a) ? is_flt_nil(b) : (a) == (b))
     653             : #define EQdbl(a, b)     (is_dbl_nil(a) ? is_dbl_nil(b) : (a) == (b))
     654             : #ifdef HAVE_HGE
     655             : #define EQuuid(a, b)    ((a).h == (b).h)
     656             : #else
     657             : #define EQuuid(a, b)    (memcmp((a).u, (b).u, UUID_SIZE) == 0)
     658             : #endif
     659             : 
     660             : #define starthash(TYPE)                                                 \
     661             :         do {                                                            \
     662             :                 const TYPE *restrict v = (const TYPE *) BUNtloc(bi, 0); \
     663             :                 TIMEOUT_LOOP(p, qry_ctx) {                              \
     664             :                         hget = HASHget(h, c);                           \
     665             :                         if (hget == BUN_NONE) {                         \
     666             :                                 if (h->nheads == maxslots)           \
     667             :                                         TIMEOUT_LOOP_BREAK; /* mask too full */ \
     668             :                                 h->nheads++;                         \
     669             :                                 h->nunique++;                                \
     670             :                         } else {                                        \
     671             :                                 for (hb = hget;                         \
     672             :                                      hb != BUN_NONE;                    \
     673             :                                      hb = HASHgetlink(h, hb)) {         \
     674             :                                         if (EQ##TYPE(v[o - b->hseqbase], v[hb])) \
     675             :                                                 break;                  \
     676             :                                 }                                       \
     677             :                                 h->nunique += hb == BUN_NONE;                \
     678             :                         }                                               \
     679             :                         HASHputlink(h, p, hget);                        \
     680             :                         HASHput(h, c, p);                               \
     681             :                         o = canditer_next(ci);                          \
     682             :                 }                                                       \
     683             :                 TIMEOUT_CHECK(qry_ctx,                                  \
     684             :                               GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); \
     685             :         } while (0)
     686             : #define finishhash(TYPE)                                                \
     687             :         do {                                                            \
     688             :                 const TYPE *restrict v = (const TYPE *) BUNtloc(bi, 0); \
     689             :                 TIMEOUT_LOOP(ci->ncand - p, qry_ctx) {                       \
     690             :                         c = hash_##TYPE(h, v + o - b->hseqbase);     \
     691             :                         hget = HASHget(h, c);                           \
     692             :                         h->nheads += hget == BUN_NONE;                       \
     693             :                         if (!hascand) {                                 \
     694             :                                 for (hb = hget;                         \
     695             :                                      hb != BUN_NONE;                    \
     696             :                                      hb = HASHgetlink(h, hb)) {         \
     697             :                                         if (EQ##TYPE(v[o - b->hseqbase], v[hb])) \
     698             :                                                 break;                  \
     699             :                                 }                                       \
     700             :                                 h->nunique += hb == BUN_NONE;                \
     701             :                                 o = canditer_next_dense(ci);            \
     702             :                         } else {                                        \
     703             :                                 o = canditer_next(ci);                  \
     704             :                         }                                               \
     705             :                         HASHputlink(h, p, hget);                        \
     706             :                         HASHput(h, c, p);                               \
     707             :                         p++;                                            \
     708             :                 }                                                       \
     709             :                 TIMEOUT_CHECK(qry_ctx,                                  \
     710             :                               GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); \
     711             :         } while (0)
     712             : 
     713             : /* Internal function to create a hash table for the given BAT b.
     714             :  * If a candidate list s is also given, the hash table is specific for
     715             :  * the combination of the two: only values from b that are referred to
     716             :  * by s are included in the hash table, so if a result is found when
     717             :  * searching the hash table, the result is a candidate. */
     718             : Hash *
     719       13662 : BAThash_impl(BAT *restrict b, struct canditer *restrict ci, const char *restrict ext)
     720             : {
     721       13662 :         lng t0 = 0;
     722       13662 :         BUN cnt1;
     723       13662 :         BUN mask, maxmask = 0;
     724       13662 :         BUN p, c;
     725       13662 :         oid o;
     726       13662 :         BUN hget, hb;
     727       13662 :         Hash *h = NULL;
     728       13662 :         const char *nme = GDKinmemory(b->theap->farmid) ? ":memory:" : BBP_physical(b->batCacheid);
     729       13665 :         BATiter bi = bat_iterator(b);
     730       13664 :         unsigned int tpe = ATOMbasetype(bi.type);
     731       13664 :         bool hascand = ci->tpe != cand_dense || ci->ncand != bi.count;
     732             : 
     733       13664 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
     734             : 
     735       13665 :         assert(strcmp(ext, "thash") != 0 || !hascand);
     736       13665 :         assert(bi.type != TYPE_msk);
     737             : 
     738       27147 :         MT_thread_setalgorithm(hascand ? "create hash with candidates" : "create hash");
     739       13672 :         TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec();
     740       13672 :         TRC_DEBUG(ACCELERATOR,
     741             :                   ALGOBATFMT ": create hash;\n", ALGOBATPAR(b));
     742       13652 :         if (bi.type == TYPE_void) {
     743           0 :                 if (is_oid_nil(b->tseqbase)) {
     744           0 :                         TRC_DEBUG(ACCELERATOR,
     745             :                                   "cannot create hash-table on void-NIL column.\n");
     746           0 :                         GDKerror("no hash on void/nil column\n");
     747           0 :                         bat_iterator_end(&bi);
     748           0 :                         return NULL;
     749             :                 }
     750           0 :                 TRC_DEBUG(ACCELERATOR,
     751             :                           "creating hash-table on void column..\n");
     752           0 :                 assert(0);
     753             :                 tpe = TYPE_void;
     754             :         }
     755             : 
     756       13652 :         if ((h = GDKzalloc(sizeof(*h))) == NULL ||
     757       13666 :             (h->heaplink.farmid = BBPselectfarm(hascand ? TRANSIENT : b->batRole, bi.type, hashheap)) < 0 ||
     758       13658 :             (h->heapbckt.farmid = BBPselectfarm(hascand ? TRANSIENT : b->batRole, bi.type, hashheap)) < 0) {
     759           0 :                 GDKfree(h);
     760           0 :                 bat_iterator_end(&bi);
     761           0 :                 return NULL;
     762             :         }
     763       13656 :         h->width = HASHwidth(BATcapacity(b));
     764       13656 :         h->heaplink.dirty = true;
     765       13656 :         h->heapbckt.dirty = true;
     766       13656 :         strconcat_len(h->heaplink.filename, sizeof(h->heaplink.filename),
     767             :                       nme, ".", ext, "l", NULL);
     768       13666 :         strconcat_len(h->heapbckt.filename, sizeof(h->heapbckt.filename),
     769             :                       nme, ".", ext, "b", NULL);
     770       13644 :         h->heapbckt.parentid = b->batCacheid;
     771       13644 :         h->heaplink.parentid = b->batCacheid;
     772       13644 :         if (HEAPalloc(&h->heaplink, hascand ? ci->ncand : BATcapacity(b),
     773       13644 :                       h->width) != GDK_SUCCEED) {
     774           0 :                 GDKfree(h);
     775           0 :                 bat_iterator_end(&bi);
     776           0 :                 return NULL;
     777             :         }
     778       13661 :         h->heaplink.free = ci->ncand * h->width;
     779       13661 :         h->Link = h->heaplink.base;
     780             : #ifndef NDEBUG
     781             :         /* clear unused part of Link array */
     782       13661 :         if (h->heaplink.size > h->heaplink.free)
     783       12410 :                 memset(h->heaplink.base + h->heaplink.free, 0,
     784             :                        h->heaplink.size - h->heaplink.free);
     785             : #endif
     786             : 
     787             :         /* determine hash mask size */
     788       13661 :         cnt1 = 0;
     789       13661 :         if (ATOMsize(tpe) == 1) {
     790             :                 /* perfect hash for one-byte sized atoms */
     791             :                 mask = (1 << 8);
     792       13643 :         } else if (ATOMsize(tpe) == 2) {
     793             :                 /* perfect hash for two-byte sized atoms */
     794             :                 mask = (1 << 16);
     795       13589 :         } else if (bi.key || ci->ncand <= 4096) {
     796             :                 /* if key, or if small, don't bother dynamically
     797             :                  * adjusting the hash mask */
     798       13437 :                 mask = HASHmask(ci->ncand);
     799         152 :         } else if (!hascand && bi.unique_est != 0) {
     800         152 :                 mask = (BUN) (bi.unique_est * 1.15); /* about 8/7 */
     801             :         } else {
     802             :                 /* dynamic hash: we start with HASHmask(ci->ncand)/64, or,
     803             :                  * if ci->ncand large enough, HASHmask(ci->ncand)/256; if there
     804             :                  * are too many collisions we try HASHmask(ci->ncand)/64,
     805             :                  * HASHmask(ci->ncand)/16, HASHmask(ci->ncand)/4, and finally
     806             :                  * HASHmask(ci->ncand), but we might skip some of these if
     807             :                  * there are many distinct values.  */
     808           0 :                 maxmask = HASHmask(ci->ncand);
     809           0 :                 mask = maxmask >> 6;
     810           0 :                 while (mask > 4096)
     811           0 :                         mask >>= 2;
     812             :                 /* try out on first 25% of b */
     813           0 :                 cnt1 = ci->ncand >> 2;
     814             :         }
     815             : 
     816       13661 :         o = canditer_next(ci);  /* always one ahead */
     817           0 :         for (;;) {
     818       13644 :                 lng t1 = 0;
     819       13644 :                 TRC_DEBUG_IF(ACCELERATOR) t1 = GDKusec();
     820       13644 :                 BUN maxslots = (mask >> 3) - 1;   /* 1/8 full is too full */
     821             : 
     822       13644 :                 h->nheads = 0;
     823       13644 :                 h->nunique = 0;
     824       13644 :                 p = 0;
     825       13644 :                 HEAPfree(&h->heapbckt, true);
     826             :                 /* create the hash structures */
     827       27285 :                 if (HASHnew(h, ATOMtype(bi.type), BATcapacity(b),
     828             :                             mask, ci->ncand, true) != GDK_SUCCEED) {
     829           0 :                         HEAPfree(&h->heaplink, true);
     830           0 :                         GDKfree(h);
     831           0 :                         bat_iterator_end(&bi);
     832           0 :                         return NULL;
     833             :                 }
     834             : 
     835       13612 :                 switch (tpe) {
     836          18 :                 case TYPE_bte:
     837          36 :                         starthash(bte);
     838             :                         break;
     839          54 :                 case TYPE_sht:
     840         108 :                         starthash(sht);
     841             :                         break;
     842          11 :                 case TYPE_flt:
     843          22 :                         starthash(flt);
     844             :                         break;
     845       11995 :                 case TYPE_int:
     846       23983 :                         starthash(int);
     847             :                         break;
     848           4 :                 case TYPE_dbl:
     849           8 :                         starthash(dbl);
     850             :                         break;
     851         750 :                 case TYPE_lng:
     852        1500 :                         starthash(lng);
     853             :                         break;
     854             : #ifdef HAVE_HGE
     855           2 :                 case TYPE_hge:
     856           4 :                         starthash(hge);
     857             :                         break;
     858             : #endif
     859           1 :                 case TYPE_uuid:
     860           2 :                         starthash(uuid);
     861             :                         break;
     862         777 :                 default: {
     863         777 :                         int (*atomcmp)(const void *, const void *) = ATOMcompare(h->type);
     864        1554 :                         TIMEOUT_LOOP(p, qry_ctx) {
     865           0 :                                 const void *restrict v = BUNtail(bi, o - b->hseqbase);
     866           0 :                                 c = hash_any(h, v);
     867           0 :                                 hget = HASHget(h, c);
     868           0 :                                 if (hget == BUN_NONE) {
     869           0 :                                         if (h->nheads == maxslots)
     870           0 :                                                 TIMEOUT_LOOP_BREAK; /* mask too full */
     871           0 :                                         h->nheads++;
     872           0 :                                         h->nunique++;
     873             :                                 } else {
     874             :                                         for (hb = hget;
     875           0 :                                              hb != BUN_NONE;
     876           0 :                                              hb = HASHgetlink(h, hb)) {
     877           0 :                                                 if (atomcmp(v,
     878           0 :                                                             BUNtail(bi, hb)) == 0)
     879             :                                                         break;
     880             :                                         }
     881           0 :                                         h->nunique += hb == BUN_NONE;
     882             :                                 }
     883           0 :                                 HASHputlink(h, p, hget);
     884           0 :                                 HASHput(h, c, p);
     885           0 :                                 o = canditer_next(ci);
     886             :                         }
     887         777 :                         TIMEOUT_CHECK(qry_ctx,
     888             :                                       GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
     889             :                         break;
     890             :                 }
     891             :                 }
     892       13615 :                 TRC_DEBUG_IF(ACCELERATOR) if (p < cnt1)
     893           0 :                         TRC_DEBUG_ENDIF(ACCELERATOR,
     894             :                                         "%s: abort starthash with "
     895             :                                 "mask " BUNFMT " at " BUNFMT " after " LLFMT " usec\n", BATgetId(b), mask, p, GDKusec() - t1);
     896       13615 :                 if (p == cnt1 || mask == maxmask)
     897             :                         break;
     898          18 :                 mask <<= 2;
     899             :                 /* if we fill up the slots fast (p <= maxslots * 1.2)
     900             :                  * increase mask size a bit more quickly */
     901          18 :                 if (p == h->nunique) {
     902             :                         /* only unique values so far: grow really fast */
     903             :                         mask = maxmask;
     904             :                         cnt1 = 0;
     905           0 :                 } else if (mask < maxmask && p <= maxslots * 1.2)
     906           0 :                         mask <<= 2;
     907          18 :                 canditer_reset(ci);
     908           0 :                 o = canditer_next(ci);
     909             :         }
     910             : 
     911             :         /* finish the hashtable with the current mask */
     912       13597 :         switch (tpe) {
     913          18 :         case TYPE_bte:
     914         391 :                 finishhash(bte);
     915             :                 break;
     916          54 :         case TYPE_sht:
     917         558 :                 finishhash(sht);
     918             :                 break;
     919       11980 :         case TYPE_int:
     920   108799391 :                 finishhash(int);
     921             :                 break;
     922          11 :         case TYPE_flt:
     923         454 :                 finishhash(flt);
     924             :                 break;
     925           4 :         case TYPE_dbl:
     926        2082 :                 finishhash(dbl);
     927             :                 break;
     928         751 :         case TYPE_lng:
     929    16785585 :                 finishhash(lng);
     930             :                 break;
     931             : #ifdef HAVE_HGE
     932           2 :         case TYPE_hge:
     933          14 :                 finishhash(hge);
     934             :                 break;
     935             : #endif
     936           1 :         case TYPE_uuid:
     937           2 :                 finishhash(uuid);
     938             :                 break;
     939         776 :         default: {
     940         776 :                 int (*atomcmp)(const void *, const void *) = ATOMcompare(h->type);
     941      325256 :                 TIMEOUT_LOOP(ci->ncand - p, qry_ctx) {
     942      323697 :                         const void *restrict v = BUNtail(bi, o - b->hseqbase);
     943      323697 :                         c = hash_any(h, v);
     944      323675 :                         hget = HASHget(h, c);
     945      323675 :                         h->nheads += hget == BUN_NONE;
     946      323675 :                         if (!hascand) {
     947             :                                 for (hb = hget;
     948      418896 :                                      hb != BUN_NONE;
     949       95180 :                                      hb = HASHgetlink(h, hb)) {
     950      261476 :                                         if (atomcmp(v, BUNtail(bi, hb)) == 0)
     951             :                                                 break;
     952             :                                 }
     953      323656 :                                 h->nunique += hb == BUN_NONE;
     954             :                         }
     955      323615 :                         HASHputlink(h, p, hget);
     956      323652 :                         HASHput(h, c, p);
     957      323699 :                         o = canditer_next(ci);
     958      323698 :                         p++;
     959             :                 }
     960         777 :                 TIMEOUT_CHECK(qry_ctx,
     961             :                               GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
     962             :                 break;
     963             :         }
     964             :         }
     965       13658 :         bat_iterator_end(&bi);
     966             :         /* if the number of unique values is equal to the bat count,
     967             :          * all values are necessarily distinct */
     968       13665 :         MT_lock_set(&b->theaplock);
     969       13666 :         if (h->nunique == BATcount(b) && !b->tkey) {
     970        9024 :                 b->tkey = true;
     971             :         }
     972       13666 :         if (ci->ncand == BATcount(b))
     973       13487 :                 b->tunique_est = (double) h->nunique;
     974       13666 :         MT_lock_unset(&b->theaplock);
     975       13672 :         TRC_DEBUG_IF(ACCELERATOR) {
     976           0 :                 TRC_DEBUG_ENDIF(ACCELERATOR,
     977             :                                 "hash construction " LLFMT " usec\n", GDKusec() - t0);
     978           0 :                 HASHcollisions(b, h, __func__);
     979             :         }
     980             :         return h;
     981             : 
     982           0 :   bailout:
     983           0 :         bat_iterator_end(&bi);
     984           0 :         HEAPfree(&h->heaplink, true);
     985           0 :         HEAPfree(&h->heapbckt, true);
     986           0 :         GDKfree(h);
     987           0 :         return NULL;
     988             : }
     989             : 
     990             : gdk_return
     991     2719525 : BAThash(BAT *b)
     992             : {
     993     2719525 :         if (b->ttype == TYPE_void) {
     994           0 :                 GDKerror("No hash on void type bats\n");
     995           0 :                 return GDK_FAIL;
     996             :         }
     997     2719525 :         if (ATOMstorage(b->ttype) == TYPE_msk) {
     998           0 :                 GDKerror("No hash on msk type bats\n");
     999           0 :                 return GDK_FAIL;
    1000             :         }
    1001     2719525 :         if (BATcheckhash(b)) {
    1002             :                 return GDK_SUCCEED;
    1003             :         }
    1004             : #ifdef __COVERITY__
    1005             :         MT_rwlock_wrlock(&b->thashlock);
    1006             : #else
    1007       13496 :         for (;;) {
    1008             :                 /* If multiple threads simultaneously try to build a
    1009             :                  * hash on a bat, e.g. in order to perform a join, it
    1010             :                  * may happen that one thread succeeds in obtaining the
    1011             :                  * write lock, then builds the hash, releases the lock,
    1012             :                  * acquires the read lock, and performs the join.  The
    1013             :                  * other threads may then still be attempting to acquire
    1014             :                  * the write lock.  But now they have to wait until the
    1015             :                  * read lock is released, which can be quite a long
    1016             :                  * time.  Especially if a second thread goes through the
    1017             :                  * same process as the first. */
    1018       13496 :                 if (MT_rwlock_wrtry(&b->thashlock))
    1019             :                         break;
    1020          13 :                 MT_sleep_ms(1);
    1021          13 :                 if (MT_rwlock_rdtry(&b->thashlock)) {
    1022          11 :                         if (b->thash != NULL && b->thash != (Hash *) 1) {
    1023          10 :                                 MT_rwlock_rdunlock(&b->thashlock);
    1024          10 :                                 return GDK_SUCCEED;
    1025             :                         }
    1026           1 :                         MT_rwlock_rdunlock(&b->thashlock);
    1027             :                 }
    1028             :         }
    1029             : #endif
    1030             :         /* we have the write lock */
    1031       13488 :         if (b->thash == NULL) {
    1032       13481 :                 struct canditer ci;
    1033       13481 :                 canditer_init(&ci, b, NULL);
    1034       13490 :                 if ((b->thash = BAThash_impl(b, &ci, "thash")) == NULL) {
    1035           0 :                         MT_rwlock_wrunlock(&b->thashlock);
    1036           0 :                         return GDK_FAIL;
    1037             :                 }
    1038             :         }
    1039       13495 :         MT_rwlock_wrunlock(&b->thashlock);
    1040       13495 :         return GDK_SUCCEED;
    1041             : }
    1042             : 
    1043             : /*
    1044             :  * The entry on which a value hashes can be calculated with the
    1045             :  * routine HASHprobe.
    1046             :  */
    1047             : BUN
    1048   541534689 : HASHprobe(const Hash *h, const void *v)
    1049             : {
    1050   952069474 :         switch (ATOMbasetype(h->type)) {
    1051        3716 :         case TYPE_bte:
    1052        3716 :                 return hash_bte(h, v);
    1053      127321 :         case TYPE_sht:
    1054      127321 :                 return hash_sht(h, v);
    1055   391867940 :         case TYPE_int:
    1056   391867940 :                 return hash_int(h, v);
    1057   131261893 :         case TYPE_lng:
    1058   131261893 :                 return hash_lng(h, v);
    1059             : #ifdef HAVE_HGE
    1060       26926 :         case TYPE_hge:
    1061       26926 :                 return hash_hge(h, v);
    1062             : #endif
    1063      150591 :         case TYPE_flt:
    1064      150591 :                 return hash_flt(h, v);
    1065       35975 :         case TYPE_dbl:
    1066       35975 :                 return hash_dbl(h, v);
    1067        2104 :         case TYPE_uuid:
    1068        2104 :                 return hash_uuid(h, v);
    1069    18058223 :         default:
    1070    18058223 :                 return hash_any(h, v);
    1071             :         }
    1072             : }
    1073             : 
    1074             : void
    1075      494292 : HASHappend_locked(BAT *b, BUN i, const void *v)
    1076             : {
    1077      494292 :         Hash *h = b->thash;
    1078      494292 :         if (h == NULL) {
    1079           0 :                 return;
    1080             :         }
    1081      494292 :         if (h == (Hash *) 1) {
    1082           0 :                 b->thash = NULL;
    1083           0 :                 doHASHdestroy(b, h);
    1084           0 :                 GDKclrerr();
    1085           0 :                 return;
    1086             :         }
    1087      494292 :         assert(i * h->width == h->heaplink.free);
    1088      494292 :         if (h->nunique < b->batCount / hash_destroy_uniques_fraction) {
    1089           0 :                 b->thash = NULL;
    1090           0 :                 doHASHdestroy(b, h);
    1091           0 :                 GDKclrerr();
    1092           0 :                 return;
    1093             :         }
    1094      494292 :         if (HASHfix(h, false, true) != GDK_SUCCEED) {
    1095           0 :                 b->thash = NULL;
    1096           0 :                 doHASHdestroy(b, h);
    1097           0 :                 GDKclrerr();
    1098           0 :                 return;
    1099             :         }
    1100      494292 :         if (HASHwidth(i + 1) > h->width &&
    1101           0 :              HASHupgradehashheap(b) != GDK_SUCCEED) {
    1102           0 :                 GDKclrerr();
    1103           0 :                 return;
    1104             :         }
    1105      988581 :         if ((ATOMsize(b->ttype) > 2 &&
    1106      494289 :              HASHgrowbucket(b) != GDK_SUCCEED) ||
    1107      495555 :             ((i + 1) * h->width > h->heaplink.size &&
    1108        1263 :              HEAPextend(&h->heaplink,
    1109        1263 :                         i * h->width + GDK_mmap_pagesize,
    1110             :                         true) != GDK_SUCCEED)) {
    1111           0 :                 b->thash = NULL;
    1112           0 :                 HEAPfree(&h->heapbckt, true);
    1113           0 :                 HEAPfree(&h->heaplink, true);
    1114           0 :                 GDKfree(h);
    1115           0 :                 GDKclrerr();
    1116           0 :                 return;
    1117             :         }
    1118      494292 :         h->Link = h->heaplink.base;
    1119      494292 :         BUN c = HASHprobe(h, v);
    1120      494292 :         h->heaplink.free += h->width;
    1121      494292 :         BUN hb = HASHget(h, c);
    1122      494292 :         BUN hb2;
    1123      494292 :         BATiter bi = bat_iterator_nolock(b);
    1124      494292 :         int (*atomcmp)(const void *, const void *) = ATOMcompare(h->type);
    1125      494292 :         for (hb2 = hb;
    1126      682319 :              hb2 != BUN_NONE;
    1127      188027 :              hb2 = HASHgetlink(h, hb2)) {
    1128      351927 :                 if (atomcmp(v, BUNtail(bi, hb2)) == 0)
    1129             :                         break;
    1130             :         }
    1131      494292 :         h->nheads += hb == BUN_NONE;
    1132      494292 :         h->nunique += hb2 == BUN_NONE;
    1133      494292 :         HASHputlink(h, i, hb);
    1134      494292 :         HASHput(h, c, i);
    1135      494292 :         h->heapbckt.dirty = true;
    1136      494292 :         h->heaplink.dirty = true;
    1137             : }
    1138             : 
    1139             : BUN
    1140           0 : HASHappend(BAT *b, BUN i, const void *v)
    1141             : {
    1142           0 :         BUN nunique;
    1143           0 :         MT_rwlock_wrlock(&b->thashlock);
    1144           0 :         HASHappend_locked(b, i, v);
    1145           0 :         nunique = b->thash ? b->thash->nunique : 0;
    1146           0 :         MT_rwlock_wrunlock(&b->thashlock);
    1147           0 :         return nunique;
    1148             : }
    1149             : 
    1150             : /* insert value v at position p into the hash table of b */
    1151             : void
    1152   174053272 : HASHinsert_locked(BATiter *bi, BUN p, const void *v)
    1153             : {
    1154   174053272 :         BAT *b = bi->b;
    1155   174053272 :         Hash *h = b->thash;
    1156   174053272 :         if (h == NULL) {
    1157             :                 return;
    1158             :         }
    1159       25392 :         if (h == (Hash *) 1) {
    1160           0 :                 b->thash = NULL;
    1161           0 :                 doHASHdestroy(b, h);
    1162           0 :                 GDKclrerr();
    1163           0 :                 return;
    1164             :         }
    1165       25392 :         assert(p * h->width < h->heaplink.free);
    1166       25392 :         if (h->nunique < b->batCount / hash_destroy_uniques_fraction) {
    1167           0 :                 b->thash = NULL;
    1168           0 :                 doHASHdestroy(b, h);
    1169           0 :                 GDKclrerr();
    1170           0 :                 return;
    1171             :         }
    1172       25392 :         if (HASHfix(h, false, true) != GDK_SUCCEED) {
    1173           0 :                 b->thash = NULL;
    1174           0 :                 doHASHdestroy(b, h);
    1175           0 :                 GDKclrerr();
    1176           0 :                 return;
    1177             :         }
    1178       25392 :         BUN c = HASHprobe(h, v);
    1179       25392 :         BUN hb = HASHget(h, c);
    1180       25392 :         int (*atomcmp)(const void *, const void *) = ATOMcompare(h->type);
    1181       25392 :         if (hb == BUN_NONE || hb < p) {
    1182             :                 /* bucket is empty, or bucket is used by lower numbered
    1183             :                  * position */
    1184        4970 :                 h->heaplink.dirty = true;
    1185        4970 :                 h->heapbckt.dirty = true;
    1186        4970 :                 HASHputlink(h, p, hb);
    1187        4970 :                 HASHput(h, c, p);
    1188        4970 :                 if (hb == BUN_NONE) {
    1189        2324 :                         h->nheads++;
    1190             :                 } else {
    1191        3100 :                         do {
    1192        3100 :                                 if (atomcmp(v, BUNtail(*bi, hb)) == 0) {
    1193             :                                         /* found another row with the
    1194             :                                          * same value, so don't
    1195             :                                          * increment nunique */
    1196             :                                         return;
    1197             :                                 }
    1198        1292 :                                 hb = HASHgetlink(h, hb);
    1199        1292 :                         } while (hb != BUN_NONE);
    1200             :                 }
    1201             :                 /* this is a new value */
    1202        3162 :                 h->nunique++;
    1203        3162 :                 return;
    1204             :         }
    1205             :         bool seen = false;
    1206      196077 :         for (;;) {
    1207      196077 :                 if (!seen)
    1208       20953 :                         seen = atomcmp(v, BUNtail(*bi, hb)) == 0;
    1209      196077 :                 BUN hb2 = HASHgetlink(h, hb);
    1210      196077 :                 if (hb2 == BUN_NONE || hb2 < p) {
    1211       20422 :                         h->heaplink.dirty = true;
    1212       20422 :                         HASHputlink(h, p, hb2);
    1213       20422 :                         HASHputlink(h, hb, p);
    1214       20702 :                         while (!seen && hb2 != BUN_NONE) {
    1215         280 :                                 seen = atomcmp(v, BUNtail(*bi, hb2)) == 0;
    1216         280 :                                 hb2 = HASHgetlink(h, hb2);
    1217             :                         }
    1218       20422 :                         if (!seen)
    1219         364 :                                 h->nunique++;
    1220       20422 :                         return;
    1221             :                 }
    1222             :                 hb = hb2;
    1223             :         }
    1224             : }
    1225             : 
    1226             : BUN
    1227           2 : HASHinsert(BATiter *bi, BUN p, const void *v)
    1228             : {
    1229           2 :         BUN nunique;
    1230           2 :         MT_rwlock_wrlock(&bi->b->thashlock);
    1231           2 :         HASHinsert_locked(bi, p, v);
    1232           2 :         nunique = bi->b->thash ? bi->b->thash->nunique : 0;
    1233           2 :         MT_rwlock_wrunlock(&bi->b->thashlock);
    1234           2 :         return nunique;
    1235             : }
    1236             : 
    1237             : /* delete value v at position p from the hash table of b */
    1238             : void
    1239   168815198 : HASHdelete_locked(BATiter *bi, BUN p, const void *v)
    1240             : {
    1241   168815198 :         BAT *b = bi->b;
    1242   168815198 :         Hash *h = b->thash;
    1243   168815198 :         if (h == NULL) {
    1244             :                 return;
    1245             :         }
    1246       25392 :         if (h == (Hash *) 1) {
    1247           0 :                 b->thash = NULL;
    1248           0 :                 doHASHdestroy(b, h);
    1249           0 :                 GDKclrerr();
    1250           0 :                 return;
    1251             :         }
    1252       25392 :         assert(p * h->width < h->heaplink.free);
    1253       25392 :         if (h->nunique < b->batCount / hash_destroy_uniques_fraction) {
    1254           0 :                 b->thash = NULL;
    1255           0 :                 doHASHdestroy(b, h);
    1256           0 :                 GDKclrerr();
    1257           0 :                 return;
    1258             :         }
    1259       25392 :         if (HASHfix(h, false, true) != GDK_SUCCEED) {
    1260           0 :                 b->thash = NULL;
    1261           0 :                 doHASHdestroy(b, h);
    1262           0 :                 GDKclrerr();
    1263           0 :                 return;
    1264             :         }
    1265       25392 :         BUN c = HASHprobe(h, v);
    1266       25392 :         BUN hb = HASHget(h, c);
    1267       25392 :         int (*atomcmp)(const void *, const void *) = ATOMcompare(h->type);
    1268       25392 :         if (hb == p) {
    1269        3434 :                 BUN hb2 = HASHgetlink(h, p);
    1270        3434 :                 h->heaplink.dirty = true;
    1271        3434 :                 h->heapbckt.dirty = true;
    1272        3434 :                 HASHput(h, c, hb2);
    1273        3434 :                 HASHputlink(h, p, BUN_NONE);
    1274        3434 :                 if (hb2 == BUN_NONE) {
    1275        1593 :                         h->nheads--;
    1276             :                 } else {
    1277        2311 :                         do {
    1278        2311 :                                 if (atomcmp(v, BUNtail(*bi, hb2)) == 0) {
    1279             :                                         /* found another row with the
    1280             :                                          * same value, so don't
    1281             :                                          * decrement nunique below */
    1282             :                                         return;
    1283             :                                 }
    1284        1331 :                                 hb2 = HASHgetlink(h, hb2);
    1285        1331 :                         } while (hb2 != BUN_NONE);
    1286             :                 }
    1287             :                 /* no rows found with the same value, so number of
    1288             :                  * unique values is one lower */
    1289        2454 :                 h->nunique--;
    1290        2454 :                 return;
    1291             :         }
    1292             :         bool seen = false;
    1293             :         BUN links = 0;
    1294      185026 :         for (;;) {
    1295      185026 :                 if (!seen)
    1296       22204 :                         seen = atomcmp(v, BUNtail(*bi, hb)) == 0;
    1297      185026 :                 BUN hb2 = HASHgetlink(h, hb);
    1298      185026 :                 assert(hb2 != BUN_NONE );
    1299      185026 :                 assert(hb2 < hb);
    1300      185026 :                 if (hb2 == p) {
    1301       21958 :                         for (hb2 = HASHgetlink(h, hb2);
    1302       22017 :                              !seen && hb2 != BUN_NONE;
    1303          59 :                              hb2 = HASHgetlink(h, hb2))
    1304          59 :                                 seen = atomcmp(v, BUNtail(*bi, hb2)) == 0;
    1305       21958 :                         break;
    1306             :                 }
    1307      163068 :                 hb = hb2;
    1308      163068 :                 if (++links > hash_destroy_chain_length) {
    1309           0 :                         b->thash = NULL;
    1310           0 :                         doHASHdestroy(b, h);
    1311           0 :                         GDKclrerr();
    1312           0 :                         return;
    1313             :                 }
    1314             :         }
    1315       21958 :         h->heaplink.dirty = true;
    1316       21958 :         HASHputlink(h, hb, HASHgetlink(h, p));
    1317       21958 :         HASHputlink(h, p, BUN_NONE);
    1318       21958 :         if (!seen)
    1319         163 :                 h->nunique--;
    1320             : }
    1321             : 
    1322             : BUN
    1323           6 : HASHdelete(BATiter *bi, BUN p, const void *v)
    1324             : {
    1325           6 :         BUN nunique;
    1326           6 :         MT_rwlock_wrlock(&bi->b->thashlock);
    1327           6 :         HASHdelete_locked(bi, p, v);
    1328           6 :         nunique = bi->b->thash ? bi->b->thash->nunique : 0;
    1329           6 :         MT_rwlock_wrunlock(&bi->b->thashlock);
    1330           6 :         return nunique;
    1331             : }
    1332             : 
    1333             : BUN
    1334           0 : HASHlist(Hash *h, BUN i)
    1335             : {
    1336           0 :         BUN c = 1;
    1337           0 :         BUN j = HASHget(h, i);
    1338             : 
    1339           0 :         if (j == BUN_NONE)
    1340             :                 return 1;
    1341           0 :         while ((j = HASHgetlink(h, i)) != BUN_NONE) {
    1342           0 :                 c++;
    1343           0 :                 i = j;
    1344             :         }
    1345             :         return c;
    1346             : }
    1347             : 
    1348             : void
    1349    23894215 : HASHdestroy(BAT *b)
    1350             : {
    1351    23894215 :         if (b) {
    1352    23894215 :                 Hash *hs;
    1353    23894215 :                 MT_rwlock_wrlock(&b->thashlock);
    1354    23983677 :                 hs = b->thash;
    1355    23983677 :                 b->thash = NULL;
    1356    23983677 :                 MT_rwlock_wrunlock(&b->thashlock);
    1357    23990017 :                 doHASHdestroy(b, hs);
    1358             :         }
    1359    23869737 : }
    1360             : 
    1361             : void
    1362      575666 : HASHfree(BAT *b)
    1363             : {
    1364      575666 :         if (b) {
    1365      575666 :                 Hash *h;
    1366      575666 :                 MT_rwlock_wrlock(&b->thashlock);
    1367      575665 :                 if ((h = b->thash) != NULL && h != (Hash *) 1) {
    1368        1987 :                         bool rmheap = h->heaplink.dirty || h->heapbckt.dirty;
    1369        1987 :                         TRC_DEBUG(ACCELERATOR, ALGOBATFMT " free hash %s\n",
    1370             :                                   ALGOBATPAR(b),
    1371             :                                   rmheap ? "removing" : "keeping");
    1372             : 
    1373        1987 :                         b->thash = rmheap ? NULL : (Hash *) 1;
    1374        1987 :                         HEAPfree(&h->heapbckt, rmheap);
    1375        1987 :                         HEAPfree(&h->heaplink, rmheap);
    1376        1987 :                         GDKfree(h);
    1377             :                 }
    1378      575665 :                 MT_rwlock_wrunlock(&b->thashlock);
    1379             :         }
    1380      575666 : }

Generated by: LCOV version 1.14