LCOV - code coverage report
Current view: top level - gdk - gdk_hash.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 548 780 70.3 %
Date: 2025-03-25 20:06:35 Functions: 19 24 79.2 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024, 2025 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : /*
      14             :  * - Hash Table Creation
      15             :  * The hash indexing scheme for BATs reserves a block of memory to
      16             :  * maintain the hash table and a collision list. A one-to-one mapping
      17             :  * exists between the BAT and the collision list using the BUN
      18             :  * index. NOTE: we alloc the link list as a parallel array to the BUN
      19             :  * array; hence the hash link array has the same size as
      20             :  * BATcapacity(b) (not BATcount(b)). This allows us in the BUN insert
      21             :  * and delete to assume that there is hash space if there is BUN
      22             :  * space.
      23             :  *
      24             :  * The hash mask size is a power of two, so we can do bitwise AND on
      25             :  * the hash (integer) number to quickly find the head of the bucket
      26             :  * chain.  Clearly, the hash mask size is a crucial parameter. If we
      27             :  * know that the column is unique (tkey), we use direct hashing (mask
      28             :  * size ~= BATcount). Otherwise we dynamically determine the mask size
      29             :  * by starting out with mask size = BATcount/64 (just 1.5% of memory
      30             :  * storage overhead). Then we start building the hash table on the
      31             :  * first 25% of the BAT. As we aim for max-collisions list length of
      32             :  * 4, the list on 25% should not exceed length 1. So, if a small
      33             :  * number of collisssions occurs (mask/2) then we abandon the attempt
      34             :  * and restart with a mask that is 4 times larger. This converges
      35             :  * after three cycles to direct hashing.
      36             :  */
      37             : 
      38             : #include "monetdb_config.h"
      39             : #include "gdk.h"
      40             : #include "gdk_private.h"
      41             : 
      42             : __attribute__((__const__))
      43             : static inline uint8_t
      44     1365016 : HASHwidth(BUN hashsize)
      45             : {
      46     1365016 :         (void) hashsize;
      47             : #ifdef BUN2
      48     1365016 :         if (hashsize <= (BUN) BUN2_NONE)
      49             :                 return BUN2;
      50             : #endif
      51             : #ifdef BUN8
      52         593 :         if (hashsize > (BUN) BUN4_NONE)
      53           0 :                 return BUN8;
      54             : #endif
      55             :         return BUN4;
      56             : }
      57             : 
      58             : __attribute__((__const__))
      59             : static inline BUN
      60       10051 : hashmask(BUN m)
      61             : {
      62       10051 :         m |= m >> 1;
      63       10051 :         m |= m >> 2;
      64       10051 :         m |= m >> 4;
      65       10051 :         m |= m >> 8;
      66       10051 :         m |= m >> 16;
      67             : #if SIZEOF_BUN == 8
      68       10051 :         m |= m >> 32;
      69             : #endif
      70       10051 :         return m;
      71             : }
      72             : 
      73             : static inline void
      74      869656 : HASHclear(Hash *h)
      75             : {
      76             :         /* since BUN2_NONE, BUN4_NONE, BUN8_NONE
      77             :          * are all equal to ~0, i.e., have all bits set,
      78             :          * we can use a simple memset() to clear the Hash,
      79             :          * rather than iteratively assigning individual
      80             :          * BUNi_NONE values in a for-loop
      81             :          */
      82      869656 :         memset(h->Bckt, 0xFF, h->nbucket * h->width);
      83      869656 : }
      84             : 
      85             : #define HASH_VERSION            6
      86             : #define HASH_HEADER_SIZE        7       /* nr of size_t fields in header */
      87             : 
      88             : void
      89    20124357 : doHASHdestroy(BAT *b, Hash *hs)
      90             : {
      91    20124357 :         if (hs == (Hash *) 1) {
      92          54 :                 GDKunlink(BBPselectfarm(b->batRole, b->ttype, hashheap),
      93             :                           BATDIR,
      94          54 :                           BBP_physical(b->batCacheid),
      95             :                           "thashl");
      96          54 :                 GDKunlink(BBPselectfarm(b->batRole, b->ttype, hashheap),
      97             :                           BATDIR,
      98          54 :                           BBP_physical(b->batCacheid),
      99             :                           "thashb");
     100    20124303 :         } else if (hs) {
     101       12389 :                 TRC_DEBUG(ACCELERATOR, ALGOBATFMT ": removing%s hash\n", ALGOBATPAR(b), *(size_t *) hs->heapbckt.base & (1 << 24) ? " persisted" : "");
     102       12389 :                 HEAPfree(&hs->heapbckt, true);
     103       12389 :                 HEAPfree(&hs->heaplink, true);
     104       12389 :                 GDKfree(hs);
     105             :         }
     106    20124357 : }
     107             : 
     108             : gdk_return
     109      869656 : HASHnew(Hash *h, int tpe, BUN size, BUN mask, BUN count, bool bcktonly)
     110             : {
     111      869656 :         if (h->width == 0)
     112      855962 :                 h->width = HASHwidth(size);
     113             : 
     114      869656 :         if (!bcktonly) {
     115      855425 :                 if (HEAPalloc(&h->heaplink, size, h->width) != GDK_SUCCEED)
     116             :                         return GDK_FAIL;
     117      855423 :                 h->heaplink.free = size * h->width;
     118      855423 :                 h->heaplink.dirty = true;
     119      855423 :                 h->Link = h->heaplink.base;
     120             :         }
     121      869654 :         if (HEAPalloc(&h->heapbckt, mask + HASH_HEADER_SIZE * SIZEOF_SIZE_T / h->width, h->width) != GDK_SUCCEED) {
     122           0 :                 if (!bcktonly) {
     123           0 :                         HEAPfree(&h->heaplink, true);
     124           0 :                         h->heaplink.free = 0;
     125           0 :                         h->Link = NULL;
     126             :                 }
     127           0 :                 return GDK_FAIL;
     128             :         }
     129      869656 :         h->heapbckt.free = mask * h->width + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     130      869656 :         h->heapbckt.dirty = true;
     131      869656 :         h->nbucket = mask;
     132      869656 :         if (mask & (mask - 1)) {
     133        9842 :                 h->mask2 = hashmask(mask);
     134        9842 :                 h->mask1 = h->mask2 >> 1;
     135             :         } else {
     136             :                 /* mask is a power of two */
     137      859814 :                 h->mask1 = mask - 1;
     138      859814 :                 h->mask2 = h->mask1 << 1 | 1;
     139             :         }
     140      869656 :         h->Bckt = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     141      869656 :         h->type = tpe;
     142      869656 :         HASHclear(h);           /* zero the mask */
     143      869655 :         ((size_t *) h->heapbckt.base)[0] = (size_t) HASH_VERSION;
     144      869655 :         ((size_t *) h->heapbckt.base)[1] = (size_t) size;
     145      869655 :         ((size_t *) h->heapbckt.base)[2] = (size_t) h->nbucket;
     146      869655 :         ((size_t *) h->heapbckt.base)[3] = (size_t) h->width;
     147      869655 :         ((size_t *) h->heapbckt.base)[4] = (size_t) count;
     148      869655 :         ((size_t *) h->heapbckt.base)[5] = (size_t) h->nunique;
     149      869655 :         ((size_t *) h->heapbckt.base)[6] = (size_t) h->nheads;
     150      869655 :         TRC_DEBUG(ACCELERATOR,
     151             :                   "create hash(size " BUNFMT ", mask " BUNFMT ", width %d, total " BUNFMT " bytes);\n", size, mask, h->width, (size + mask) * h->width);
     152             :         return GDK_SUCCEED;
     153             : }
     154             : 
     155             : /* collect HASH statistics for analysis */
     156             : static void
     157           0 : HASHcollisions(BAT *b, Hash *h, const char *func)
     158             : {
     159           0 :         lng cnt, entries = 0, max = 0;
     160           0 :         double total = 0;
     161           0 :         BUN p, i, j;
     162             : 
     163           0 :         if (b == 0 || h == 0)
     164             :                 return;
     165           0 :         for (i = 0, j = h->nbucket; i < j; i++)
     166           0 :                 if ((p = HASHget(h, i)) != BUN_NONE) {
     167           0 :                         entries++;
     168           0 :                         cnt = 0;
     169           0 :                         for (; p != BUN_NONE; p = HASHgetlink(h, p))
     170           0 :                                 cnt++;
     171           0 :                         if (cnt > max)
     172             :                                 max = cnt;
     173           0 :                         total += cnt;
     174             :                 }
     175           0 :         TRC_DEBUG_ENDIF(ACCELERATOR,
     176             :                         "%s(" ALGOBATFMT "): statistics " BUNFMT ", "
     177             :                         "entries " LLFMT ", nunique " BUNFMT ", "
     178             :                         "nbucket " BUNFMT ", max " LLFMT ", avg %2.6f;\n",
     179             :                         func, ALGOBATPAR(b), BATcount(b), entries,
     180             :                         h->nunique, h->nbucket, max,
     181             :                         entries == 0 ? 0 : total / entries);
     182             : }
     183             : 
     184             : static gdk_return
     185           0 : HASHupgradehashheap(BAT *b)
     186             : {
     187             : #if defined(BUN2) || defined(BUN8)
     188           0 :         Hash *h = b->thash;
     189           0 :         int nwidth = h->width << 1;
     190           0 :         BUN i;
     191             : 
     192           0 :         assert(nwidth <= SIZEOF_BUN);
     193           0 :         assert((nwidth & (nwidth - 1)) == 0);
     194             : 
     195           0 :         if (HEAPextend(&h->heaplink, h->heaplink.size * nwidth / h->width, true) != GDK_SUCCEED ||
     196           0 :             HEAPextend(&h->heapbckt, (h->heapbckt.size - HASH_HEADER_SIZE * SIZEOF_SIZE_T) * nwidth / h->width + HASH_HEADER_SIZE * SIZEOF_SIZE_T, true) != GDK_SUCCEED) {
     197           0 :                 b->thash = NULL;
     198           0 :                 doHASHdestroy(b, h);
     199           0 :                 return GDK_FAIL;
     200             :         }
     201           0 :         h->Link = h->heaplink.base;
     202           0 :         h->Bckt = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     203           0 :         switch (nwidth) {
     204           0 :         case BUN4:
     205             : #ifdef BUN2
     206           0 :                 switch (h->width) {
     207           0 :                 case BUN2:
     208           0 :                         i = h->heaplink.free / h->width;
     209           0 :                         h->heaplink.free = i * nwidth;
     210           0 :                         while (i > 0) {
     211           0 :                                 i--;
     212           0 :                                 BUN2type v = ((BUN2type *) h->Link)[i];
     213           0 :                                 ((BUN4type *) h->Link)[i] = v == BUN2_NONE ? BUN4_NONE : v;
     214             :                         }
     215           0 :                         i = (h->heapbckt.free - HASH_HEADER_SIZE * SIZEOF_SIZE_T) / h->width;
     216           0 :                         h->heapbckt.free = HASH_HEADER_SIZE * SIZEOF_SIZE_T + i * nwidth;
     217           0 :                         while (i > 0) {
     218           0 :                                 i--;
     219           0 :                                 BUN2type v = ((BUN2type *) h->Bckt)[i];
     220           0 :                                 ((BUN4type *) h->Bckt)[i] = v == BUN2_NONE ? BUN4_NONE : v;
     221             :                         }
     222           0 :                         h->heapbckt.dirty = true;
     223           0 :                         h->heaplink.dirty = true;
     224           0 :                         break;
     225             :                 }
     226             : #endif
     227             :                 break;
     228             : #ifdef BUN8
     229           0 :         case BUN8:
     230           0 :                 switch (h->width) {
     231             : #ifdef BUN2
     232           0 :                 case BUN2:
     233           0 :                         i = h->heaplink.free / h->width;
     234           0 :                         h->heaplink.free = i * nwidth;
     235           0 :                         while (i > 0) {
     236           0 :                                 i--;
     237           0 :                                 BUN2type v = ((BUN2type *) h->Link)[i];
     238           0 :                                 ((BUN8type *) h->Link)[i] = v == BUN2_NONE ? BUN8_NONE : v;
     239             :                         }
     240           0 :                         i = (h->heapbckt.free - HASH_HEADER_SIZE * SIZEOF_SIZE_T) / h->width;
     241           0 :                         h->heapbckt.free = HASH_HEADER_SIZE * SIZEOF_SIZE_T + i * nwidth;
     242           0 :                         while (i > 0) {
     243           0 :                                 i--;
     244           0 :                                 BUN2type v = ((BUN2type *) h->Bckt)[i];
     245           0 :                                 ((BUN8type *) h->Bckt)[i] = v == BUN2_NONE ? BUN8_NONE : v;
     246             :                         }
     247           0 :                         h->heapbckt.dirty = true;
     248           0 :                         h->heaplink.dirty = true;
     249           0 :                         break;
     250             : #endif
     251           0 :                 case BUN4:
     252           0 :                         i = h->heaplink.free / h->width;
     253           0 :                         h->heaplink.free = i * nwidth;
     254           0 :                         while (i > 0) {
     255           0 :                                 i--;
     256           0 :                                 BUN4type v = ((BUN4type *) h->Link)[i];
     257           0 :                                 ((BUN8type *) h->Link)[i] = v == BUN4_NONE ? BUN8_NONE : v;
     258             :                         }
     259           0 :                         i = (h->heapbckt.free - HASH_HEADER_SIZE * SIZEOF_SIZE_T) / h->width;
     260           0 :                         h->heapbckt.free = HASH_HEADER_SIZE * SIZEOF_SIZE_T + i * nwidth;
     261           0 :                         while (i > 0) {
     262           0 :                                 i--;
     263           0 :                                 BUN4type v = ((BUN4type *) h->Bckt)[i];
     264           0 :                                 ((BUN8type *) h->Bckt)[i] = v == BUN4_NONE ? BUN8_NONE : v;
     265             :                         }
     266           0 :                         h->heapbckt.dirty = true;
     267           0 :                         h->heaplink.dirty = true;
     268           0 :                         break;
     269             :                 }
     270             :                 break;
     271             : #endif
     272             :         }
     273           0 :         h->width = nwidth;
     274             : #else
     275             :         (void) b;
     276             : #endif
     277           0 :         return GDK_SUCCEED;
     278             : }
     279             : 
     280             : /* write/remove the bit into/from the hash file that indicates the hash
     281             :  * is good to go; the bit is the last part to be written and the first
     282             :  * to be removed */
     283             : static inline gdk_return
     284      559988 : HASHfix(Hash *h, bool save, bool dosync)
     285             : {
     286      559988 :         if (!h->heapbckt.dirty && !h->heaplink.dirty) {
     287       24614 :                 const size_t mask = (size_t) 1 << 24;
     288       24614 :                 if (((size_t *) h->heapbckt.base)[0] & mask) {
     289       10822 :                         if (save)
     290             :                                 return GDK_SUCCEED;
     291       10822 :                         ((size_t *) h->heapbckt.base)[0] &= ~mask;
     292             :                 } else {
     293       13792 :                         if (!save)
     294             :                                 return GDK_SUCCEED;
     295       13792 :                         ((size_t *) h->heapbckt.base)[0] |= mask;
     296             :                 }
     297       24614 :                 if (h->heapbckt.storage == STORE_MEM) {
     298       24587 :                         gdk_return rc = GDK_FAIL;
     299       24587 :                         int fd = GDKfdlocate(h->heapbckt.farmid, h->heapbckt.filename, "rb+", NULL);
     300       24587 :                         if (fd >= 0) {
     301       24587 :                                 if (write(fd, h->heapbckt.base, SIZEOF_SIZE_T) == SIZEOF_SIZE_T) {
     302       24587 :                                         if (dosync &&
     303       24587 :                                             !(ATOMIC_GET(&GDKdebug) & NOSYNCMASK)) {
     304             : #if defined(NATIVE_WIN32)
     305             :                                                 _commit(fd);
     306             : #elif defined(HAVE_FDATASYNC)
     307          13 :                                                 fdatasync(fd);
     308             : #elif defined(HAVE_FSYNC)
     309             :                                                 fsync(fd);
     310             : #endif
     311             :                                         }
     312             :                                         rc = GDK_SUCCEED;
     313             :                                 }
     314       24587 :                                 close(fd);
     315             :                         }
     316       24587 :                         if (rc != GDK_SUCCEED)
     317           0 :                                 ((size_t *) h->heapbckt.base)[0] &= ~mask;
     318       24587 :                         return rc;
     319             :                 } else {
     320          27 :                         if (dosync &&
     321          27 :                             !(ATOMIC_GET(&GDKdebug) & NOSYNCMASK) &&
     322           0 :                             MT_msync(h->heapbckt.base, SIZEOF_SIZE_T) < 0) {
     323           0 :                                 ((size_t *) h->heapbckt.base)[0] &= ~mask;
     324           0 :                                 return GDK_FAIL;
     325             :                         }
     326             :                 }
     327             :         }
     328             :         return GDK_SUCCEED;
     329             : }
     330             : 
     331             : static gdk_return
     332      495357 : HASHgrowbucket(BAT *b)
     333             : {
     334      495357 :         Hash *h = b->thash;
     335      495357 :         BUN nbucket;
     336      495357 :         BUN onbucket = h->nbucket;
     337      495357 :         lng t0 = 0;
     338             : 
     339      495357 :         TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec();
     340             : 
     341             :         /* only needed to fix hash tables built before this fix was
     342             :          * introduced */
     343      495357 :         if (h->width < SIZEOF_BUN &&
     344      495357 :             ((BUN) 1 << (h->width * 8)) - 1 <= h->mask2 &&
     345           0 :             HASHupgradehashheap(b) != GDK_SUCCEED)
     346             :                 return GDK_FAIL;
     347             : 
     348      753575 :         while (h->nunique >= (nbucket = h->nbucket) * 7 / 8) {
     349      258218 :                 BUN new = h->nbucket;
     350      258218 :                 BUN old = new & h->mask1;
     351      258218 :                 BUN mask = h->mask1 + 1; /* == h->mask2 - h->mask1 */
     352             : 
     353      258218 :                 assert(h->heapbckt.free == nbucket * h->width + HASH_HEADER_SIZE * SIZEOF_SIZE_T);
     354      258218 :                 if (h->heapbckt.free + h->width > h->heapbckt.size) {
     355        5392 :                         if (HEAPextend(&h->heapbckt,
     356             :                                        h->heapbckt.size + GDK_mmap_pagesize,
     357             :                                        true) != GDK_SUCCEED) {
     358           0 :                                 return GDK_FAIL;
     359             :                         }
     360        5392 :                         h->Bckt = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     361             :                 }
     362      258218 :                 assert(h->heapbckt.free + h->width <= h->heapbckt.size);
     363      258218 :                 if (h->nbucket == h->mask2) {
     364         594 :                         h->mask1 = h->mask2;
     365         594 :                         h->mask2 |= h->mask2 << 1;
     366         594 :                         if (h->width < SIZEOF_BUN &&
     367         594 :                             h->mask2 == ((BUN) 1 << (h->width * 8)) - 1) {
     368             :                                 /* time to widen the hash table */
     369           0 :                                 if (HASHupgradehashheap(b) != GDK_SUCCEED)
     370             :                                         return GDK_FAIL;
     371             :                         }
     372             :                 }
     373      258218 :                 h->nbucket++;
     374      258218 :                 h->heapbckt.free += h->width;
     375      258218 :                 BUN lold, lnew, hb;
     376      258218 :                 lold = lnew = BUN_NONE;
     377      258218 :                 BATiter bi = bat_iterator(b);
     378      258218 :                 if ((hb = HASHget(h, old)) != BUN_NONE) {
     379      205489 :                         h->nheads--;
     380      325207 :                         do {
     381      325207 :                                 const void *v = BUNtail(bi, hb);
     382      325207 :                                 BUN hsh = ATOMhash(h->type, v);
     383      325207 :                                 assert((hsh & (mask - 1)) == old);
     384      325207 :                                 if (hsh & mask) {
     385             :                                         /* move to new list */
     386       97340 :                                         if (lnew == BUN_NONE) {
     387       87545 :                                                 HASHput(h, new, hb);
     388       87545 :                                                 h->nheads++;
     389             :                                         } else
     390        9795 :                                                 HASHputlink(h, lnew, hb);
     391             :                                         lnew = hb;
     392             :                                 } else {
     393             :                                         /* move to old list */
     394      227867 :                                         if (lold == BUN_NONE) {
     395      174733 :                                                 h->nheads++;
     396      174733 :                                                 HASHput(h, old, hb);
     397             :                                         } else
     398       53134 :                                                 HASHputlink(h, lold, hb);
     399             :                                         lold = hb;
     400             :                                 }
     401      325207 :                                 hb = HASHgetlink(h, hb);
     402      325207 :                         } while (hb != BUN_NONE);
     403             :                 }
     404      258218 :                 bat_iterator_end(&bi);
     405      258218 :                 if (lnew == BUN_NONE)
     406      170673 :                         HASHput(h, new, BUN_NONE);
     407             :                 else
     408       87545 :                         HASHputlink(h, lnew, BUN_NONE);
     409      258218 :                 if (lold == BUN_NONE)
     410       83485 :                         HASHput(h, old, BUN_NONE);
     411             :                 else
     412      174733 :                         HASHputlink(h, lold, BUN_NONE);
     413             :         }
     414      495357 :         h->heapbckt.dirty = true;
     415      495357 :         h->heaplink.dirty = true;
     416      495357 :         TRC_DEBUG_IF(ACCELERATOR) if (h->nbucket > onbucket) {
     417           0 :                 TRC_DEBUG_ENDIF(ACCELERATOR, ALGOBATFMT " " BUNFMT
     418             :                         " -> " BUNFMT " buckets (" LLFMT " usec)\n",
     419             :                         ALGOBATPAR(b),
     420             :                         onbucket, h->nbucket, GDKusec() - t0);
     421           0 :                 HASHcollisions(b, h, __func__);
     422             :         }
     423             :         return GDK_SUCCEED;
     424             : }
     425             : 
     426             : /* Return TRUE if we have a hash on the tail, even if we need to read
     427             :  * one from disk.
     428             :  *
     429             :  * Note that the b->thash pointer can be NULL, meaning there is no
     430             :  * hash; (Hash *) 1, meaning there is no hash loaded, but it may exist
     431             :  * on disk; or a valid pointer to a loaded hash.  These values are
     432             :  * maintained here, in the HASHdestroy and HASHfree functions, and in
     433             :  * BBPdiskscan during initialization. */
     434             : bool
     435    63954205 : BATcheckhash(BAT *b)
     436             : {
     437    63954205 :         lng t = 0;
     438    63954205 :         Hash *h;
     439             : 
     440    63954205 :         MT_rwlock_rdlock(&b->thashlock);
     441    63970694 :         h = b->thash;
     442    63970694 :         MT_rwlock_rdunlock(&b->thashlock);
     443    63970645 :         if (h == (Hash *) 1) {
     444             :                 /* but when we want to change it, we need the lock */
     445         463 :                 TRC_DEBUG_IF(ACCELERATOR) t = GDKusec();
     446         463 :                 MT_rwlock_wrlock(&b->thashlock);
     447         463 :                 TRC_DEBUG_IF(ACCELERATOR) t = GDKusec() - t;
     448             :                 /* if still 1 now that we have the lock, we can update */
     449         463 :                 if (b->thash == (Hash *) 1) {
     450         463 :                         int fd;
     451             : 
     452         463 :                         assert(!GDKinmemory(b->theap->farmid));
     453         463 :                         b->thash = NULL;
     454         463 :                         if ((h = GDKzalloc(sizeof(*h))) != NULL &&
     455         463 :                             (h->heaplink.farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) >= 0 &&
     456         463 :                             (h->heapbckt.farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) >= 0) {
     457         462 :                                 const char *nme = BBP_physical(b->batCacheid);
     458         462 :                                 strconcat_len(h->heaplink.filename,
     459             :                                               sizeof(h->heaplink.filename),
     460             :                                               nme, ".thashl", NULL);
     461         463 :                                 strconcat_len(h->heapbckt.filename,
     462             :                                               sizeof(h->heapbckt.filename),
     463             :                                               nme, ".thashb", NULL);
     464         463 :                                 h->heaplink.storage = STORE_INVALID;
     465         463 :                                 h->heaplink.newstorage = STORE_INVALID;
     466         463 :                                 h->heapbckt.storage = STORE_INVALID;
     467         463 :                                 h->heapbckt.newstorage = STORE_INVALID;
     468             : 
     469             :                                 /* check whether a persisted hash can be found */
     470         463 :                                 if ((fd = GDKfdlocate(h->heapbckt.farmid, nme, "rb+", "thashb")) >= 0) {
     471         463 :                                         size_t hdata[HASH_HEADER_SIZE];
     472         463 :                                         struct stat st;
     473             : 
     474         463 :                                         if (read(fd, hdata, sizeof(hdata)) == sizeof(hdata) &&
     475         462 :                                             (hdata[0] == (((size_t) 1 << 24) | HASH_VERSION)) &&
     476         403 :                                             hdata[1] > 0 &&
     477             :                                             (
     478             : #ifdef BUN2
     479         403 :                                                     hdata[3] == BUN2 ||
     480             : #endif
     481             :                                                     hdata[3] == BUN4
     482             : #ifdef BUN8
     483             :                                                     || hdata[3] == BUN8
     484             : #endif
     485         404 :                                                     ) &&
     486         808 :                                             hdata[4] == (size_t) BATcount(b) &&
     487         404 :                                             fstat(fd, &st) == 0 &&
     488         808 :                                             st.st_size >= (off_t) (h->heapbckt.size = h->heapbckt.free = (h->nbucket = (BUN) hdata[2]) * (BUN) (h->width = (uint8_t) hdata[3]) + HASH_HEADER_SIZE * SIZEOF_SIZE_T) &&
     489         404 :                                             close(fd) == 0 &&
     490         808 :                                             (fd = GDKfdlocate(h->heaplink.farmid, nme, "rb+", "thashl")) >= 0 &&
     491         404 :                                             fstat(fd, &st) == 0 &&
     492         404 :                                             st.st_size > 0 &&
     493         808 :                                             st.st_size >= (off_t) (h->heaplink.size = h->heaplink.free = hdata[1] * h->width) &&
     494         404 :                                             HEAPload(&h->heaplink, nme, "thashl", false) == GDK_SUCCEED) {
     495         404 :                                                 if (HEAPload(&h->heapbckt, nme, "thashb", false) == GDK_SUCCEED) {
     496         404 :                                                         if (h->nbucket & (h->nbucket - 1)) {
     497         209 :                                                                 h->mask2 = hashmask(h->nbucket);
     498         209 :                                                                 h->mask1 = h->mask2 >> 1;
     499             :                                                         } else {
     500         195 :                                                                 h->mask1 = h->nbucket - 1;
     501         195 :                                                                 h->mask2 = h->mask1 << 1 | 1;
     502             :                                                         }
     503         404 :                                                         h->nunique = hdata[5];
     504         404 :                                                         h->nheads = hdata[6];
     505         404 :                                                         h->type = ATOMtype(b->ttype);
     506         404 :                                                         if (h->width < SIZEOF_BUN &&
     507         404 :                                                             ((BUN) 1 << (8 * h->width)) - 1 > h->nbucket) {
     508         404 :                                                                 close(fd);
     509         404 :                                                                 h->Link = h->heaplink.base;
     510         404 :                                                                 h->Bckt = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     511         404 :                                                                 h->heaplink.parentid = b->batCacheid;
     512         404 :                                                                 h->heapbckt.parentid = b->batCacheid;
     513         404 :                                                                 b->thash = h;
     514         404 :                                                                 h->heapbckt.hasfile = true;
     515         404 :                                                                 h->heaplink.hasfile = true;
     516         404 :                                                                 TRC_DEBUG(ACCELERATOR,
     517             :                                                                           ALGOBATFMT ": reusing persisted hash\n", ALGOBATPAR(b));
     518         404 :                                                                 MT_rwlock_wrunlock(&b->thashlock);
     519         404 :                                                                 return true;
     520             :                                                         }
     521             :                                                         /* if h->nbucket
     522             :                                                          * equals the
     523             :                                                          * BUN_NONE
     524             :                                                          * representation
     525             :                                                          * for the
     526             :                                                          * current hash
     527             :                                                          * width (was
     528             :                                                          * possible in
     529             :                                                          * previous
     530             :                                                          * iterations of
     531             :                                                          * the code),
     532             :                                                          * then we can't
     533             :                                                          * use the hash
     534             :                                                          * since we
     535             :                                                          * can't
     536             :                                                          * distinguish
     537             :                                                          * between
     538             :                                                          * end-of-list
     539             :                                                          * and a valid
     540             :                                                          * link */
     541           0 :                                                         HEAPfree(&h->heapbckt, false);
     542             :                                                 }
     543           0 :                                                 HEAPfree(&h->heaplink, false);
     544             :                                         }
     545          59 :                                         close(fd);
     546             :                                         /* unlink unusable file */
     547          59 :                                         GDKunlink(h->heaplink.farmid, BATDIR, nme, "thashl");
     548          59 :                                         GDKunlink(h->heapbckt.farmid, BATDIR, nme, "thashb");
     549          59 :                                         h->heapbckt.hasfile = false;
     550          59 :                                         h->heaplink.hasfile = false;
     551             :                                 }
     552             :                         }
     553          60 :                         GDKfree(h);
     554          59 :                         GDKclrerr();    /* we're not currently interested in errors */
     555             :                 }
     556          59 :                 h = b->thash;
     557          59 :                 MT_rwlock_wrunlock(&b->thashlock);
     558             :         }
     559    63970241 :         if (h != NULL) {
     560     3769084 :                 TRC_DEBUG(ACCELERATOR, ALGOBATFMT ": already has hash, waited " LLFMT " usec\n", ALGOBATPAR(b), t);
     561             :         }
     562    63970241 :         return h != NULL;
     563             : }
     564             : 
     565             : /* figure out size of the hash (sum of the sizes of the two hash files)
     566             :  * without loading them */
     567             : size_t
     568       91013 : HASHsize(BAT *b)
     569             : {
     570       91013 :         size_t sz = 0;
     571       91013 :         MT_rwlock_rdlock(&b->thashlock);
     572       91013 :         if (b->thash == NULL) {
     573             :                 sz = 0;
     574        5172 :         } else if (b->thash != (Hash *) 1) {
     575        4352 :                 sz = b->thash->heaplink.size + b->thash->heapbckt.size;
     576             :         } else {
     577         820 :                 int farmid = BBPselectfarm(b->batRole, b->ttype, hashheap);
     578         820 :                 if (farmid >= 0) {
     579         820 :                         const char *nme = BBP_physical(b->batCacheid);
     580         820 :                         char fname[MAXPATH];
     581             : 
     582         820 :                         if (GDKfilepath(fname, sizeof(fname), farmid, BATDIR, nme, "thashb") == GDK_SUCCEED) {
     583         820 :                                 struct stat st;
     584         820 :                                 if (stat(fname, &st) == 0) {
     585         820 :                                         sz = (size_t) st.st_size;
     586         820 :                                         fname[strlen(fname) - 1] = 'l';
     587         820 :                                         if (stat(fname, &st) == 0)
     588         820 :                                                 sz += (size_t) st.st_size;
     589             :                                         else
     590             :                                                 sz = 0;
     591             :                                 }
     592             :                         }
     593             :                 }
     594             :         }
     595       91013 :         MT_rwlock_rdunlock(&b->thashlock);
     596       91013 :         return sz;
     597             : }
     598             : 
     599             : static void
     600       14668 : BAThashsave_intern(BAT *b, bool dosync)
     601             : {
     602       14668 :         Hash *h;
     603       14668 :         lng t0 = 0;
     604             : 
     605       14668 :         TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec();
     606             : 
     607       14668 :         if ((h = b->thash) != NULL) {
     608             :                 /* only persist if parent BAT hasn't changed in the
     609             :                  * mean time */
     610       14668 :                 if (!b->theap->dirty &&
     611       13792 :                     ((size_t *) h->heapbckt.base)[1] == BATcount(b) &&
     612       13792 :                     ((size_t *) h->heapbckt.base)[4] == BATcount(b)) {
     613       13792 :                         h->heaplink.dirty = false;
     614       13792 :                         h->heapbckt.dirty = false;
     615       27584 :                         if (HEAPsave(&h->heaplink, h->heaplink.filename, NULL, dosync, h->heaplink.free, NULL) == GDK_SUCCEED &&
     616       13792 :                             HEAPsave(&h->heapbckt, h->heapbckt.filename, NULL, dosync, h->heapbckt.free, NULL) == GDK_SUCCEED) {
     617       13792 :                                 h->heaplink.hasfile = true;
     618       13792 :                                 h->heapbckt.hasfile = true;
     619       13792 :                                 gdk_return rc = HASHfix(h, true, dosync);
     620       13792 :                                 TRC_DEBUG(ACCELERATOR,
     621             :                                           ALGOBATFMT ": persisting hash %s%s (" LLFMT " usec)%s\n", ALGOBATPAR(b), h->heapbckt.filename, dosync ? "" : " no sync", GDKusec() - t0, rc == GDK_SUCCEED ? "" : " failed");
     622             :                         } else {
     623           0 :                                 h->heaplink.dirty = true;
     624           0 :                                 h->heapbckt.dirty = true;
     625             :                         }
     626             :                 }
     627       14668 :                 GDKclrerr();
     628             :         }
     629       14668 : }
     630             : 
     631             : void
     632       14668 : BAThashsave(BAT *b, bool dosync)
     633             : {
     634       14668 :         Hash *h = b->thash;
     635       14668 :         if (h == NULL)
     636             :                 return;
     637       14668 :         ((size_t *) h->heapbckt.base)[0] = (size_t) HASH_VERSION;
     638       14668 :         ((size_t *) h->heapbckt.base)[1] = (size_t) (h->heaplink.free / h->width);
     639       14668 :         ((size_t *) h->heapbckt.base)[2] = (size_t) h->nbucket;
     640       14668 :         ((size_t *) h->heapbckt.base)[3] = (size_t) h->width;
     641       14668 :         ((size_t *) h->heapbckt.base)[4] = (size_t) BATcount(b);
     642       14668 :         ((size_t *) h->heapbckt.base)[5] = (size_t) h->nunique;
     643       14668 :         ((size_t *) h->heapbckt.base)[6] = (size_t) h->nheads;
     644       14668 :         BAThashsave_intern(b, dosync);
     645             : }
     646             : 
     647             : #define EQbte(a, b)     ((a) == (b))
     648             : #define EQsht(a, b)     ((a) == (b))
     649             : #define EQint(a, b)     ((a) == (b))
     650             : #define EQlng(a, b)     ((a) == (b))
     651             : #ifdef HAVE_HGE
     652             : #define EQhge(a, b)     ((a) == (b))
     653             : #endif
     654             : #define EQflt(a, b)     (is_flt_nil(a) ? is_flt_nil(b) : (a) == (b))
     655             : #define EQdbl(a, b)     (is_dbl_nil(a) ? is_dbl_nil(b) : (a) == (b))
     656             : #ifdef HAVE_HGE
     657             : #define EQuuid(a, b)    ((a).h == (b).h)
     658             : #else
     659             : #define EQuuid(a, b)    (memcmp((a).u, (b).u, UUID_SIZE) == 0)
     660             : #endif
     661             : 
     662             : #define starthash(TYPE)                                                 \
     663             :         do {                                                            \
     664             :                 const TYPE *restrict v = (const TYPE *) BUNtloc(bi, 0); \
     665             :                 TIMEOUT_LOOP(p, qry_ctx) {                              \
     666             :                         hget = HASHget(h, c);                           \
     667             :                         if (hget == BUN_NONE) {                         \
     668             :                                 if (h->nheads == maxslots)           \
     669             :                                         TIMEOUT_LOOP_BREAK; /* mask too full */ \
     670             :                                 h->nheads++;                         \
     671             :                                 h->nunique++;                                \
     672             :                         } else {                                        \
     673             :                                 for (hb = hget;                         \
     674             :                                      hb != BUN_NONE;                    \
     675             :                                      hb = HASHgetlink(h, hb)) {         \
     676             :                                         if (EQ##TYPE(v[o - b->hseqbase], v[hb])) \
     677             :                                                 break;                  \
     678             :                                 }                                       \
     679             :                                 h->nunique += hb == BUN_NONE;                \
     680             :                         }                                               \
     681             :                         HASHputlink(h, p, hget);                        \
     682             :                         HASHput(h, c, p);                               \
     683             :                         o = canditer_next(ci);                          \
     684             :                 }                                                       \
     685             :                 TIMEOUT_CHECK(qry_ctx,                                  \
     686             :                               GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); \
     687             :         } while (0)
     688             : #define finishhash(TYPE)                                                \
     689             :         do {                                                            \
     690             :                 const TYPE *restrict v = (const TYPE *) BUNtloc(bi, 0); \
     691             :                 TIMEOUT_LOOP(ci->ncand - p, qry_ctx) {                       \
     692             :                         c = hash_##TYPE(h, v + o - b->hseqbase);     \
     693             :                         hget = HASHget(h, c);                           \
     694             :                         h->nheads += hget == BUN_NONE;                       \
     695             :                         if (!hascand) {                                 \
     696             :                                 for (hb = hget;                         \
     697             :                                      hb != BUN_NONE;                    \
     698             :                                      hb = HASHgetlink(h, hb)) {         \
     699             :                                         if (EQ##TYPE(v[o - b->hseqbase], v[hb])) \
     700             :                                                 break;                  \
     701             :                                 }                                       \
     702             :                                 h->nunique += hb == BUN_NONE;                \
     703             :                                 o = canditer_next_dense(ci);            \
     704             :                         } else {                                        \
     705             :                                 o = canditer_next(ci);                  \
     706             :                         }                                               \
     707             :                         HASHputlink(h, p, hget);                        \
     708             :                         HASHput(h, c, p);                               \
     709             :                         p++;                                            \
     710             :                 }                                                       \
     711             :                 TIMEOUT_CHECK(qry_ctx,                                  \
     712             :                               GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); \
     713             :         } while (0)
     714             : 
     715             : /* Internal function to create a hash table for the given BAT b.
     716             :  * If a candidate list s is also given, the hash table is specific for
     717             :  * the combination of the two: only values from b that are referred to
     718             :  * by s are included in the hash table, so if a result is found when
     719             :  * searching the hash table, the result is a candidate. */
     720             : Hash *
     721       14231 : BAThash_impl(BAT *restrict b, struct canditer *restrict ci, const char *restrict ext)
     722             : {
     723       14231 :         lng t0 = 0;
     724       14231 :         BUN cnt1;
     725       14231 :         BUN mask, maxmask = 0;
     726       14231 :         BUN p, c;
     727       14231 :         oid o;
     728       14231 :         BUN hget, hb;
     729       14231 :         Hash *h = NULL;
     730       14231 :         const char *nme = GDKinmemory(b->theap->farmid) ? ":memory:" : BBP_physical(b->batCacheid);
     731       14231 :         BATiter bi = bat_iterator(b);
     732       14231 :         unsigned int tpe = ATOMbasetype(bi.type);
     733       14231 :         bool hascand = ci->tpe != cand_dense || ci->ncand != bi.count;
     734             : 
     735       14231 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
     736             : 
     737       14231 :         assert(strcmp(ext, "thash") != 0 || !hascand);
     738       14231 :         assert(bi.type != TYPE_msk);
     739             : 
     740       28282 :         MT_thread_setalgorithm(hascand ? "create hash with candidates" : "create hash");
     741       14231 :         TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec();
     742       14231 :         TRC_DEBUG(ACCELERATOR,
     743             :                   ALGOBATFMT ": create hash;\n", ALGOBATPAR(b));
     744       14231 :         if (bi.type == TYPE_void) {
     745           0 :                 if (is_oid_nil(b->tseqbase)) {
     746           0 :                         TRC_DEBUG(ACCELERATOR,
     747             :                                   "cannot create hash-table on void-NIL column.\n");
     748           0 :                         GDKerror("no hash on void/nil column\n");
     749           0 :                         bat_iterator_end(&bi);
     750           0 :                         return NULL;
     751             :                 }
     752           0 :                 TRC_DEBUG(ACCELERATOR,
     753             :                           "creating hash-table on void column..\n");
     754           0 :                 assert(0);
     755             :                 tpe = TYPE_void;
     756             :         }
     757             : 
     758       14231 :         if ((h = GDKzalloc(sizeof(*h))) == NULL ||
     759       14231 :             (h->heaplink.farmid = BBPselectfarm(hascand ? TRANSIENT : b->batRole, bi.type, hashheap)) < 0 ||
     760       14231 :             (h->heapbckt.farmid = BBPselectfarm(hascand ? TRANSIENT : b->batRole, bi.type, hashheap)) < 0) {
     761           0 :                 GDKfree(h);
     762           0 :                 bat_iterator_end(&bi);
     763           0 :                 return NULL;
     764             :         }
     765       14231 :         h->width = HASHwidth(BATcapacity(b));
     766       14231 :         h->heaplink.dirty = true;
     767       14231 :         h->heapbckt.dirty = true;
     768       14231 :         strconcat_len(h->heaplink.filename, sizeof(h->heaplink.filename),
     769             :                       nme, ".", ext, "l", NULL);
     770       14231 :         strconcat_len(h->heapbckt.filename, sizeof(h->heapbckt.filename),
     771             :                       nme, ".", ext, "b", NULL);
     772       14231 :         h->heapbckt.parentid = b->batCacheid;
     773       14231 :         h->heaplink.parentid = b->batCacheid;
     774       14231 :         if (HEAPalloc(&h->heaplink, hascand ? ci->ncand : BATcapacity(b),
     775       14231 :                       h->width) != GDK_SUCCEED) {
     776           0 :                 GDKfree(h);
     777           0 :                 bat_iterator_end(&bi);
     778           0 :                 return NULL;
     779             :         }
     780       14231 :         h->heaplink.free = ci->ncand * h->width;
     781       14231 :         h->Link = h->heaplink.base;
     782             : #ifndef NDEBUG
     783             :         /* clear unused part of Link array */
     784       14231 :         if (h->heaplink.size > h->heaplink.free)
     785       13540 :                 memset(h->heaplink.base + h->heaplink.free, 0,
     786             :                        h->heaplink.size - h->heaplink.free);
     787             : #endif
     788             : 
     789             :         /* determine hash mask size */
     790       14231 :         cnt1 = 0;
     791       14231 :         if (ATOMsize(tpe) == 1) {
     792             :                 /* perfect hash for one-byte sized atoms */
     793             :                 mask = (1 << 8);
     794       14213 :         } else if (ATOMsize(tpe) == 2) {
     795             :                 /* perfect hash for two-byte sized atoms */
     796             :                 mask = (1 << 16);
     797       14159 :         } else if (bi.key || ci->ncand <= 4096) {
     798             :                 /* if key, or if small, don't bother dynamically
     799             :                  * adjusting the hash mask */
     800       14001 :                 mask = HASHmask(ci->ncand);
     801         158 :         } else if (!hascand && bi.unique_est != 0) {
     802         158 :                 mask = (BUN) (bi.unique_est * 1.15); /* about 8/7 */
     803             :         } else {
     804             :                 /* dynamic hash: we start with HASHmask(ci->ncand)/64, or,
     805             :                  * if ci->ncand large enough, HASHmask(ci->ncand)/256; if there
     806             :                  * are too many collisions we try HASHmask(ci->ncand)/64,
     807             :                  * HASHmask(ci->ncand)/16, HASHmask(ci->ncand)/4, and finally
     808             :                  * HASHmask(ci->ncand), but we might skip some of these if
     809             :                  * there are many distinct values.  */
     810           0 :                 maxmask = HASHmask(ci->ncand);
     811           0 :                 mask = maxmask >> 6;
     812           0 :                 while (mask > 4096)
     813           0 :                         mask >>= 2;
     814             :                 /* try out on first 25% of b */
     815           0 :                 cnt1 = ci->ncand >> 2;
     816             :         }
     817             : 
     818       14231 :         o = canditer_next(ci);  /* always one ahead */
     819           0 :         for (;;) {
     820       14231 :                 lng t1 = 0;
     821       14231 :                 TRC_DEBUG_IF(ACCELERATOR) t1 = GDKusec();
     822       14231 :                 BUN maxslots = (mask >> 3) - 1;   /* 1/8 full is too full */
     823             : 
     824       14231 :                 h->nheads = 0;
     825       14231 :                 h->nunique = 0;
     826       14231 :                 p = 0;
     827       14231 :                 HEAPfree(&h->heapbckt, true);
     828             :                 /* create the hash structures */
     829       28462 :                 if (HASHnew(h, ATOMtype(bi.type), BATcapacity(b),
     830             :                             mask, ci->ncand, true) != GDK_SUCCEED) {
     831           0 :                         HEAPfree(&h->heaplink, true);
     832           0 :                         GDKfree(h);
     833           0 :                         bat_iterator_end(&bi);
     834           0 :                         return NULL;
     835             :                 }
     836             : 
     837       14231 :                 switch (tpe) {
     838          18 :                 case TYPE_bte:
     839          36 :                         starthash(bte);
     840             :                         break;
     841          54 :                 case TYPE_sht:
     842         108 :                         starthash(sht);
     843             :                         break;
     844           9 :                 case TYPE_flt:
     845          18 :                         starthash(flt);
     846             :                         break;
     847       12665 :                 case TYPE_int:
     848       25330 :                         starthash(int);
     849             :                         break;
     850           5 :                 case TYPE_dbl:
     851          10 :                         starthash(dbl);
     852             :                         break;
     853         712 :                 case TYPE_lng:
     854        1424 :                         starthash(lng);
     855             :                         break;
     856             : #ifdef HAVE_HGE
     857           2 :                 case TYPE_hge:
     858           4 :                         starthash(hge);
     859             :                         break;
     860             : #endif
     861           1 :                 case TYPE_uuid:
     862           2 :                         starthash(uuid);
     863             :                         break;
     864         765 :                 default: {
     865         765 :                         int (*atomcmp)(const void *, const void *) = ATOMcompare(h->type);
     866        1530 :                         TIMEOUT_LOOP(p, qry_ctx) {
     867           0 :                                 const void *restrict v = BUNtail(bi, o - b->hseqbase);
     868           0 :                                 c = hash_any(h, v);
     869           0 :                                 hget = HASHget(h, c);
     870           0 :                                 if (hget == BUN_NONE) {
     871           0 :                                         if (h->nheads == maxslots)
     872           0 :                                                 TIMEOUT_LOOP_BREAK; /* mask too full */
     873           0 :                                         h->nheads++;
     874           0 :                                         h->nunique++;
     875             :                                 } else {
     876             :                                         for (hb = hget;
     877           0 :                                              hb != BUN_NONE;
     878           0 :                                              hb = HASHgetlink(h, hb)) {
     879           0 :                                                 if (atomcmp(v,
     880           0 :                                                             BUNtail(bi, hb)) == 0)
     881             :                                                         break;
     882             :                                         }
     883           0 :                                         h->nunique += hb == BUN_NONE;
     884             :                                 }
     885           0 :                                 HASHputlink(h, p, hget);
     886           0 :                                 HASHput(h, c, p);
     887           0 :                                 o = canditer_next(ci);
     888             :                         }
     889         765 :                         TIMEOUT_CHECK(qry_ctx,
     890             :                                       GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
     891             :                         break;
     892             :                 }
     893             :                 }
     894       14231 :                 TRC_DEBUG_IF(ACCELERATOR) if (p < cnt1)
     895           0 :                         TRC_DEBUG_ENDIF(ACCELERATOR,
     896             :                                         "%s: abort starthash with "
     897             :                                 "mask " BUNFMT " at " BUNFMT " after " LLFMT " usec\n", BATgetId(b), mask, p, GDKusec() - t1);
     898       14231 :                 if (p == cnt1 || mask == maxmask)
     899             :                         break;
     900           0 :                 mask <<= 2;
     901             :                 /* if we fill up the slots fast (p <= maxslots * 1.2)
     902             :                  * increase mask size a bit more quickly */
     903           0 :                 if (p == h->nunique) {
     904             :                         /* only unique values so far: grow really fast */
     905             :                         mask = maxmask;
     906             :                         cnt1 = 0;
     907           0 :                 } else if (mask < maxmask && p <= maxslots * 1.2)
     908           0 :                         mask <<= 2;
     909           0 :                 canditer_reset(ci);
     910           0 :                 o = canditer_next(ci);
     911             :         }
     912             : 
     913             :         /* finish the hashtable with the current mask */
     914       14231 :         switch (tpe) {
     915          18 :         case TYPE_bte:
     916         391 :                 finishhash(bte);
     917             :                 break;
     918          54 :         case TYPE_sht:
     919         558 :                 finishhash(sht);
     920             :                 break;
     921       12665 :         case TYPE_int:
     922   109903379 :                 finishhash(int);
     923             :                 break;
     924           9 :         case TYPE_flt:
     925         342 :                 finishhash(flt);
     926             :                 break;
     927           5 :         case TYPE_dbl:
     928        4143 :                 finishhash(dbl);
     929             :                 break;
     930         712 :         case TYPE_lng:
     931    16876827 :                 finishhash(lng);
     932             :                 break;
     933             : #ifdef HAVE_HGE
     934           2 :         case TYPE_hge:
     935          14 :                 finishhash(hge);
     936             :                 break;
     937             : #endif
     938           1 :         case TYPE_uuid:
     939           2 :                 finishhash(uuid);
     940             :                 break;
     941         765 :         default: {
     942         765 :                 int (*atomcmp)(const void *, const void *) = ATOMcompare(h->type);
     943      325031 :                 TIMEOUT_LOOP(ci->ncand - p, qry_ctx) {
     944      323495 :                         const void *restrict v = BUNtail(bi, o - b->hseqbase);
     945      323495 :                         c = hash_any(h, v);
     946      323487 :                         hget = HASHget(h, c);
     947      323487 :                         h->nheads += hget == BUN_NONE;
     948      323487 :                         if (!hascand) {
     949             :                                 for (hb = hget;
     950      418804 :                                      hb != BUN_NONE;
     951       95315 :                                      hb = HASHgetlink(h, hb)) {
     952      261667 :                                         if (atomcmp(v, BUNtail(bi, hb)) == 0)
     953             :                                                 break;
     954             :                                 }
     955      323487 :                                 h->nunique += hb == BUN_NONE;
     956             :                         }
     957      323485 :                         HASHputlink(h, p, hget);
     958      323484 :                         HASHput(h, c, p);
     959      323487 :                         o = canditer_next(ci);
     960      323495 :                         p++;
     961             :                 }
     962         765 :                 TIMEOUT_CHECK(qry_ctx,
     963             :                               GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
     964             :                 break;
     965             :         }
     966             :         }
     967       14231 :         bat_iterator_end(&bi);
     968             :         /* if the number of unique values is equal to the bat count,
     969             :          * all values are necessarily distinct */
     970       14231 :         MT_lock_set(&b->theaplock);
     971       14230 :         if (h->nunique == BATcount(b) && !b->tkey) {
     972       10175 :                 b->tkey = true;
     973             :         }
     974       14230 :         if (ci->ncand == BATcount(b))
     975       14050 :                 b->tunique_est = (double) h->nunique;
     976       14230 :         MT_lock_unset(&b->theaplock);
     977       14231 :         TRC_DEBUG_IF(ACCELERATOR) {
     978           0 :                 TRC_DEBUG_ENDIF(ACCELERATOR,
     979             :                                 "hash construction " LLFMT " usec\n", GDKusec() - t0);
     980           0 :                 HASHcollisions(b, h, __func__);
     981             :         }
     982             :         return h;
     983             : 
     984           0 :   bailout:
     985           0 :         bat_iterator_end(&bi);
     986           0 :         HEAPfree(&h->heaplink, true);
     987           0 :         HEAPfree(&h->heapbckt, true);
     988           0 :         GDKfree(h);
     989           0 :         return NULL;
     990             : }
     991             : 
     992             : gdk_return
     993     2718553 : BAThash(BAT *b)
     994             : {
     995     2718553 :         if (b->ttype == TYPE_void) {
     996           0 :                 GDKerror("No hash on void type bats\n");
     997           0 :                 return GDK_FAIL;
     998             :         }
     999     2718553 :         if (ATOMstorage(b->ttype) == TYPE_msk) {
    1000           0 :                 GDKerror("No hash on msk type bats\n");
    1001           0 :                 return GDK_FAIL;
    1002             :         }
    1003     2718553 :         if (BATcheckhash(b)) {
    1004             :                 return GDK_SUCCEED;
    1005             :         }
    1006             : #ifdef __COVERITY__
    1007             :         MT_rwlock_wrlock(&b->thashlock);
    1008             : #else
    1009       15984 :         for (;;) {
    1010             :                 /* If multiple threads simultaneously try to build a
    1011             :                  * hash on a bat, e.g. in order to perform a join, it
    1012             :                  * may happen that one thread succeeds in obtaining the
    1013             :                  * write lock, then builds the hash, releases the lock,
    1014             :                  * acquires the read lock, and performs the join.  The
    1015             :                  * other threads may then still be attempting to acquire
    1016             :                  * the write lock.  But now they have to wait until the
    1017             :                  * read lock is released, which can be quite a long
    1018             :                  * time.  Especially if a second thread goes through the
    1019             :                  * same process as the first. */
    1020       15984 :                 if (MT_rwlock_wrtry(&b->thashlock))
    1021             :                         break;
    1022        1933 :                 MT_sleep_ms(1);
    1023        1933 :                 if (MT_rwlock_rdtry(&b->thashlock)) {
    1024          54 :                         if (b->thash != NULL && b->thash != (Hash *) 1) {
    1025          35 :                                 MT_rwlock_rdunlock(&b->thashlock);
    1026          35 :                                 return GDK_SUCCEED;
    1027             :                         }
    1028          19 :                         MT_rwlock_rdunlock(&b->thashlock);
    1029             :                 }
    1030             :         }
    1031             : #endif
    1032             :         /* we have the write lock */
    1033       14051 :         if (b->thash == NULL) {
    1034       14051 :                 struct canditer ci;
    1035       14051 :                 canditer_init(&ci, b, NULL);
    1036       14051 :                 if ((b->thash = BAThash_impl(b, &ci, "thash")) == NULL) {
    1037           0 :                         MT_rwlock_wrunlock(&b->thashlock);
    1038           0 :                         return GDK_FAIL;
    1039             :                 }
    1040             :         }
    1041       14051 :         MT_rwlock_wrunlock(&b->thashlock);
    1042       14051 :         return GDK_SUCCEED;
    1043             : }
    1044             : 
    1045             : /*
    1046             :  * The entry on which a value hashes can be calculated with the
    1047             :  * routine HASHprobe.
    1048             :  */
    1049             : BUN
    1050   549017279 : HASHprobe(const Hash *h, const void *v)
    1051             : {
    1052   967799720 :         switch (ATOMbasetype(h->type)) {
    1053        3883 :         case TYPE_bte:
    1054        3883 :                 return hash_bte(h, v);
    1055      139356 :         case TYPE_sht:
    1056      139356 :                 return hash_sht(h, v);
    1057   398015540 :         case TYPE_int:
    1058   398015540 :                 return hash_int(h, v);
    1059   131958253 :         case TYPE_lng:
    1060   131958253 :                 return hash_lng(h, v);
    1061             : #ifdef HAVE_HGE
    1062       26874 :         case TYPE_hge:
    1063       26874 :                 return hash_hge(h, v);
    1064             : #endif
    1065      150481 :         case TYPE_flt:
    1066      150481 :                 return hash_flt(h, v);
    1067       35901 :         case TYPE_dbl:
    1068       35901 :                 return hash_dbl(h, v);
    1069        2104 :         case TYPE_uuid:
    1070        2104 :                 return hash_uuid(h, v);
    1071    18684887 :         default:
    1072    18684887 :                 return hash_any(h, v);
    1073             :         }
    1074             : }
    1075             : 
    1076             : void
    1077      495360 : HASHappend_locked(BAT *b, BUN i, const void *v)
    1078             : {
    1079      495360 :         Hash *h = b->thash;
    1080      495360 :         if (h == NULL) {
    1081           0 :                 return;
    1082             :         }
    1083      495360 :         if (h == (Hash *) 1) {
    1084           0 :                 b->thash = NULL;
    1085           0 :                 doHASHdestroy(b, h);
    1086           0 :                 GDKclrerr();
    1087           0 :                 return;
    1088             :         }
    1089      495360 :         assert(i * h->width == h->heaplink.free);
    1090      495360 :         if (h->nunique < b->batCount / hash_destroy_uniques_fraction) {
    1091           0 :                 b->thash = NULL;
    1092           0 :                 doHASHdestroy(b, h);
    1093           0 :                 GDKclrerr();
    1094           0 :                 return;
    1095             :         }
    1096      495360 :         if (HASHfix(h, false, true) != GDK_SUCCEED) {
    1097           0 :                 b->thash = NULL;
    1098           0 :                 doHASHdestroy(b, h);
    1099           0 :                 GDKclrerr();
    1100           0 :                 return;
    1101             :         }
    1102      495360 :         if (HASHwidth(i + 1) > h->width &&
    1103           0 :              HASHupgradehashheap(b) != GDK_SUCCEED) {
    1104           0 :                 GDKclrerr();
    1105           0 :                 return;
    1106             :         }
    1107      990717 :         if ((ATOMsize(b->ttype) > 2 &&
    1108      495357 :              HASHgrowbucket(b) != GDK_SUCCEED) ||
    1109      497057 :             ((i + 1) * h->width > h->heaplink.size &&
    1110        1697 :              HEAPextend(&h->heaplink,
    1111        1697 :                         i * h->width + GDK_mmap_pagesize,
    1112             :                         true) != GDK_SUCCEED)) {
    1113           0 :                 b->thash = NULL;
    1114           0 :                 HEAPfree(&h->heapbckt, true);
    1115           0 :                 HEAPfree(&h->heaplink, true);
    1116           0 :                 GDKfree(h);
    1117           0 :                 GDKclrerr();
    1118           0 :                 return;
    1119             :         }
    1120      495360 :         h->Link = h->heaplink.base;
    1121      495360 :         BUN c = HASHprobe(h, v);
    1122      495360 :         h->heaplink.free += h->width;
    1123      495360 :         BUN hb = HASHget(h, c);
    1124      495360 :         BUN hb2;
    1125      495360 :         BATiter bi = bat_iterator_nolock(b);
    1126      495360 :         int (*atomcmp)(const void *, const void *) = ATOMcompare(h->type);
    1127      495360 :         for (hb2 = hb;
    1128      689252 :              hb2 != BUN_NONE;
    1129      193892 :              hb2 = HASHgetlink(h, hb2)) {
    1130      357487 :                 if (atomcmp(v, BUNtail(bi, hb2)) == 0)
    1131             :                         break;
    1132             :         }
    1133      495360 :         h->nheads += hb == BUN_NONE;
    1134      495360 :         h->nunique += hb2 == BUN_NONE;
    1135      495360 :         HASHputlink(h, i, hb);
    1136      495360 :         HASHput(h, c, i);
    1137      495360 :         h->heapbckt.dirty = true;
    1138      495360 :         h->heaplink.dirty = true;
    1139             : }
    1140             : 
    1141             : BUN
    1142           0 : HASHappend(BAT *b, BUN i, const void *v)
    1143             : {
    1144           0 :         BUN nunique;
    1145           0 :         MT_rwlock_wrlock(&b->thashlock);
    1146           0 :         HASHappend_locked(b, i, v);
    1147           0 :         nunique = b->thash ? b->thash->nunique : 0;
    1148           0 :         MT_rwlock_wrunlock(&b->thashlock);
    1149           0 :         return nunique;
    1150             : }
    1151             : 
    1152             : /* insert value v at position p into the hash table of b */
    1153             : void
    1154   173308461 : HASHinsert_locked(BATiter *bi, BUN p, const void *v)
    1155             : {
    1156   173308461 :         BAT *b = bi->b;
    1157   173308461 :         Hash *h = b->thash;
    1158   173308461 :         if (h == NULL) {
    1159             :                 return;
    1160             :         }
    1161       25418 :         if (h == (Hash *) 1) {
    1162           0 :                 b->thash = NULL;
    1163           0 :                 doHASHdestroy(b, h);
    1164           0 :                 GDKclrerr();
    1165           0 :                 return;
    1166             :         }
    1167       25418 :         assert(p * h->width < h->heaplink.free);
    1168       25418 :         if (h->nunique < b->batCount / hash_destroy_uniques_fraction) {
    1169           0 :                 b->thash = NULL;
    1170           0 :                 doHASHdestroy(b, h);
    1171           0 :                 GDKclrerr();
    1172           0 :                 return;
    1173             :         }
    1174       25418 :         if (HASHfix(h, false, true) != GDK_SUCCEED) {
    1175           0 :                 b->thash = NULL;
    1176           0 :                 doHASHdestroy(b, h);
    1177           0 :                 GDKclrerr();
    1178           0 :                 return;
    1179             :         }
    1180       25418 :         BUN c = HASHprobe(h, v);
    1181       25418 :         BUN hb = HASHget(h, c);
    1182       25418 :         int (*atomcmp)(const void *, const void *) = ATOMcompare(h->type);
    1183       25418 :         if (hb == BUN_NONE || hb < p) {
    1184             :                 /* bucket is empty, or bucket is used by lower numbered
    1185             :                  * position */
    1186        4987 :                 HASHputlink(h, p, hb);
    1187        4987 :                 HASHput(h, c, p);
    1188        4987 :                 h->heaplink.dirty = true;
    1189        4987 :                 h->heapbckt.dirty = true;
    1190        4987 :                 if (hb == BUN_NONE) {
    1191        2321 :                         h->nheads++;
    1192             :                 } else {
    1193        3116 :                         do {
    1194        3116 :                                 if (atomcmp(v, BUNtail(*bi, hb)) == 0) {
    1195             :                                         /* found another row with the
    1196             :                                          * same value, so don't
    1197             :                                          * increment nunique */
    1198             :                                         return;
    1199             :                                 }
    1200        1301 :                                 hb = HASHgetlink(h, hb);
    1201        1301 :                         } while (hb != BUN_NONE);
    1202             :                 }
    1203             :                 /* this is a new value */
    1204        3172 :                 h->nunique++;
    1205        3172 :                 return;
    1206             :         }
    1207             :         bool seen = false;
    1208      196081 :         for (;;) {
    1209      196081 :                 if (!seen)
    1210       20957 :                         seen = atomcmp(v, BUNtail(*bi, hb)) == 0;
    1211      196081 :                 BUN hb2 = HASHgetlink(h, hb);
    1212      196081 :                 if (hb2 == BUN_NONE || hb2 < p) {
    1213       20431 :                         HASHputlink(h, p, hb2);
    1214       20431 :                         HASHputlink(h, hb, p);
    1215       20431 :                         h->heaplink.dirty = true;
    1216       20746 :                         while (!seen && hb2 != BUN_NONE) {
    1217         315 :                                 seen = atomcmp(v, BUNtail(*bi, hb2)) == 0;
    1218         315 :                                 hb2 = HASHgetlink(h, hb2);
    1219             :                         }
    1220       20431 :                         if (!seen)
    1221         372 :                                 h->nunique++;
    1222       20431 :                         return;
    1223             :                 }
    1224             :                 hb = hb2;
    1225             :         }
    1226             : }
    1227             : 
    1228             : BUN
    1229           0 : HASHinsert(BATiter *bi, BUN p, const void *v)
    1230             : {
    1231           0 :         BUN nunique;
    1232           0 :         MT_rwlock_wrlock(&bi->b->thashlock);
    1233           0 :         HASHinsert_locked(bi, p, v);
    1234           0 :         nunique = bi->b->thash ? bi->b->thash->nunique : 0;
    1235           0 :         MT_rwlock_wrunlock(&bi->b->thashlock);
    1236           0 :         return nunique;
    1237             : }
    1238             : 
    1239             : /* delete value v at position p from the hash table of b */
    1240             : void
    1241   173303570 : HASHdelete_locked(BATiter *bi, BUN p, const void *v)
    1242             : {
    1243   173303570 :         BAT *b = bi->b;
    1244   173303570 :         Hash *h = b->thash;
    1245   173303570 :         if (h == NULL) {
    1246             :                 return;
    1247             :         }
    1248       25418 :         if (h == (Hash *) 1) {
    1249           0 :                 b->thash = NULL;
    1250           0 :                 doHASHdestroy(b, h);
    1251           0 :                 GDKclrerr();
    1252           0 :                 return;
    1253             :         }
    1254       25418 :         assert(p * h->width < h->heaplink.free);
    1255       25418 :         if (h->nunique < b->batCount / hash_destroy_uniques_fraction) {
    1256           0 :                 b->thash = NULL;
    1257           0 :                 doHASHdestroy(b, h);
    1258           0 :                 GDKclrerr();
    1259           0 :                 return;
    1260             :         }
    1261       25418 :         if (HASHfix(h, false, true) != GDK_SUCCEED) {
    1262           0 :                 b->thash = NULL;
    1263           0 :                 doHASHdestroy(b, h);
    1264           0 :                 GDKclrerr();
    1265           0 :                 return;
    1266             :         }
    1267       25418 :         BUN c = HASHprobe(h, v);
    1268       25418 :         BUN hb = HASHget(h, c);
    1269       25418 :         int (*atomcmp)(const void *, const void *) = ATOMcompare(h->type);
    1270       25418 :         if (hb == p) {
    1271        3430 :                 BUN hb2 = HASHgetlink(h, p);
    1272        3430 :                 HASHput(h, c, hb2);
    1273        3430 :                 HASHputlink(h, p, BUN_NONE);
    1274        3430 :                 h->heaplink.dirty = true;
    1275        3430 :                 h->heapbckt.dirty = true;
    1276        3430 :                 if (hb2 == BUN_NONE) {
    1277        1563 :                         h->nheads--;
    1278             :                 } else {
    1279        2342 :                         do {
    1280        2342 :                                 if (atomcmp(v, BUNtail(*bi, hb2)) == 0) {
    1281             :                                         /* found another row with the
    1282             :                                          * same value, so don't
    1283             :                                          * decrement nunique below */
    1284             :                                         return;
    1285             :                                 }
    1286        1362 :                                 hb2 = HASHgetlink(h, hb2);
    1287        1362 :                         } while (hb2 != BUN_NONE);
    1288             :                 }
    1289             :                 /* no rows found with the same value, so number of
    1290             :                  * unique values is one lower */
    1291        2450 :                 h->nunique--;
    1292        2450 :                 return;
    1293             :         }
    1294             :         bool seen = false;
    1295             :         BUN links = 0;
    1296      185411 :         for (;;) {
    1297      185411 :                 if (!seen)
    1298       22378 :                         seen = atomcmp(v, BUNtail(*bi, hb)) == 0;
    1299      185411 :                 BUN hb2 = HASHgetlink(h, hb);
    1300      185411 :                 assert(hb2 != BUN_NONE );
    1301      185411 :                 assert(hb2 < hb);
    1302      185411 :                 if (hb2 == p) {
    1303       21988 :                         for (hb2 = HASHgetlink(h, hb2);
    1304       22116 :                              !seen && hb2 != BUN_NONE;
    1305         128 :                              hb2 = HASHgetlink(h, hb2))
    1306         128 :                                 seen = atomcmp(v, BUNtail(*bi, hb2)) == 0;
    1307       21988 :                         break;
    1308             :                 }
    1309      163423 :                 hb = hb2;
    1310      163423 :                 if (++links > hash_destroy_chain_length) {
    1311           0 :                         b->thash = NULL;
    1312           0 :                         doHASHdestroy(b, h);
    1313           0 :                         GDKclrerr();
    1314           0 :                         return;
    1315             :                 }
    1316             :         }
    1317       21988 :         HASHputlink(h, hb, HASHgetlink(h, p));
    1318       21988 :         HASHputlink(h, p, BUN_NONE);
    1319       21988 :         h->heaplink.dirty = true;
    1320       21988 :         if (!seen)
    1321         169 :                 h->nunique--;
    1322             : }
    1323             : 
    1324             : BUN
    1325           2 : HASHdelete(BATiter *bi, BUN p, const void *v)
    1326             : {
    1327           2 :         BUN nunique;
    1328           2 :         MT_rwlock_wrlock(&bi->b->thashlock);
    1329           2 :         HASHdelete_locked(bi, p, v);
    1330           2 :         nunique = bi->b->thash ? bi->b->thash->nunique : 0;
    1331           2 :         MT_rwlock_wrunlock(&bi->b->thashlock);
    1332           2 :         return nunique;
    1333             : }
    1334             : 
    1335             : BUN
    1336           0 : HASHlist(Hash *h, BUN i)
    1337             : {
    1338           0 :         BUN c = 1;
    1339           0 :         BUN j = HASHget(h, i);
    1340             : 
    1341           0 :         if (j == BUN_NONE)
    1342             :                 return 1;
    1343           0 :         while ((j = HASHgetlink(h, i)) != BUN_NONE) {
    1344           0 :                 c++;
    1345           0 :                 i = j;
    1346             :         }
    1347             :         return c;
    1348             : }
    1349             : 
    1350             : void
    1351    20124699 : HASHdestroy(BAT *b)
    1352             : {
    1353    20124699 :         if (b) {
    1354    20124699 :                 Hash *hs;
    1355    20124699 :                 MT_rwlock_wrlock(&b->thashlock);
    1356    20125118 :                 hs = b->thash;
    1357    20125118 :                 b->thash = NULL;
    1358    20125118 :                 MT_rwlock_wrunlock(&b->thashlock);
    1359    20122716 :                 doHASHdestroy(b, hs);
    1360             :         }
    1361    20124427 : }
    1362             : 
    1363             : void
    1364      364196 : HASHfree(BAT *b)
    1365             : {
    1366      364196 :         if (b) {
    1367      364196 :                 Hash *h;
    1368      364196 :                 MT_rwlock_wrlock(&b->thashlock);
    1369      364196 :                 if ((h = b->thash) != NULL && h != (Hash *) 1) {
    1370        2059 :                         bool rmheap = h->heaplink.dirty || h->heapbckt.dirty;
    1371        2059 :                         TRC_DEBUG(ACCELERATOR, ALGOBATFMT " free hash %s\n",
    1372             :                                   ALGOBATPAR(b),
    1373             :                                   rmheap ? "removing" : "keeping");
    1374             : 
    1375        2059 :                         b->thash = rmheap ? NULL : (Hash *) 1;
    1376        2059 :                         HEAPfree(&h->heapbckt, rmheap);
    1377        2059 :                         HEAPfree(&h->heaplink, rmheap);
    1378        2059 :                         GDKfree(h);
    1379             :                 }
    1380      364196 :                 MT_rwlock_wrunlock(&b->thashlock);
    1381             :         }
    1382      364196 : }

Generated by: LCOV version 1.14