LCOV - code coverage report
Current view: top level - gdk - gdk_hash.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 563 779 72.3 %
Date: 2024-04-25 20:03:45 Functions: 20 24 83.3 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : /*
      14             :  * - Hash Table Creation
      15             :  * The hash indexing scheme for BATs reserves a block of memory to
      16             :  * maintain the hash table and a collision list. A one-to-one mapping
      17             :  * exists between the BAT and the collision list using the BUN
      18             :  * index. NOTE: we alloc the link list as a parallel array to the BUN
      19             :  * array; hence the hash link array has the same size as
      20             :  * BATcapacity(b) (not BATcount(b)). This allows us in the BUN insert
      21             :  * and delete to assume that there is hash space if there is BUN
      22             :  * space.
      23             :  *
      24             :  * The hash mask size is a power of two, so we can do bitwise AND on
      25             :  * the hash (integer) number to quickly find the head of the bucket
      26             :  * chain.  Clearly, the hash mask size is a crucial parameter. If we
      27             :  * know that the column is unique (tkey), we use direct hashing (mask
      28             :  * size ~= BATcount). Otherwise we dynamically determine the mask size
      29             :  * by starting out with mask size = BATcount/64 (just 1.5% of memory
      30             :  * storage overhead). Then we start building the hash table on the
      31             :  * first 25% of the BAT. As we aim for max-collisions list length of
      32             :  * 4, the list on 25% should not exceed length 1. So, if a small
      33             :  * number of collisssions occurs (mask/2) then we abandon the attempt
      34             :  * and restart with a mask that is 4 times larger. This converges
      35             :  * after three cycles to direct hashing.
      36             :  */
      37             : 
      38             : #include "monetdb_config.h"
      39             : #include "gdk.h"
      40             : #include "gdk_private.h"
      41             : 
      42             : static inline uint8_t __attribute__((__const__))
      43     1467537 : HASHwidth(BUN hashsize)
      44             : {
      45     1467537 :         (void) hashsize;
      46             : #ifdef BUN2
      47     1467537 :         if (hashsize <= (BUN) BUN2_NONE)
      48             :                 return BUN2;
      49             : #endif
      50             : #ifdef BUN8
      51         598 :         if (hashsize > (BUN) BUN4_NONE)
      52           0 :                 return BUN8;
      53             : #endif
      54             :         return BUN4;
      55             : }
      56             : 
      57             : static inline BUN __attribute__((__const__))
      58        7405 : hashmask(BUN m)
      59             : {
      60        7405 :         m |= m >> 1;
      61        7405 :         m |= m >> 2;
      62        7405 :         m |= m >> 4;
      63        7405 :         m |= m >> 8;
      64        7405 :         m |= m >> 16;
      65             : #if SIZEOF_BUN == 8
      66        7405 :         m |= m >> 32;
      67             : #endif
      68        7405 :         return m;
      69             : }
      70             : 
      71             : BUN
      72      979222 : HASHmask(BUN cnt)
      73             : {
      74      979222 :         BUN m = cnt;
      75             : 
      76             : #if 0
      77             :         /* find largest power of 2 smaller than or equal to cnt */
      78             :         m = hashmask(m);
      79             :         m -= m >> 1;
      80             : 
      81             :         /* if cnt is more than 1/3 into the gap between m and 2*m,
      82             :            double m */
      83             :         if (m + m - cnt < 2 * (cnt - m))
      84             :                 m += m;
      85             : #else
      86      979222 :         m = m * 8 / 7;
      87             : #endif
      88      968290 :         if (m < BATTINY)
      89      204923 :                 m = BATTINY;
      90      968290 :         return m;
      91             : }
      92             : 
      93             : static inline void
      94      980602 : HASHclear(Hash *h)
      95             : {
      96             :         /* since BUN2_NONE, BUN4_NONE, BUN8_NONE
      97             :          * are all equal to ~0, i.e., have all bits set,
      98             :          * we can use a simple memset() to clear the Hash,
      99             :          * rather than iteratively assigning individual
     100             :          * BUNi_NONE values in a for-loop
     101             :          */
     102      980602 :         memset(h->Bckt, 0xFF, h->nbucket * h->width);
     103      980602 : }
     104             : 
     105             : #define HASH_VERSION            6
     106             : /* this is only for the change of hash function of the floating point
     107             :  * types, the UUID type and the MBR type; if HASH_VERSION is increased
     108             :  * again from 6, the code associated with HASH_VERSION_NOUUID and
     109             :  * HASH_VERSION_NOMBR must be deleted */
     110             : #define HASH_VERSION_FLOAT      5
     111             : #define HASH_VERSION_NOMBR      4
     112             : #define HASH_VERSION_NOUUID     3
     113             : #define HASH_HEADER_SIZE        7       /* nr of size_t fields in header */
     114             : 
     115             : void
     116        9664 : doHASHdestroy(BAT *b, Hash *hs)
     117             : {
     118        9664 :         if (hs == (Hash *) 1) {
     119           9 :                 GDKunlink(BBPselectfarm(b->batRole, b->ttype, hashheap),
     120             :                           BATDIR,
     121           9 :                           BBP_physical(b->batCacheid),
     122             :                           "thashl");
     123           9 :                 GDKunlink(BBPselectfarm(b->batRole, b->ttype, hashheap),
     124             :                           BATDIR,
     125           9 :                           BBP_physical(b->batCacheid),
     126             :                           "thashb");
     127        9655 :         } else if (hs) {
     128        9655 :                 TRC_DEBUG(ACCELERATOR, ALGOBATFMT ": removing%s hash\n", ALGOBATPAR(b), *(size_t *) hs->heapbckt.base & (1 << 24) ? " persisted" : "");
     129        9655 :                 HEAPfree(&hs->heapbckt, true);
     130        9655 :                 HEAPfree(&hs->heaplink, true);
     131        9655 :                 GDKfree(hs);
     132             :         }
     133        9664 : }
     134             : 
     135             : gdk_return
     136      980603 : HASHnew(Hash *h, int tpe, BUN size, BUN mask, BUN count, bool bcktonly)
     137             : {
     138      980603 :         if (h->width == 0)
     139      970019 :                 h->width = HASHwidth(size);
     140             : 
     141      980603 :         if (!bcktonly) {
     142      969468 :                 if (HEAPalloc(&h->heaplink, size, h->width) != GDK_SUCCEED)
     143             :                         return GDK_FAIL;
     144      969468 :                 h->heaplink.free = size * h->width;
     145      969468 :                 h->heaplink.dirty = true;
     146      969468 :                 h->Link = h->heaplink.base;
     147             :         }
     148      980603 :         if (HEAPalloc(&h->heapbckt, mask + HASH_HEADER_SIZE * SIZEOF_SIZE_T / h->width, h->width) != GDK_SUCCEED) {
     149           0 :                 if (!bcktonly) {
     150           0 :                         HEAPfree(&h->heaplink, true);
     151           0 :                         h->heaplink.free = 0;
     152           0 :                         h->Link = NULL;
     153             :                 }
     154           0 :                 return GDK_FAIL;
     155             :         }
     156      980603 :         h->heapbckt.free = mask * h->width + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     157      980603 :         h->heapbckt.dirty = true;
     158      980603 :         h->nbucket = mask;
     159      980603 :         if (mask & (mask - 1)) {
     160        7220 :                 h->mask2 = hashmask(mask);
     161        7220 :                 h->mask1 = h->mask2 >> 1;
     162             :         } else {
     163             :                 /* mask is a power of two */
     164      973383 :                 h->mask1 = mask - 1;
     165      973383 :                 h->mask2 = h->mask1 << 1 | 1;
     166             :         }
     167      980603 :         h->Bckt = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     168      980603 :         h->type = tpe;
     169      980603 :         HASHclear(h);           /* zero the mask */
     170      980603 :         ((size_t *) h->heapbckt.base)[0] = (size_t) HASH_VERSION;
     171      980603 :         ((size_t *) h->heapbckt.base)[1] = (size_t) size;
     172      980603 :         ((size_t *) h->heapbckt.base)[2] = (size_t) h->nbucket;
     173      980603 :         ((size_t *) h->heapbckt.base)[3] = (size_t) h->width;
     174      980603 :         ((size_t *) h->heapbckt.base)[4] = (size_t) count;
     175      980603 :         ((size_t *) h->heapbckt.base)[5] = (size_t) h->nunique;
     176      980603 :         ((size_t *) h->heapbckt.base)[6] = (size_t) h->nheads;
     177      980603 :         TRC_DEBUG(ACCELERATOR,
     178             :                   "create hash(size " BUNFMT ", mask " BUNFMT ", width %d, total " BUNFMT " bytes);\n", size, mask, h->width, (size + mask) * h->width);
     179             :         return GDK_SUCCEED;
     180             : }
     181             : 
     182             : /* collect HASH statistics for analysis */
     183             : static void
     184           0 : HASHcollisions(BAT *b, Hash *h, const char *func)
     185             : {
     186           0 :         lng cnt, entries = 0, max = 0;
     187           0 :         double total = 0;
     188           0 :         BUN p, i, j;
     189             : 
     190           0 :         if (b == 0 || h == 0)
     191             :                 return;
     192           0 :         for (i = 0, j = h->nbucket; i < j; i++)
     193           0 :                 if ((p = HASHget(h, i)) != BUN_NONE) {
     194           0 :                         entries++;
     195           0 :                         cnt = 0;
     196           0 :                         for (; p != BUN_NONE; p = HASHgetlink(h, p))
     197           0 :                                 cnt++;
     198           0 :                         if (cnt > max)
     199             :                                 max = cnt;
     200           0 :                         total += cnt;
     201             :                 }
     202           0 :         TRC_DEBUG_ENDIF(ACCELERATOR,
     203             :                         "%s(" ALGOBATFMT "): statistics " BUNFMT ", "
     204             :                         "entries " LLFMT ", nunique " BUNFMT ", "
     205             :                         "nbucket " BUNFMT ", max " LLFMT ", avg %2.6f;\n",
     206             :                         func, ALGOBATPAR(b), BATcount(b), entries,
     207             :                         h->nunique, h->nbucket, max,
     208             :                         entries == 0 ? 0 : total / entries);
     209             : }
     210             : 
     211             : static gdk_return
     212           0 : HASHupgradehashheap(BAT *b)
     213             : {
     214             : #if defined(BUN2) || defined(BUN8)
     215           0 :         Hash *h = b->thash;
     216           0 :         int nwidth = h->width << 1;
     217           0 :         BUN i;
     218             : 
     219           0 :         assert(nwidth <= SIZEOF_BUN);
     220           0 :         assert((nwidth & (nwidth - 1)) == 0);
     221             : 
     222           0 :         if (HEAPextend(&h->heaplink, h->heaplink.size * nwidth / h->width, true) != GDK_SUCCEED ||
     223           0 :             HEAPextend(&h->heapbckt, (h->heapbckt.size - HASH_HEADER_SIZE * SIZEOF_SIZE_T) * nwidth / h->width + HASH_HEADER_SIZE * SIZEOF_SIZE_T, true) != GDK_SUCCEED) {
     224           0 :                 b->thash = NULL;
     225           0 :                 doHASHdestroy(b, h);
     226           0 :                 return GDK_FAIL;
     227             :         }
     228           0 :         h->Link = h->heaplink.base;
     229           0 :         h->Bckt = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     230           0 :         switch (nwidth) {
     231           0 :         case BUN4:
     232             : #ifdef BUN2
     233           0 :                 switch (h->width) {
     234           0 :                 case BUN2:
     235           0 :                         i = h->heaplink.free / h->width;
     236           0 :                         h->heaplink.free = i * nwidth;
     237           0 :                         while (i > 0) {
     238           0 :                                 i--;
     239           0 :                                 BUN2type v = ((BUN2type *) h->Link)[i];
     240           0 :                                 ((BUN4type *) h->Link)[i] = v == BUN2_NONE ? BUN4_NONE : v;
     241             :                         }
     242           0 :                         i = (h->heapbckt.free - HASH_HEADER_SIZE * SIZEOF_SIZE_T) / h->width;
     243           0 :                         h->heapbckt.free = HASH_HEADER_SIZE * SIZEOF_SIZE_T + i * nwidth;
     244           0 :                         while (i > 0) {
     245           0 :                                 i--;
     246           0 :                                 BUN2type v = ((BUN2type *) h->Bckt)[i];
     247           0 :                                 ((BUN4type *) h->Bckt)[i] = v == BUN2_NONE ? BUN4_NONE : v;
     248             :                         }
     249           0 :                         h->heapbckt.dirty = true;
     250           0 :                         h->heaplink.dirty = true;
     251           0 :                         break;
     252             :                 }
     253             : #endif
     254             :                 break;
     255             : #ifdef BUN8
     256           0 :         case BUN8:
     257           0 :                 switch (h->width) {
     258             : #ifdef BUN2
     259           0 :                 case BUN2:
     260           0 :                         i = h->heaplink.free / h->width;
     261           0 :                         h->heaplink.free = i * nwidth;
     262           0 :                         while (i > 0) {
     263           0 :                                 i--;
     264           0 :                                 BUN2type v = ((BUN2type *) h->Link)[i];
     265           0 :                                 ((BUN8type *) h->Link)[i] = v == BUN2_NONE ? BUN8_NONE : v;
     266             :                         }
     267           0 :                         i = (h->heapbckt.free - HASH_HEADER_SIZE * SIZEOF_SIZE_T) / h->width;
     268           0 :                         h->heapbckt.free = HASH_HEADER_SIZE * SIZEOF_SIZE_T + i * nwidth;
     269           0 :                         while (i > 0) {
     270           0 :                                 i--;
     271           0 :                                 BUN2type v = ((BUN2type *) h->Bckt)[i];
     272           0 :                                 ((BUN8type *) h->Bckt)[i] = v == BUN2_NONE ? BUN8_NONE : v;
     273             :                         }
     274           0 :                         h->heapbckt.dirty = true;
     275           0 :                         h->heaplink.dirty = true;
     276           0 :                         break;
     277             : #endif
     278           0 :                 case BUN4:
     279           0 :                         i = h->heaplink.free / h->width;
     280           0 :                         h->heaplink.free = i * nwidth;
     281           0 :                         while (i > 0) {
     282           0 :                                 i--;
     283           0 :                                 BUN4type v = ((BUN4type *) h->Link)[i];
     284           0 :                                 ((BUN8type *) h->Link)[i] = v == BUN4_NONE ? BUN8_NONE : v;
     285             :                         }
     286           0 :                         i = (h->heapbckt.free - HASH_HEADER_SIZE * SIZEOF_SIZE_T) / h->width;
     287           0 :                         h->heapbckt.free = HASH_HEADER_SIZE * SIZEOF_SIZE_T + i * nwidth;
     288           0 :                         while (i > 0) {
     289           0 :                                 i--;
     290           0 :                                 BUN4type v = ((BUN4type *) h->Bckt)[i];
     291           0 :                                 ((BUN8type *) h->Bckt)[i] = v == BUN4_NONE ? BUN8_NONE : v;
     292             :                         }
     293           0 :                         h->heapbckt.dirty = true;
     294           0 :                         h->heaplink.dirty = true;
     295           0 :                         break;
     296             :                 }
     297             :                 break;
     298             : #endif
     299             :         }
     300           0 :         h->width = nwidth;
     301             : #else
     302             :         (void) b;
     303             : #endif
     304           0 :         return GDK_SUCCEED;
     305             : }
     306             : 
     307             : /* write/remove the bit into/from the hash file that indicates the hash
     308             :  * is good to go; the bit is the last part to be written and the first
     309             :  * to be removed */
     310             : static inline gdk_return
     311      554170 : HASHfix(Hash *h, bool save, bool dosync)
     312             : {
     313      554170 :         if (!h->heapbckt.dirty && !h->heaplink.dirty) {
     314       23011 :                 const size_t mask = (size_t) 1 << 24;
     315       23011 :                 if (((size_t *) h->heapbckt.base)[0] & mask) {
     316       10354 :                         if (save)
     317             :                                 return GDK_SUCCEED;
     318       10354 :                         ((size_t *) h->heapbckt.base)[0] &= ~mask;
     319             :                 } else {
     320       12657 :                         if (!save)
     321             :                                 return GDK_SUCCEED;
     322       12657 :                         ((size_t *) h->heapbckt.base)[0] |= mask;
     323             :                 }
     324       23011 :                 if (h->heapbckt.storage == STORE_MEM) {
     325       22991 :                         gdk_return rc = GDK_FAIL;
     326       22991 :                         int fd = GDKfdlocate(h->heapbckt.farmid, h->heapbckt.filename, "rb+", NULL);
     327       22991 :                         if (fd >= 0) {
     328       22991 :                                 if (write(fd, h->heapbckt.base, SIZEOF_SIZE_T) == SIZEOF_SIZE_T) {
     329       22991 :                                         if (dosync &&
     330       22991 :                                             !(ATOMIC_GET(&GDKdebug) & NOSYNCMASK)) {
     331             : #if defined(NATIVE_WIN32)
     332             :                                                 _commit(fd);
     333             : #elif defined(HAVE_FDATASYNC)
     334          40 :                                                 fdatasync(fd);
     335             : #elif defined(HAVE_FSYNC)
     336             :                                                 fsync(fd);
     337             : #endif
     338             :                                         }
     339             :                                         rc = GDK_SUCCEED;
     340             :                                 }
     341       22991 :                                 close(fd);
     342             :                         }
     343       22991 :                         if (rc != GDK_SUCCEED)
     344           0 :                                 ((size_t *) h->heapbckt.base)[0] &= ~mask;
     345       22991 :                         return rc;
     346             :                 } else {
     347          20 :                         if (dosync &&
     348          40 :                             !(ATOMIC_GET(&GDKdebug) & NOSYNCMASK) &&
     349          20 :                             MT_msync(h->heapbckt.base, SIZEOF_SIZE_T) < 0) {
     350           0 :                                 ((size_t *) h->heapbckt.base)[0] &= ~mask;
     351           0 :                                 return GDK_FAIL;
     352             :                         }
     353             :                 }
     354             :         }
     355             :         return GDK_SUCCEED;
     356             : }
     357             : 
     358             : static gdk_return
     359      486931 : HASHgrowbucket(BAT *b)
     360             : {
     361      486931 :         Hash *h = b->thash;
     362      486931 :         BUN nbucket;
     363      486931 :         BUN onbucket = h->nbucket;
     364      486931 :         lng t0 = 0;
     365             : 
     366      486931 :         TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec();
     367             : 
     368             :         /* only needed to fix hash tables built before this fix was
     369             :          * introduced */
     370      486931 :         if (h->width < SIZEOF_BUN &&
     371      486931 :             ((BUN) 1 << (h->width * 8)) - 1 <= h->mask2 &&
     372           0 :             HASHupgradehashheap(b) != GDK_SUCCEED)
     373             :                 return GDK_FAIL;
     374             : 
     375      486931 :         h->heapbckt.dirty = true;
     376      486931 :         h->heaplink.dirty = true;
     377      739048 :         while (h->nunique >= (nbucket = h->nbucket) * 7 / 8) {
     378      252117 :                 BUN new = h->nbucket;
     379      252117 :                 BUN old = new & h->mask1;
     380      252117 :                 BUN mask = h->mask1 + 1; /* == h->mask2 - h->mask1 */
     381             : 
     382      252117 :                 assert(h->heapbckt.free == nbucket * h->width + HASH_HEADER_SIZE * SIZEOF_SIZE_T);
     383      252117 :                 if (h->heapbckt.free + h->width > h->heapbckt.size) {
     384        4465 :                         if (HEAPextend(&h->heapbckt,
     385             :                                        h->heapbckt.size + GDK_mmap_pagesize,
     386             :                                        true) != GDK_SUCCEED) {
     387           0 :                                 return GDK_FAIL;
     388             :                         }
     389        4465 :                         h->Bckt = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     390             :                 }
     391      252117 :                 assert(h->heapbckt.free + h->width <= h->heapbckt.size);
     392      252117 :                 if (h->nbucket == h->mask2) {
     393         451 :                         h->mask1 = h->mask2;
     394         451 :                         h->mask2 |= h->mask2 << 1;
     395         451 :                         if (h->width < SIZEOF_BUN &&
     396         451 :                             h->mask2 == ((BUN) 1 << (h->width * 8)) - 1) {
     397             :                                 /* time to widen the hash table */
     398           0 :                                 if (HASHupgradehashheap(b) != GDK_SUCCEED)
     399             :                                         return GDK_FAIL;
     400             :                         }
     401             :                 }
     402      252117 :                 h->nbucket++;
     403      252117 :                 h->heapbckt.free += h->width;
     404      252117 :                 BUN lold, lnew, hb;
     405      252117 :                 lold = lnew = BUN_NONE;
     406      252117 :                 BATiter bi = bat_iterator(b);
     407      252117 :                 if ((hb = HASHget(h, old)) != BUN_NONE) {
     408      213310 :                         h->nheads--;
     409      322575 :                         do {
     410      322575 :                                 const void *v = BUNtail(bi, hb);
     411      322575 :                                 BUN hsh = ATOMhash(h->type, v);
     412      322575 :                                 assert((hsh & (mask - 1)) == old);
     413      322575 :                                 if (hsh & mask) {
     414             :                                         /* move to new list */
     415      105349 :                                         if (lnew == BUN_NONE) {
     416       99582 :                                                 HASHput(h, new, hb);
     417       99582 :                                                 h->nheads++;
     418             :                                         } else
     419        5767 :                                                 HASHputlink(h, lnew, hb);
     420             :                                         lnew = hb;
     421             :                                 } else {
     422             :                                         /* move to old list */
     423      217226 :                                         if (lold == BUN_NONE) {
     424      170615 :                                                 h->nheads++;
     425      170615 :                                                 HASHput(h, old, hb);
     426             :                                         } else
     427       46611 :                                                 HASHputlink(h, lold, hb);
     428             :                                         lold = hb;
     429             :                                 }
     430      322575 :                                 hb = HASHgetlink(h, hb);
     431      322575 :                         } while (hb != BUN_NONE);
     432             :                 }
     433      252117 :                 bat_iterator_end(&bi);
     434      252117 :                 if (lnew == BUN_NONE)
     435      152535 :                         HASHput(h, new, BUN_NONE);
     436             :                 else
     437       99582 :                         HASHputlink(h, lnew, BUN_NONE);
     438      252117 :                 if (lold == BUN_NONE)
     439       81502 :                         HASHput(h, old, BUN_NONE);
     440             :                 else
     441      170615 :                         HASHputlink(h, lold, BUN_NONE);
     442             :         }
     443      486931 :         TRC_DEBUG_IF(ACCELERATOR) if (h->nbucket > onbucket) {
     444           0 :                 TRC_DEBUG_ENDIF(ACCELERATOR, ALGOBATFMT " " BUNFMT
     445             :                         " -> " BUNFMT " buckets (" LLFMT " usec)\n",
     446             :                         ALGOBATPAR(b),
     447             :                         onbucket, h->nbucket, GDKusec() - t0);
     448           0 :                 HASHcollisions(b, h, __func__);
     449             :         }
     450             :         return GDK_SUCCEED;
     451             : }
     452             : 
     453             : /* Return TRUE if we have a hash on the tail, even if we need to read
     454             :  * one from disk.
     455             :  *
     456             :  * Note that the b->thash pointer can be NULL, meaning there is no
     457             :  * hash; (Hash *) 1, meaning there is no hash loaded, but it may exist
     458             :  * on disk; or a valid pointer to a loaded hash.  These values are
     459             :  * maintained here, in the HASHdestroy and HASHfree functions, and in
     460             :  * BBPdiskscan during initialization. */
     461             : bool
     462    72249557 : BATcheckhash(BAT *b)
     463             : {
     464    72249557 :         lng t = 0;
     465    72249557 :         Hash *h;
     466             : 
     467    72249557 :         MT_rwlock_rdlock(&b->thashlock);
     468    72277666 :         h = b->thash;
     469    72277666 :         MT_rwlock_rdunlock(&b->thashlock);
     470    72279065 :         if (h == (Hash *) 1) {
     471             :                 /* but when we want to change it, we need the lock */
     472         428 :                 TRC_DEBUG_IF(ACCELERATOR) t = GDKusec();
     473         428 :                 MT_rwlock_wrlock(&b->thashlock);
     474         428 :                 TRC_DEBUG_IF(ACCELERATOR) t = GDKusec() - t;
     475             :                 /* if still 1 now that we have the lock, we can update */
     476         428 :                 if (b->thash == (Hash *) 1) {
     477         428 :                         int fd;
     478             : 
     479         428 :                         assert(!GDKinmemory(b->theap->farmid));
     480         428 :                         b->thash = NULL;
     481         428 :                         if ((h = GDKzalloc(sizeof(*h))) != NULL &&
     482         428 :                             (h->heaplink.farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) >= 0 &&
     483         428 :                             (h->heapbckt.farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) >= 0) {
     484         428 :                                 const char *nme = BBP_physical(b->batCacheid);
     485         428 :                                 strconcat_len(h->heaplink.filename,
     486             :                                               sizeof(h->heaplink.filename),
     487             :                                               nme, ".thashl", NULL);
     488         428 :                                 strconcat_len(h->heapbckt.filename,
     489             :                                               sizeof(h->heapbckt.filename),
     490             :                                               nme, ".thashb", NULL);
     491         428 :                                 h->heaplink.storage = STORE_INVALID;
     492         428 :                                 h->heaplink.newstorage = STORE_INVALID;
     493         428 :                                 h->heapbckt.storage = STORE_INVALID;
     494         428 :                                 h->heapbckt.newstorage = STORE_INVALID;
     495             : 
     496             :                                 /* check whether a persisted hash can be found */
     497         428 :                                 if ((fd = GDKfdlocate(h->heapbckt.farmid, nme, "rb+", "thashb")) >= 0) {
     498         428 :                                         size_t hdata[HASH_HEADER_SIZE];
     499         428 :                                         struct stat st;
     500             : 
     501         428 :                                         if (read(fd, hdata, sizeof(hdata)) == sizeof(hdata) &&
     502         428 :                                             (hdata[0] == (
     503             : #ifdef PERSISTENTHASH
     504             :                                                     ((size_t) 1 << 24) |
     505             : #endif
     506             :                                                     HASH_VERSION)
     507             : #ifdef HASH_VERSION_NOUUID
     508             :                                              /* if not uuid, also allow previous version */
     509         210 :                                              || (hdata[0] == (
     510             : #ifdef PERSISTENTHASH
     511             :                                                          ((size_t) 1 << 24) |
     512             : #endif
     513           0 :                                                          HASH_VERSION_NOUUID) &&
     514           0 :                                                  strcmp(ATOMname(b->ttype), "flt") != 0 &&
     515           0 :                                                  strcmp(ATOMname(b->ttype), "dbl") != 0 &&
     516           0 :                                                  strcmp(ATOMname(b->ttype), "uuid") != 0 &&
     517           0 :                                                  strcmp(ATOMname(b->ttype), "mbr") != 0)
     518             : #endif
     519             : #ifdef HASH_VERSION_NOMBR
     520             :                                              /* if not uuid, also allow previous version */
     521         210 :                                              || (hdata[0] == (
     522             : #ifdef PERSISTENTHASH
     523             :                                                          ((size_t) 1 << 24) |
     524             : #endif
     525         120 :                                                          HASH_VERSION_NOMBR) &&
     526         120 :                                                  strcmp(ATOMname(b->ttype), "flt") != 0 &&
     527         120 :                                                  strcmp(ATOMname(b->ttype), "dbl") != 0 &&
     528         120 :                                                  strcmp(ATOMname(b->ttype), "mbr") != 0)
     529             : #endif
     530             : #ifdef HASH_VERSION_FLOAT
     531             :                                              /* if not floating point, also allow previous version */
     532          90 :                                              || (hdata[0] == (
     533             : #ifdef PERSISTENTHASH
     534             :                                                          ((size_t) 1 << 24) |
     535             : #endif
     536          90 :                                                          HASH_VERSION_FLOAT) &&
     537          90 :                                                  strcmp(ATOMname(b->ttype), "flt") != 0 &&
     538          90 :                                                  strcmp(ATOMname(b->ttype), "dbl") != 0)
     539             : #endif
     540         428 :                                                     ) &&
     541         428 :                                             hdata[1] > 0 &&
     542             :                                             (
     543             : #ifdef BUN2
     544         428 :                                                     hdata[3] == BUN2 ||
     545             : #endif
     546             :                                                     hdata[3] == BUN4
     547             : #ifdef BUN8
     548             :                                                     || hdata[3] == BUN8
     549             : #endif
     550         428 :                                                     ) &&
     551         856 :                                             hdata[4] == (size_t) BATcount(b) &&
     552         428 :                                             fstat(fd, &st) == 0 &&
     553         856 :                                             st.st_size >= (off_t) (h->heapbckt.size = h->heapbckt.free = (h->nbucket = (BUN) hdata[2]) * (BUN) (h->width = (uint8_t) hdata[3]) + HASH_HEADER_SIZE * SIZEOF_SIZE_T) &&
     554         428 :                                             close(fd) == 0 &&
     555         856 :                                             (fd = GDKfdlocate(h->heaplink.farmid, nme, "rb+", "thashl")) >= 0 &&
     556         428 :                                             fstat(fd, &st) == 0 &&
     557         428 :                                             st.st_size > 0 &&
     558         856 :                                             st.st_size >= (off_t) (h->heaplink.size = h->heaplink.free = hdata[1] * h->width) &&
     559         428 :                                             HEAPload(&h->heaplink, nme, "thashl", false) == GDK_SUCCEED) {
     560         428 :                                                 if (HEAPload(&h->heapbckt, nme, "thashb", false) == GDK_SUCCEED) {
     561         428 :                                                         if (h->nbucket & (h->nbucket - 1)) {
     562         185 :                                                                 h->mask2 = hashmask(h->nbucket);
     563         185 :                                                                 h->mask1 = h->mask2 >> 1;
     564             :                                                         } else {
     565         243 :                                                                 h->mask1 = h->nbucket - 1;
     566         243 :                                                                 h->mask2 = h->mask1 << 1 | 1;
     567             :                                                         }
     568         428 :                                                         h->nunique = hdata[5];
     569         428 :                                                         h->nheads = hdata[6];
     570         428 :                                                         h->type = ATOMtype(b->ttype);
     571         428 :                                                         if (h->width < SIZEOF_BUN &&
     572         428 :                                                             ((BUN) 1 << (8 * h->width)) - 1 > h->nbucket) {
     573         427 :                                                                 close(fd);
     574         427 :                                                                 h->Link = h->heaplink.base;
     575         427 :                                                                 h->Bckt = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     576         427 :                                                                 h->heaplink.parentid = b->batCacheid;
     577         427 :                                                                 h->heapbckt.parentid = b->batCacheid;
     578         427 :                                                                 h->heaplink.dirty = false;
     579         427 :                                                                 h->heapbckt.dirty = false;
     580         427 :                                                                 b->thash = h;
     581         427 :                                                                 h->heapbckt.hasfile = true;
     582         427 :                                                                 h->heaplink.hasfile = true;
     583         427 :                                                                 TRC_DEBUG(ACCELERATOR,
     584             :                                                                           ALGOBATFMT ": reusing persisted hash\n", ALGOBATPAR(b));
     585         427 :                                                                 MT_rwlock_wrunlock(&b->thashlock);
     586         427 :                                                                 return true;
     587             :                                                         }
     588             :                                                         /* if h->nbucket
     589             :                                                          * equals the
     590             :                                                          * BUN_NONE
     591             :                                                          * representation
     592             :                                                          * for the
     593             :                                                          * current hash
     594             :                                                          * width (was
     595             :                                                          * possible in
     596             :                                                          * previous
     597             :                                                          * iterations of
     598             :                                                          * the code),
     599             :                                                          * then we can't
     600             :                                                          * use the hash
     601             :                                                          * since we
     602             :                                                          * can't
     603             :                                                          * distinguish
     604             :                                                          * between
     605             :                                                          * end-of-list
     606             :                                                          * and a valid
     607             :                                                          * link */
     608           1 :                                                         HEAPfree(&h->heapbckt, false);
     609             :                                                 }
     610           1 :                                                 HEAPfree(&h->heaplink, false);
     611             :                                         }
     612           1 :                                         close(fd);
     613             :                                         /* unlink unusable file */
     614           1 :                                         GDKunlink(h->heaplink.farmid, BATDIR, nme, "thashl");
     615           1 :                                         GDKunlink(h->heapbckt.farmid, BATDIR, nme, "thashb");
     616           1 :                                         h->heapbckt.hasfile = false;
     617           1 :                                         h->heaplink.hasfile = false;
     618             :                                 }
     619             :                         }
     620           1 :                         GDKfree(h);
     621           1 :                         GDKclrerr();    /* we're not currently interested in errors */
     622             :                 }
     623           1 :                 h = b->thash;
     624           1 :                 MT_rwlock_wrunlock(&b->thashlock);
     625             :         }
     626    72278638 :         if (h != NULL) {
     627     3787489 :                 TRC_DEBUG(ACCELERATOR, ALGOBATFMT ": already has hash, waited " LLFMT " usec\n", ALGOBATPAR(b), t);
     628             :         }
     629    72278638 :         return h != NULL;
     630             : }
     631             : 
     632             : static void
     633       13389 : BAThashsave_intern(BAT *b, bool dosync)
     634             : {
     635       13389 :         Hash *h;
     636       13389 :         lng t0 = 0;
     637             : 
     638       13389 :         TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec();
     639             : 
     640       13389 :         if ((h = b->thash) != NULL) {
     641             : #ifndef PERSISTENTHASH
     642             :                 /* no need to sync if not persistent */
     643             :                 dosync = false;
     644             : #endif
     645             : 
     646             :                 /* only persist if parent BAT hasn't changed in the
     647             :                  * mean time */
     648       13389 :                 if (!b->theap->dirty &&
     649       12657 :                     ((size_t *) h->heapbckt.base)[1] == BATcount(b) &&
     650       25314 :                     ((size_t *) h->heapbckt.base)[4] == BATcount(b) &&
     651       25314 :                     HEAPsave(&h->heaplink, h->heaplink.filename, NULL, dosync, h->heaplink.free, NULL) == GDK_SUCCEED &&
     652       12657 :                     HEAPsave(&h->heapbckt, h->heapbckt.filename, NULL, dosync, h->heapbckt.free, NULL) == GDK_SUCCEED) {
     653       12657 :                         h->heaplink.dirty = false;
     654       12657 :                         h->heapbckt.dirty = false;
     655       12657 :                         h->heaplink.hasfile = true;
     656       12657 :                         h->heapbckt.hasfile = true;
     657       12657 :                         gdk_return rc = HASHfix(h, true, dosync);
     658       12657 :                         TRC_DEBUG(ACCELERATOR,
     659             :                                   ALGOBATFMT ": persisting hash %s%s (" LLFMT " usec)%s\n", ALGOBATPAR(b), h->heapbckt.filename, dosync ? "" : " no sync", GDKusec() - t0, rc == GDK_SUCCEED ? "" : " failed");
     660             :                 }
     661       13389 :                 GDKclrerr();
     662             :         }
     663       13389 : }
     664             : 
     665             : void
     666       13389 : BAThashsave(BAT *b, bool dosync)
     667             : {
     668       13389 :         Hash *h = b->thash;
     669       13389 :         if (h == NULL)
     670             :                 return;
     671       13389 :         ((size_t *) h->heapbckt.base)[0] = (size_t) HASH_VERSION;
     672       13389 :         ((size_t *) h->heapbckt.base)[1] = (size_t) (h->heaplink.free / h->width);
     673       13389 :         ((size_t *) h->heapbckt.base)[2] = (size_t) h->nbucket;
     674       13389 :         ((size_t *) h->heapbckt.base)[3] = (size_t) h->width;
     675       13389 :         ((size_t *) h->heapbckt.base)[4] = (size_t) BATcount(b);
     676       13389 :         ((size_t *) h->heapbckt.base)[5] = (size_t) h->nunique;
     677       13389 :         ((size_t *) h->heapbckt.base)[6] = (size_t) h->nheads;
     678       13389 :         BAThashsave_intern(b, dosync);
     679             : }
     680             : 
     681             : #define EQbte(a, b)     ((a) == (b))
     682             : #define EQsht(a, b)     ((a) == (b))
     683             : #define EQint(a, b)     ((a) == (b))
     684             : #define EQlng(a, b)     ((a) == (b))
     685             : #ifdef HAVE_HGE
     686             : #define EQhge(a, b)     ((a) == (b))
     687             : #endif
     688             : #define EQflt(a, b)     (is_flt_nil(a) ? is_flt_nil(b) : (a) == (b))
     689             : #define EQdbl(a, b)     (is_dbl_nil(a) ? is_dbl_nil(b) : (a) == (b))
     690             : #ifdef HAVE_HGE
     691             : #define EQuuid(a, b)    ((a).h == (b).h)
     692             : #else
     693             : #define EQuuid(a, b)    (memcmp((a).u, (b).u, UUID_SIZE) == 0)
     694             : #endif
     695             : 
     696             : #define starthash(TYPE)                                                 \
     697             :         do {                                                            \
     698             :                 const TYPE *restrict v = (const TYPE *) BUNtloc(bi, 0); \
     699             :                 TIMEOUT_LOOP(p, timeoffset) {                           \
     700             :                         hget = HASHget(h, c);                           \
     701             :                         if (hget == BUN_NONE) {                         \
     702             :                                 if (h->nheads == maxslots)           \
     703             :                                         TIMEOUT_LOOP_BREAK; /* mask too full */ \
     704             :                                 h->nheads++;                         \
     705             :                                 h->nunique++;                                \
     706             :                         } else {                                        \
     707             :                                 for (hb = hget;                         \
     708             :                                      hb != BUN_NONE;                    \
     709             :                                      hb = HASHgetlink(h, hb)) {         \
     710             :                                         if (EQ##TYPE(v[o - b->hseqbase], v[hb])) \
     711             :                                                 break;                  \
     712             :                                 }                                       \
     713             :                                 h->nunique += hb == BUN_NONE;                \
     714             :                         }                                               \
     715             :                         HASHputlink(h, p, hget);                        \
     716             :                         HASHput(h, c, p);                               \
     717             :                         o = canditer_next(ci);                          \
     718             :                 }                                                       \
     719             :                 TIMEOUT_CHECK(timeoffset,                               \
     720             :                               GOTO_LABEL_TIMEOUT_HANDLER(bailout));     \
     721             :         } while (0)
     722             : #define finishhash(TYPE)                                                \
     723             :         do {                                                            \
     724             :                 const TYPE *restrict v = (const TYPE *) BUNtloc(bi, 0); \
     725             :                 TIMEOUT_LOOP(ci->ncand - p, timeoffset) {            \
     726             :                         c = hash_##TYPE(h, v + o - b->hseqbase);     \
     727             :                         hget = HASHget(h, c);                           \
     728             :                         h->nheads += hget == BUN_NONE;                       \
     729             :                         if (!hascand) {                                 \
     730             :                                 for (hb = hget;                         \
     731             :                                      hb != BUN_NONE;                    \
     732             :                                      hb = HASHgetlink(h, hb)) {         \
     733             :                                         if (EQ##TYPE(v[o - b->hseqbase], v[hb])) \
     734             :                                                 break;                  \
     735             :                                 }                                       \
     736             :                                 h->nunique += hb == BUN_NONE;                \
     737             :                                 o = canditer_next_dense(ci);            \
     738             :                         } else {                                        \
     739             :                                 o = canditer_next(ci);                  \
     740             :                         }                                               \
     741             :                         HASHputlink(h, p, hget);                        \
     742             :                         HASHput(h, c, p);                               \
     743             :                         p++;                                            \
     744             :                 }                                                       \
     745             :                 TIMEOUT_CHECK(timeoffset,                               \
     746             :                               GOTO_LABEL_TIMEOUT_HANDLER(bailout));     \
     747             :         } while (0)
     748             : 
     749             : /* Internal function to create a hash table for the given BAT b.
     750             :  * If a candidate list s is also given, the hash table is specific for
     751             :  * the combination of the two: only values from b that are referred to
     752             :  * by s are included in the hash table, so if a result is found when
     753             :  * searching the hash table, the result is a candidate. */
     754             : Hash *
     755       11135 : BAThash_impl(BAT *restrict b, struct canditer *restrict ci, const char *restrict ext)
     756             : {
     757       11135 :         lng t0 = 0;
     758       11135 :         BUN cnt1;
     759       11135 :         BUN mask, maxmask = 0;
     760       11135 :         BUN p, c;
     761       11135 :         oid o;
     762       11135 :         BUN hget, hb;
     763       11135 :         Hash *h = NULL;
     764       11135 :         const char *nme = GDKinmemory(b->theap->farmid) ? ":memory:" : BBP_physical(b->batCacheid);
     765       11135 :         BATiter bi = bat_iterator(b);
     766       11135 :         unsigned int tpe = ATOMbasetype(bi.type);
     767       11135 :         bool hascand = ci->tpe != cand_dense || ci->ncand != bi.count;
     768             : 
     769       11135 :         lng timeoffset = 0;
     770       11135 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
     771       11135 :         if (qry_ctx != NULL) {
     772        9817 :                 timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
     773             :         }
     774             : 
     775       11135 :         assert(strcmp(ext, "thash") != 0 || !hascand);
     776       11135 :         assert(bi.type != TYPE_msk);
     777             : 
     778       22138 :         MT_thread_setalgorithm(hascand ? "create hash with candidates" : "create hash");
     779       11135 :         TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec();
     780       11135 :         TRC_DEBUG(ACCELERATOR,
     781             :                   ALGOBATFMT ": create hash;\n", ALGOBATPAR(b));
     782       11135 :         if (bi.type == TYPE_void) {
     783           0 :                 if (is_oid_nil(b->tseqbase)) {
     784           0 :                         TRC_DEBUG(ACCELERATOR,
     785             :                                   "cannot create hash-table on void-NIL column.\n");
     786           0 :                         GDKerror("no hash on void/nil column\n");
     787           0 :                         bat_iterator_end(&bi);
     788           0 :                         return NULL;
     789             :                 }
     790           0 :                 TRC_DEBUG(ACCELERATOR,
     791             :                           "creating hash-table on void column..\n");
     792           0 :                 assert(0);
     793             :                 tpe = TYPE_void;
     794             :         }
     795             : 
     796       11135 :         if ((h = GDKzalloc(sizeof(*h))) == NULL ||
     797       11135 :             (h->heaplink.farmid = BBPselectfarm(hascand ? TRANSIENT : b->batRole, bi.type, hashheap)) < 0 ||
     798       11135 :             (h->heapbckt.farmid = BBPselectfarm(hascand ? TRANSIENT : b->batRole, bi.type, hashheap)) < 0) {
     799           0 :                 GDKfree(h);
     800           0 :                 bat_iterator_end(&bi);
     801           0 :                 return NULL;
     802             :         }
     803       11135 :         h->width = HASHwidth(BATcapacity(b));
     804       11135 :         h->heaplink.dirty = true;
     805       11135 :         h->heapbckt.dirty = true;
     806       11135 :         strconcat_len(h->heaplink.filename, sizeof(h->heaplink.filename),
     807             :                       nme, ".", ext, "l", NULL);
     808       11135 :         strconcat_len(h->heapbckt.filename, sizeof(h->heapbckt.filename),
     809             :                       nme, ".", ext, "b", NULL);
     810       11135 :         h->heapbckt.parentid = b->batCacheid;
     811       11135 :         h->heaplink.parentid = b->batCacheid;
     812       11135 :         if (HEAPalloc(&h->heaplink, hascand ? ci->ncand : BATcapacity(b),
     813       11135 :                       h->width) != GDK_SUCCEED) {
     814           0 :                 GDKfree(h);
     815           0 :                 bat_iterator_end(&bi);
     816           0 :                 return NULL;
     817             :         }
     818       11135 :         h->heaplink.free = ci->ncand * h->width;
     819       11135 :         h->Link = h->heaplink.base;
     820             : #ifndef NDEBUG
     821             :         /* clear unused part of Link array */
     822       11135 :         if (h->heaplink.size > h->heaplink.free)
     823       10217 :                 memset(h->heaplink.base + h->heaplink.free, 0,
     824             :                        h->heaplink.size - h->heaplink.free);
     825             : #endif
     826             : 
     827             :         /* determine hash mask size */
     828       11135 :         cnt1 = 0;
     829       11135 :         if (ATOMsize(tpe) == 1) {
     830             :                 /* perfect hash for one-byte sized atoms */
     831             :                 mask = (1 << 8);
     832       11125 :         } else if (ATOMsize(tpe) == 2) {
     833             :                 /* perfect hash for two-byte sized atoms */
     834             :                 mask = (1 << 16);
     835       11087 :         } else if (bi.key || ci->ncand <= 4096) {
     836             :                 /* if key, or if small, don't bother dynamically
     837             :                  * adjusting the hash mask */
     838       10932 :                 mask = HASHmask(ci->ncand);
     839         155 :         } else if (!hascand && bi.unique_est != 0) {
     840         155 :                 mask = (BUN) (bi.unique_est * 1.15); /* about 8/7 */
     841             :         } else {
     842             :                 /* dynamic hash: we start with HASHmask(ci->ncand)/64, or,
     843             :                  * if ci->ncand large enough, HASHmask(ci->ncand)/256; if there
     844             :                  * are too many collisions we try HASHmask(ci->ncand)/64,
     845             :                  * HASHmask(ci->ncand)/16, HASHmask(ci->ncand)/4, and finally
     846             :                  * HASHmask(ci->ncand), but we might skip some of these if
     847             :                  * there are many distinct values.  */
     848           0 :                 maxmask = HASHmask(ci->ncand);
     849           0 :                 mask = maxmask >> 6;
     850           0 :                 while (mask > 4096)
     851           0 :                         mask >>= 2;
     852             :                 /* try out on first 25% of b */
     853           0 :                 cnt1 = ci->ncand >> 2;
     854             :         }
     855             : 
     856       11135 :         o = canditer_next(ci);  /* always one ahead */
     857           0 :         for (;;) {
     858       11135 :                 lng t1 = 0;
     859       11135 :                 TRC_DEBUG_IF(ACCELERATOR) t1 = GDKusec();
     860       11135 :                 BUN maxslots = (mask >> 3) - 1;   /* 1/8 full is too full */
     861             : 
     862       11135 :                 h->nheads = 0;
     863       11135 :                 h->nunique = 0;
     864       11135 :                 p = 0;
     865       11135 :                 HEAPfree(&h->heapbckt, true);
     866             :                 /* create the hash structures */
     867       22270 :                 if (HASHnew(h, ATOMtype(bi.type), BATcapacity(b),
     868             :                             mask, ci->ncand, true) != GDK_SUCCEED) {
     869           0 :                         HEAPfree(&h->heaplink, true);
     870           0 :                         GDKfree(h);
     871           0 :                         bat_iterator_end(&bi);
     872           0 :                         return NULL;
     873             :                 }
     874             : 
     875       11135 :                 switch (tpe) {
     876          10 :                 case TYPE_bte:
     877          20 :                         starthash(bte);
     878             :                         break;
     879          38 :                 case TYPE_sht:
     880          76 :                         starthash(sht);
     881             :                         break;
     882          12 :                 case TYPE_flt:
     883          24 :                         starthash(flt);
     884             :                         break;
     885        9757 :                 case TYPE_int:
     886       19514 :                         starthash(int);
     887             :                         break;
     888           7 :                 case TYPE_dbl:
     889          14 :                         starthash(dbl);
     890             :                         break;
     891         701 :                 case TYPE_lng:
     892        1402 :                         starthash(lng);
     893             :                         break;
     894             : #ifdef HAVE_HGE
     895           8 :                 case TYPE_hge:
     896          16 :                         starthash(hge);
     897             :                         break;
     898             : #endif
     899           1 :                 case TYPE_uuid:
     900           2 :                         starthash(uuid);
     901             :                         break;
     902             :                 default:
     903        1202 :                         TIMEOUT_LOOP(p, timeoffset) {
     904           0 :                                 const void *restrict v = BUNtail(bi, o - b->hseqbase);
     905           0 :                                 c = hash_any(h, v);
     906           0 :                                 hget = HASHget(h, c);
     907           0 :                                 if (hget == BUN_NONE) {
     908           0 :                                         if (h->nheads == maxslots)
     909           0 :                                                 TIMEOUT_LOOP_BREAK; /* mask too full */
     910           0 :                                         h->nheads++;
     911           0 :                                         h->nunique++;
     912             :                                 } else {
     913             :                                         for (hb = hget;
     914           0 :                                              hb != BUN_NONE;
     915           0 :                                              hb = HASHgetlink(h, hb)) {
     916           0 :                                                 if (ATOMcmp(h->type,
     917             :                                                             v,
     918             :                                                             BUNtail(bi, hb)) == 0)
     919             :                                                         break;
     920             :                                         }
     921           0 :                                         h->nunique += hb == BUN_NONE;
     922             :                                 }
     923           0 :                                 HASHputlink(h, p, hget);
     924           0 :                                 HASHput(h, c, p);
     925           0 :                                 o = canditer_next(ci);
     926             :                         }
     927         601 :                         TIMEOUT_CHECK(timeoffset,
     928             :                                       GOTO_LABEL_TIMEOUT_HANDLER(bailout));
     929             :                         break;
     930             :                 }
     931       11135 :                 TRC_DEBUG_IF(ACCELERATOR) if (p < cnt1)
     932           0 :                         TRC_DEBUG_ENDIF(ACCELERATOR,
     933             :                                         "%s: abort starthash with "
     934             :                                 "mask " BUNFMT " at " BUNFMT " after " LLFMT " usec\n", BATgetId(b), mask, p, GDKusec() - t1);
     935       11135 :                 if (p == cnt1 || mask == maxmask)
     936             :                         break;
     937           0 :                 mask <<= 2;
     938             :                 /* if we fill up the slots fast (p <= maxslots * 1.2)
     939             :                  * increase mask size a bit more quickly */
     940           0 :                 if (p == h->nunique) {
     941             :                         /* only unique values so far: grow really fast */
     942             :                         mask = maxmask;
     943             :                         cnt1 = 0;
     944           0 :                 } else if (mask < maxmask && p <= maxslots * 1.2)
     945           0 :                         mask <<= 2;
     946           0 :                 canditer_reset(ci);
     947           0 :                 o = canditer_next(ci);
     948             :         }
     949             : 
     950             :         /* finish the hashtable with the current mask */
     951       11135 :         switch (tpe) {
     952          10 :         case TYPE_bte:
     953         343 :                 finishhash(bte);
     954             :                 break;
     955          38 :         case TYPE_sht:
     956         393 :                 finishhash(sht);
     957             :                 break;
     958        9757 :         case TYPE_int:
     959    37815568 :                 finishhash(int);
     960             :                 break;
     961          12 :         case TYPE_flt:
     962         369 :                 finishhash(flt);
     963             :                 break;
     964           7 :         case TYPE_dbl:
     965          58 :                 finishhash(dbl);
     966             :                 break;
     967         701 :         case TYPE_lng:
     968    16675607 :                 finishhash(lng);
     969             :                 break;
     970             : #ifdef HAVE_HGE
     971           8 :         case TYPE_hge:
     972         244 :                 finishhash(hge);
     973             :                 break;
     974             : #endif
     975           1 :         case TYPE_uuid:
     976           2 :                 finishhash(uuid);
     977             :                 break;
     978         601 :         default:
     979      244935 :                 TIMEOUT_LOOP(ci->ncand - p, timeoffset) {
     980      243729 :                         const void *restrict v = BUNtail(bi, o - b->hseqbase);
     981      243719 :                         c = hash_any(h, v);
     982      243733 :                         hget = HASHget(h, c);
     983      243733 :                         h->nheads += hget == BUN_NONE;
     984      243733 :                         if (!hascand) {
     985             :                                 for (hb = hget;
     986      332413 :                                      hb != BUN_NONE;
     987       88663 :                                      hb = HASHgetlink(h, hb)) {
     988      188830 :                                         if (ATOMcmp(h->type, v, BUNtail(bi, hb)) == 0)
     989             :                                                 break;
     990             :                                 }
     991      243724 :                                 h->nunique += hb == BUN_NONE;
     992             :                         }
     993      243707 :                         HASHputlink(h, p, hget);
     994      243725 :                         HASHput(h, c, p);
     995      243724 :                         o = canditer_next(ci);
     996      243729 :                         p++;
     997             :                 }
     998           0 :                 TIMEOUT_CHECK(timeoffset,
     999             :                               GOTO_LABEL_TIMEOUT_HANDLER(bailout));
    1000             :                 break;
    1001             :         }
    1002       11135 :         bat_iterator_end(&bi);
    1003             :         /* if the number of unique values is equal to the bat count,
    1004             :          * all values are necessarily distinct */
    1005       11135 :         MT_lock_set(&b->theaplock);
    1006       11135 :         if (h->nunique == BATcount(b) && !b->tkey) {
    1007        9015 :                 b->tkey = true;
    1008             :         }
    1009       11135 :         if (ci->ncand == BATcount(b))
    1010       11003 :                 b->tunique_est = (double) h->nunique;
    1011       11135 :         MT_lock_unset(&b->theaplock);
    1012       11135 :         TRC_DEBUG_IF(ACCELERATOR) {
    1013           0 :                 TRC_DEBUG_ENDIF(ACCELERATOR,
    1014             :                                 "hash construction " LLFMT " usec\n", GDKusec() - t0);
    1015           0 :                 HASHcollisions(b, h, __func__);
    1016             :         }
    1017             :         return h;
    1018             : 
    1019           0 :   bailout:
    1020           0 :         bat_iterator_end(&bi);
    1021           0 :         HEAPfree(&h->heaplink, true);
    1022           0 :         HEAPfree(&h->heapbckt, true);
    1023           0 :         GDKfree(h);
    1024           0 :         return NULL;
    1025             : }
    1026             : 
    1027             : gdk_return
    1028     2755902 : BAThash(BAT *b)
    1029             : {
    1030     2755902 :         if (ATOMstorage(b->ttype) == TYPE_msk) {
    1031           0 :                 GDKerror("No hash on msk type bats\n");
    1032           0 :                 return GDK_FAIL;
    1033             :         }
    1034     2755902 :         if (BATcheckhash(b)) {
    1035             :                 return GDK_SUCCEED;
    1036             :         }
    1037       11006 :         for (;;) {
    1038             :                 /* If multiple threads simultaneously try to build a
    1039             :                  * hash on a bat, e.g. in order to perform a join, it
    1040             :                  * may happen that one thread succeeds in obtaining the
    1041             :                  * write lock, then builds the hash, releases the lock,
    1042             :                  * acquires the read lock, and performs the join.  The
    1043             :                  * other threads may then still be attempting to acquire
    1044             :                  * the write lock.  But now they have to wait until the
    1045             :                  * read lock is released, which can be quite a long
    1046             :                  * time.  Especially if a second thread goes through the
    1047             :                  * same process as the first. */
    1048       11006 :                 if (MT_rwlock_wrtry(&b->thashlock))
    1049             :                         break;
    1050           2 :                 MT_sleep_ms(1);
    1051           2 :                 if (MT_rwlock_rdtry(&b->thashlock)) {
    1052           2 :                         if (b->thash != NULL && b->thash != (Hash *) 1) {
    1053           2 :                                 MT_rwlock_rdunlock(&b->thashlock);
    1054           2 :                                 return GDK_SUCCEED;
    1055             :                         }
    1056           0 :                         MT_rwlock_rdunlock(&b->thashlock);
    1057             :                 }
    1058             :         }
    1059             :         /* we have the write lock */
    1060       11003 :         if (b->thash == NULL) {
    1061       11003 :                 struct canditer ci;
    1062       11003 :                 canditer_init(&ci, b, NULL);
    1063       11003 :                 if ((b->thash = BAThash_impl(b, &ci, "thash")) == NULL) {
    1064           0 :                         MT_rwlock_wrunlock(&b->thashlock);
    1065           0 :                         return GDK_FAIL;
    1066             :                 }
    1067             :         }
    1068       11003 :         MT_rwlock_wrunlock(&b->thashlock);
    1069       11003 :         return GDK_SUCCEED;
    1070             : }
    1071             : 
    1072             : /*
    1073             :  * The entry on which a value hashes can be calculated with the
    1074             :  * routine HASHprobe.
    1075             :  */
    1076             : BUN
    1077   647922854 : HASHprobe(const Hash *h, const void *v)
    1078             : {
    1079  1261660429 :         switch (ATOMbasetype(h->type)) {
    1080        4047 :         case TYPE_bte:
    1081        4047 :                 return hash_bte(h, v);
    1082       98137 :         case TYPE_sht:
    1083       98137 :                 return hash_sht(h, v);
    1084   589583113 :         case TYPE_int:
    1085   589583113 :                 return hash_int(h, v);
    1086    34373625 :         case TYPE_lng:
    1087    34373625 :                 return hash_lng(h, v);
    1088             : #ifdef HAVE_HGE
    1089       33054 :         case TYPE_hge:
    1090       33054 :                 return hash_hge(h, v);
    1091             : #endif
    1092      150815 :         case TYPE_flt:
    1093      150815 :                 return hash_flt(h, v);
    1094       69185 :         case TYPE_dbl:
    1095       69185 :                 return hash_dbl(h, v);
    1096        2089 :         case TYPE_uuid:
    1097        2089 :                 return hash_uuid(h, v);
    1098    23608789 :         default:
    1099    23608789 :                 return hash_any(h, v);
    1100             :         }
    1101             : }
    1102             : 
    1103             : void
    1104      486934 : HASHappend_locked(BAT *b, BUN i, const void *v)
    1105             : {
    1106      486934 :         Hash *h = b->thash;
    1107      486934 :         if (h == NULL) {
    1108           0 :                 return;
    1109             :         }
    1110      486934 :         if (h == (Hash *) 1) {
    1111           0 :                 b->thash = NULL;
    1112           0 :                 doHASHdestroy(b, h);
    1113           0 :                 GDKclrerr();
    1114           0 :                 return;
    1115             :         }
    1116      486934 :         assert(i * h->width == h->heaplink.free);
    1117      486934 :         if (h->nunique < b->batCount / hash_destroy_uniques_fraction) {
    1118           0 :                 b->thash = NULL;
    1119           0 :                 doHASHdestroy(b, h);
    1120           0 :                 GDKclrerr();
    1121           0 :                 return;
    1122             :         }
    1123      486934 :         if (HASHfix(h, false, true) != GDK_SUCCEED) {
    1124           0 :                 b->thash = NULL;
    1125           0 :                 doHASHdestroy(b, h);
    1126           0 :                 GDKclrerr();
    1127           0 :                 return;
    1128             :         }
    1129      486934 :         if (HASHwidth(i + 1) > h->width &&
    1130           0 :              HASHupgradehashheap(b) != GDK_SUCCEED) {
    1131           0 :                 GDKclrerr();
    1132           0 :                 return;
    1133             :         }
    1134      973865 :         if ((ATOMsize(b->ttype) > 2 &&
    1135      486931 :              HASHgrowbucket(b) != GDK_SUCCEED) ||
    1136      487942 :             ((i + 1) * h->width > h->heaplink.size &&
    1137        1008 :              HEAPextend(&h->heaplink,
    1138        1008 :                         i * h->width + GDK_mmap_pagesize,
    1139             :                         true) != GDK_SUCCEED)) {
    1140           0 :                 b->thash = NULL;
    1141           0 :                 HEAPfree(&h->heapbckt, true);
    1142           0 :                 HEAPfree(&h->heaplink, true);
    1143           0 :                 GDKfree(h);
    1144           0 :                 GDKclrerr();
    1145           0 :                 return;
    1146             :         }
    1147      486934 :         h->Link = h->heaplink.base;
    1148      486934 :         BUN c = HASHprobe(h, v);
    1149      486934 :         h->heaplink.free += h->width;
    1150      486934 :         BUN hb = HASHget(h, c);
    1151      486934 :         BUN hb2;
    1152      486934 :         BATiter bi = bat_iterator_nolock(b);
    1153      486934 :         int (*atomcmp)(const void *, const void *) = ATOMcompare(h->type);
    1154      486934 :         for (hb2 = hb;
    1155      634589 :              hb2 != BUN_NONE;
    1156      147655 :              hb2 = HASHgetlink(h, hb2)) {
    1157      310512 :                 if (atomcmp(v, BUNtail(bi, hb2)) == 0)
    1158             :                         break;
    1159             :         }
    1160      486934 :         h->nheads += hb == BUN_NONE;
    1161      486934 :         h->nunique += hb2 == BUN_NONE;
    1162      486934 :         HASHputlink(h, i, hb);
    1163      486934 :         HASHput(h, c, i);
    1164      486934 :         h->heapbckt.dirty = true;
    1165      486934 :         h->heaplink.dirty = true;
    1166             : }
    1167             : 
    1168             : BUN
    1169           0 : HASHappend(BAT *b, BUN i, const void *v)
    1170             : {
    1171           0 :         BUN nunique;
    1172           0 :         MT_rwlock_wrlock(&b->thashlock);
    1173           0 :         HASHappend_locked(b, i, v);
    1174           0 :         nunique = b->thash ? b->thash->nunique : 0;
    1175           0 :         MT_rwlock_wrunlock(&b->thashlock);
    1176           0 :         return nunique;
    1177             : }
    1178             : 
    1179             : /* insert value v at position p into the hash table of b */
    1180             : void
    1181   159564916 : HASHinsert_locked(BATiter *bi, BUN p, const void *v)
    1182             : {
    1183   159564916 :         BAT *b = bi->b;
    1184   159564916 :         Hash *h = b->thash;
    1185   159564916 :         if (h == NULL) {
    1186             :                 return;
    1187             :         }
    1188       27289 :         if (h == (Hash *) 1) {
    1189           0 :                 b->thash = NULL;
    1190           0 :                 doHASHdestroy(b, h);
    1191           0 :                 GDKclrerr();
    1192           0 :                 return;
    1193             :         }
    1194       27289 :         assert(p * h->width < h->heaplink.free);
    1195       27289 :         if (h->nunique < b->batCount / hash_destroy_uniques_fraction) {
    1196           0 :                 b->thash = NULL;
    1197           0 :                 doHASHdestroy(b, h);
    1198           0 :                 GDKclrerr();
    1199           0 :                 return;
    1200             :         }
    1201       27289 :         if (HASHfix(h, false, true) != GDK_SUCCEED) {
    1202           0 :                 b->thash = NULL;
    1203           0 :                 doHASHdestroy(b, h);
    1204           0 :                 GDKclrerr();
    1205           0 :                 return;
    1206             :         }
    1207       27288 :         BUN c = HASHprobe(h, v);
    1208       27287 :         BUN hb = HASHget(h, c);
    1209       27287 :         int (*atomcmp)(const void *, const void *) = ATOMcompare(h->type);
    1210       27287 :         if (hb == BUN_NONE || hb < p) {
    1211             :                 /* bucket is empty, or bucket is used by lower numbered
    1212             :                  * position */
    1213        7160 :                 h->heaplink.dirty = true;
    1214        7160 :                 h->heapbckt.dirty = true;
    1215        7160 :                 HASHputlink(h, p, hb);
    1216        7160 :                 HASHput(h, c, p);
    1217        7160 :                 if (hb == BUN_NONE) {
    1218        2584 :                         h->nheads++;
    1219             :                 } else {
    1220        5254 :                         do {
    1221        5254 :                                 if (atomcmp(v, BUNtail(*bi, hb)) == 0) {
    1222             :                                         /* found another row with the
    1223             :                                          * same value, so don't
    1224             :                                          * increment nunique */
    1225             :                                         return;
    1226             :                                 }
    1227        1883 :                                 hb = HASHgetlink(h, hb);
    1228        1883 :                         } while (hb != BUN_NONE);
    1229             :                 }
    1230             :                 /* this is a new value */
    1231        3789 :                 h->nunique++;
    1232        3789 :                 return;
    1233             :         }
    1234             :         bool seen = false;
    1235      345416 :         for (;;) {
    1236      345416 :                 if (!seen)
    1237       20766 :                         seen = atomcmp(v, BUNtail(*bi, hb)) == 0;
    1238      345418 :                 BUN hb2 = HASHgetlink(h, hb);
    1239      345418 :                 if (hb2 == BUN_NONE || hb2 < p) {
    1240       20129 :                         h->heaplink.dirty = true;
    1241       20129 :                         HASHputlink(h, p, hb2);
    1242       20128 :                         HASHputlink(h, hb, p);
    1243       20529 :                         while (!seen && hb2 != BUN_NONE) {
    1244         400 :                                 seen = atomcmp(v, BUNtail(*bi, hb2)) == 0;
    1245         401 :                                 hb2 = HASHgetlink(h, hb2);
    1246             :                         }
    1247       20129 :                         if (!seen)
    1248         514 :                                 h->nunique++;
    1249       20129 :                         return;
    1250             :                 }
    1251             :                 hb = hb2;
    1252             :         }
    1253             : }
    1254             : 
    1255             : BUN
    1256           2 : HASHinsert(BATiter *bi, BUN p, const void *v)
    1257             : {
    1258           2 :         BUN nunique;
    1259           2 :         MT_rwlock_wrlock(&bi->b->thashlock);
    1260           2 :         HASHinsert_locked(bi, p, v);
    1261           2 :         nunique = bi->b->thash ? bi->b->thash->nunique : 0;
    1262           2 :         MT_rwlock_wrunlock(&bi->b->thashlock);
    1263           2 :         return nunique;
    1264             : }
    1265             : 
    1266             : /* delete value v at position p from the hash table of b */
    1267             : void
    1268   159567031 : HASHdelete_locked(BATiter *bi, BUN p, const void *v)
    1269             : {
    1270   159567031 :         BAT *b = bi->b;
    1271   159567031 :         Hash *h = b->thash;
    1272   159567031 :         if (h == NULL) {
    1273             :                 return;
    1274             :         }
    1275       27292 :         if (h == (Hash *) 1) {
    1276           0 :                 b->thash = NULL;
    1277           0 :                 doHASHdestroy(b, h);
    1278           0 :                 GDKclrerr();
    1279           0 :                 return;
    1280             :         }
    1281       27292 :         assert(p * h->width < h->heaplink.free);
    1282       27292 :         if (h->nunique < b->batCount / hash_destroy_uniques_fraction) {
    1283           1 :                 b->thash = NULL;
    1284           1 :                 doHASHdestroy(b, h);
    1285           1 :                 GDKclrerr();
    1286           1 :                 return;
    1287             :         }
    1288       27291 :         if (HASHfix(h, false, true) != GDK_SUCCEED) {
    1289           0 :                 b->thash = NULL;
    1290           0 :                 doHASHdestroy(b, h);
    1291           0 :                 GDKclrerr();
    1292           0 :                 return;
    1293             :         }
    1294       27291 :         BUN c = HASHprobe(h, v);
    1295       27291 :         BUN hb = HASHget(h, c);
    1296       27291 :         int (*atomcmp)(const void *, const void *) = ATOMcompare(h->type);
    1297       27291 :         if (hb == p) {
    1298        4332 :                 BUN hb2 = HASHgetlink(h, p);
    1299        4332 :                 h->heaplink.dirty = true;
    1300        4332 :                 h->heapbckt.dirty = true;
    1301        4332 :                 HASHput(h, c, hb2);
    1302        4332 :                 HASHputlink(h, p, BUN_NONE);
    1303        4332 :                 if (hb2 == BUN_NONE) {
    1304        2184 :                         h->nheads--;
    1305             :                 } else {
    1306        2815 :                         do {
    1307        2815 :                                 if (atomcmp(v, BUNtail(*bi, hb2)) == 0) {
    1308             :                                         /* found another row with the
    1309             :                                          * same value, so don't
    1310             :                                          * decrement nunique below */
    1311             :                                         return;
    1312             :                                 }
    1313        1634 :                                 hb2 = HASHgetlink(h, hb2);
    1314        1634 :                         } while (hb2 != BUN_NONE);
    1315             :                 }
    1316             :                 /* no rows found with the same value, so number of
    1317             :                  * unique values is one lower */
    1318        3151 :                 h->nunique--;
    1319        3151 :                 return;
    1320             :         }
    1321             :         bool seen = false;
    1322             :         BUN links = 0;
    1323      273018 :         for (;;) {
    1324      273018 :                 if (!seen)
    1325       23675 :                         seen = atomcmp(v, BUNtail(*bi, hb)) == 0;
    1326      273018 :                 BUN hb2 = HASHgetlink(h, hb);
    1327      273018 :                 assert(hb2 != BUN_NONE );
    1328      273018 :                 assert(hb2 < hb);
    1329      273018 :                 if (hb2 == p) {
    1330       22957 :                         for (hb2 = HASHgetlink(h, hb2);
    1331       23360 :                              !seen && hb2 != BUN_NONE;
    1332         403 :                              hb2 = HASHgetlink(h, hb2))
    1333         403 :                                 seen = atomcmp(v, BUNtail(*bi, hb2)) == 0;
    1334       22957 :                         break;
    1335             :                 }
    1336      250061 :                 hb = hb2;
    1337      250061 :                 if (++links > hash_destroy_chain_length) {
    1338           2 :                         b->thash = NULL;
    1339           2 :                         doHASHdestroy(b, h);
    1340           2 :                         GDKclrerr();
    1341           2 :                         return;
    1342             :                 }
    1343             :         }
    1344       22957 :         h->heaplink.dirty = true;
    1345       22957 :         HASHputlink(h, hb, HASHgetlink(h, p));
    1346       22957 :         HASHputlink(h, p, BUN_NONE);
    1347       22956 :         if (!seen)
    1348         399 :                 h->nunique--;
    1349             : }
    1350             : 
    1351             : BUN
    1352           6 : HASHdelete(BATiter *bi, BUN p, const void *v)
    1353             : {
    1354           6 :         BUN nunique;
    1355           6 :         MT_rwlock_wrlock(&bi->b->thashlock);
    1356           6 :         HASHdelete_locked(bi, p, v);
    1357           6 :         nunique = bi->b->thash ? bi->b->thash->nunique : 0;
    1358           6 :         MT_rwlock_wrunlock(&bi->b->thashlock);
    1359           6 :         return nunique;
    1360             : }
    1361             : 
    1362             : BUN
    1363           0 : HASHlist(Hash *h, BUN i)
    1364             : {
    1365           0 :         BUN c = 1;
    1366           0 :         BUN j = HASHget(h, i);
    1367             : 
    1368           0 :         if (j == BUN_NONE)
    1369             :                 return 1;
    1370           0 :         while ((j = HASHgetlink(h, i)) != BUN_NONE) {
    1371           0 :                 c++;
    1372           0 :                 i = j;
    1373             :         }
    1374             :         return c;
    1375             : }
    1376             : 
    1377             : void
    1378    15303977 : HASHdestroy(BAT *b)
    1379             : {
    1380    15303977 :         if (b && b->thash) {
    1381        9661 :                 Hash *hs;
    1382        9661 :                 MT_rwlock_wrlock(&b->thashlock);
    1383        9661 :                 hs = b->thash;
    1384        9661 :                 b->thash = NULL;
    1385        9661 :                 MT_rwlock_wrunlock(&b->thashlock);
    1386        9661 :                 doHASHdestroy(b, hs);
    1387             :         }
    1388    15303977 : }
    1389             : 
    1390             : void
    1391      450631 : HASHfree(BAT *b)
    1392             : {
    1393      450631 :         if (b && b->thash) {
    1394        1982 :                 Hash *h;
    1395        1982 :                 MT_rwlock_wrlock(&b->thashlock);
    1396        1982 :                 if ((h = b->thash) != NULL && h != (Hash *) 1) {
    1397        1768 :                         bool rmheap = h->heaplink.dirty || h->heapbckt.dirty;
    1398        1768 :                         TRC_DEBUG(ACCELERATOR, ALGOBATFMT " free hash %s\n",
    1399             :                                   ALGOBATPAR(b),
    1400             :                                   rmheap ? "removing" : "keeping");
    1401             : 
    1402        1768 :                         b->thash = rmheap ? NULL : (Hash *) 1;
    1403        1768 :                         HEAPfree(&h->heapbckt, rmheap);
    1404        1768 :                         HEAPfree(&h->heaplink, rmheap);
    1405        1768 :                         GDKfree(h);
    1406             :                 }
    1407        1982 :                 MT_rwlock_wrunlock(&b->thashlock);
    1408             :         }
    1409      450631 : }

Generated by: LCOV version 1.14