LCOV - code coverage report
Current view: top level - gdk - gdk_hash.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 551 777 70.9 %
Date: 2024-10-03 20:03:20 Functions: 19 23 82.6 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : /*
      14             :  * - Hash Table Creation
      15             :  * The hash indexing scheme for BATs reserves a block of memory to
      16             :  * maintain the hash table and a collision list. A one-to-one mapping
      17             :  * exists between the BAT and the collision list using the BUN
      18             :  * index. NOTE: we alloc the link list as a parallel array to the BUN
      19             :  * array; hence the hash link array has the same size as
      20             :  * BATcapacity(b) (not BATcount(b)). This allows us in the BUN insert
      21             :  * and delete to assume that there is hash space if there is BUN
      22             :  * space.
      23             :  *
      24             :  * The hash mask size is a power of two, so we can do bitwise AND on
      25             :  * the hash (integer) number to quickly find the head of the bucket
      26             :  * chain.  Clearly, the hash mask size is a crucial parameter. If we
      27             :  * know that the column is unique (tkey), we use direct hashing (mask
      28             :  * size ~= BATcount). Otherwise we dynamically determine the mask size
      29             :  * by starting out with mask size = BATcount/64 (just 1.5% of memory
      30             :  * storage overhead). Then we start building the hash table on the
      31             :  * first 25% of the BAT. As we aim for max-collisions list length of
      32             :  * 4, the list on 25% should not exceed length 1. So, if a small
      33             :  * number of collisssions occurs (mask/2) then we abandon the attempt
      34             :  * and restart with a mask that is 4 times larger. This converges
      35             :  * after three cycles to direct hashing.
      36             :  */
      37             : 
      38             : #include "monetdb_config.h"
      39             : #include "gdk.h"
      40             : #include "gdk_private.h"
      41             : 
      42             : static inline uint8_t __attribute__((__const__))
      43     1354618 : HASHwidth(BUN hashsize)
      44             : {
      45     1354618 :         (void) hashsize;
      46             : #ifdef BUN2
      47     1354618 :         if (hashsize <= (BUN) BUN2_NONE)
      48             :                 return BUN2;
      49             : #endif
      50             : #ifdef BUN8
      51         512 :         if (hashsize > (BUN) BUN4_NONE)
      52           0 :                 return BUN8;
      53             : #endif
      54             :         return BUN4;
      55             : }
      56             : 
      57             : static inline BUN __attribute__((__const__))
      58        8719 : hashmask(BUN m)
      59             : {
      60        8719 :         m |= m >> 1;
      61        8719 :         m |= m >> 2;
      62        8719 :         m |= m >> 4;
      63        8719 :         m |= m >> 8;
      64        8719 :         m |= m >> 16;
      65             : #if SIZEOF_BUN == 8
      66        8719 :         m |= m >> 32;
      67             : #endif
      68        8719 :         return m;
      69             : }
      70             : 
      71             : static inline void
      72      864288 : HASHclear(Hash *h)
      73             : {
      74             :         /* since BUN2_NONE, BUN4_NONE, BUN8_NONE
      75             :          * are all equal to ~0, i.e., have all bits set,
      76             :          * we can use a simple memset() to clear the Hash,
      77             :          * rather than iteratively assigning individual
      78             :          * BUNi_NONE values in a for-loop
      79             :          */
      80      864288 :         memset(h->Bckt, 0xFF, h->nbucket * h->width);
      81      864288 : }
      82             : 
      83             : #define HASH_VERSION            6
      84             : /* this is only for the change of hash function of the floating point
      85             :  * types, the UUID type and the MBR type; if HASH_VERSION is increased
      86             :  * again from 6, the code associated with HASH_VERSION_NOUUID and
      87             :  * HASH_VERSION_NOMBR must be deleted */
      88             : #define HASH_VERSION_FLOAT      5
      89             : #define HASH_VERSION_NOMBR      4
      90             : #define HASH_VERSION_NOUUID     3
      91             : #define HASH_HEADER_SIZE        7       /* nr of size_t fields in header */
      92             : 
      93             : void
      94    18708159 : doHASHdestroy(BAT *b, Hash *hs)
      95             : {
      96    18708159 :         if (hs == (Hash *) 1) {
      97          18 :                 GDKunlink(BBPselectfarm(b->batRole, b->ttype, hashheap),
      98             :                           BATDIR,
      99          18 :                           BBP_physical(b->batCacheid),
     100             :                           "thashl");
     101          18 :                 GDKunlink(BBPselectfarm(b->batRole, b->ttype, hashheap),
     102             :                           BATDIR,
     103          18 :                           BBP_physical(b->batCacheid),
     104             :                           "thashb");
     105    18708141 :         } else if (hs) {
     106       11303 :                 TRC_DEBUG(ACCELERATOR, ALGOBATFMT ": removing%s hash\n", ALGOBATPAR(b), *(size_t *) hs->heapbckt.base & (1 << 24) ? " persisted" : "");
     107       11303 :                 HEAPfree(&hs->heapbckt, true);
     108       11303 :                 HEAPfree(&hs->heaplink, true);
     109       11303 :                 GDKfree(hs);
     110             :         }
     111    18708159 : }
     112             : 
     113             : gdk_return
     114      864291 : HASHnew(Hash *h, int tpe, BUN size, BUN mask, BUN count, bool bcktonly)
     115             : {
     116      864291 :         if (h->width == 0)
     117      851710 :                 h->width = HASHwidth(size);
     118             : 
     119      864291 :         if (!bcktonly) {
     120      851244 :                 if (HEAPalloc(&h->heaplink, size, h->width) != GDK_SUCCEED)
     121             :                         return GDK_FAIL;
     122      851243 :                 h->heaplink.free = size * h->width;
     123      851243 :                 h->heaplink.dirty = true;
     124      851243 :                 h->Link = h->heaplink.base;
     125             :         }
     126      864290 :         if (HEAPalloc(&h->heapbckt, mask + HASH_HEADER_SIZE * SIZEOF_SIZE_T / h->width, h->width) != GDK_SUCCEED) {
     127           0 :                 if (!bcktonly) {
     128           0 :                         HEAPfree(&h->heaplink, true);
     129           0 :                         h->heaplink.free = 0;
     130           0 :                         h->Link = NULL;
     131             :                 }
     132           0 :                 return GDK_FAIL;
     133             :         }
     134      864290 :         h->heapbckt.free = mask * h->width + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     135      864290 :         h->heapbckt.dirty = true;
     136      864290 :         h->nbucket = mask;
     137      864290 :         if (mask & (mask - 1)) {
     138        8560 :                 h->mask2 = hashmask(mask);
     139        8560 :                 h->mask1 = h->mask2 >> 1;
     140             :         } else {
     141             :                 /* mask is a power of two */
     142      855730 :                 h->mask1 = mask - 1;
     143      855730 :                 h->mask2 = h->mask1 << 1 | 1;
     144             :         }
     145      864290 :         h->Bckt = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     146      864290 :         h->type = tpe;
     147      864290 :         HASHclear(h);           /* zero the mask */
     148      864288 :         ((size_t *) h->heapbckt.base)[0] = (size_t) HASH_VERSION;
     149      864288 :         ((size_t *) h->heapbckt.base)[1] = (size_t) size;
     150      864288 :         ((size_t *) h->heapbckt.base)[2] = (size_t) h->nbucket;
     151      864288 :         ((size_t *) h->heapbckt.base)[3] = (size_t) h->width;
     152      864288 :         ((size_t *) h->heapbckt.base)[4] = (size_t) count;
     153      864288 :         ((size_t *) h->heapbckt.base)[5] = (size_t) h->nunique;
     154      864288 :         ((size_t *) h->heapbckt.base)[6] = (size_t) h->nheads;
     155      864288 :         TRC_DEBUG(ACCELERATOR,
     156             :                   "create hash(size " BUNFMT ", mask " BUNFMT ", width %d, total " BUNFMT " bytes);\n", size, mask, h->width, (size + mask) * h->width);
     157             :         return GDK_SUCCEED;
     158             : }
     159             : 
     160             : /* collect HASH statistics for analysis */
     161             : static void
     162           0 : HASHcollisions(BAT *b, Hash *h, const char *func)
     163             : {
     164           0 :         lng cnt, entries = 0, max = 0;
     165           0 :         double total = 0;
     166           0 :         BUN p, i, j;
     167             : 
     168           0 :         if (b == 0 || h == 0)
     169             :                 return;
     170           0 :         for (i = 0, j = h->nbucket; i < j; i++)
     171           0 :                 if ((p = HASHget(h, i)) != BUN_NONE) {
     172           0 :                         entries++;
     173           0 :                         cnt = 0;
     174           0 :                         for (; p != BUN_NONE; p = HASHgetlink(h, p))
     175           0 :                                 cnt++;
     176           0 :                         if (cnt > max)
     177             :                                 max = cnt;
     178           0 :                         total += cnt;
     179             :                 }
     180           0 :         TRC_DEBUG_ENDIF(ACCELERATOR,
     181             :                         "%s(" ALGOBATFMT "): statistics " BUNFMT ", "
     182             :                         "entries " LLFMT ", nunique " BUNFMT ", "
     183             :                         "nbucket " BUNFMT ", max " LLFMT ", avg %2.6f;\n",
     184             :                         func, ALGOBATPAR(b), BATcount(b), entries,
     185             :                         h->nunique, h->nbucket, max,
     186             :                         entries == 0 ? 0 : total / entries);
     187             : }
     188             : 
     189             : static gdk_return
     190           0 : HASHupgradehashheap(BAT *b)
     191             : {
     192             : #if defined(BUN2) || defined(BUN8)
     193           0 :         Hash *h = b->thash;
     194           0 :         int nwidth = h->width << 1;
     195           0 :         BUN i;
     196             : 
     197           0 :         assert(nwidth <= SIZEOF_BUN);
     198           0 :         assert((nwidth & (nwidth - 1)) == 0);
     199             : 
     200           0 :         if (HEAPextend(&h->heaplink, h->heaplink.size * nwidth / h->width, true) != GDK_SUCCEED ||
     201           0 :             HEAPextend(&h->heapbckt, (h->heapbckt.size - HASH_HEADER_SIZE * SIZEOF_SIZE_T) * nwidth / h->width + HASH_HEADER_SIZE * SIZEOF_SIZE_T, true) != GDK_SUCCEED) {
     202           0 :                 b->thash = NULL;
     203           0 :                 doHASHdestroy(b, h);
     204           0 :                 return GDK_FAIL;
     205             :         }
     206           0 :         h->Link = h->heaplink.base;
     207           0 :         h->Bckt = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     208           0 :         switch (nwidth) {
     209           0 :         case BUN4:
     210             : #ifdef BUN2
     211           0 :                 switch (h->width) {
     212           0 :                 case BUN2:
     213           0 :                         i = h->heaplink.free / h->width;
     214           0 :                         h->heaplink.free = i * nwidth;
     215           0 :                         while (i > 0) {
     216           0 :                                 i--;
     217           0 :                                 BUN2type v = ((BUN2type *) h->Link)[i];
     218           0 :                                 ((BUN4type *) h->Link)[i] = v == BUN2_NONE ? BUN4_NONE : v;
     219             :                         }
     220           0 :                         i = (h->heapbckt.free - HASH_HEADER_SIZE * SIZEOF_SIZE_T) / h->width;
     221           0 :                         h->heapbckt.free = HASH_HEADER_SIZE * SIZEOF_SIZE_T + i * nwidth;
     222           0 :                         while (i > 0) {
     223           0 :                                 i--;
     224           0 :                                 BUN2type v = ((BUN2type *) h->Bckt)[i];
     225           0 :                                 ((BUN4type *) h->Bckt)[i] = v == BUN2_NONE ? BUN4_NONE : v;
     226             :                         }
     227           0 :                         h->heapbckt.dirty = true;
     228           0 :                         h->heaplink.dirty = true;
     229           0 :                         break;
     230             :                 }
     231             : #endif
     232             :                 break;
     233             : #ifdef BUN8
     234           0 :         case BUN8:
     235           0 :                 switch (h->width) {
     236             : #ifdef BUN2
     237           0 :                 case BUN2:
     238           0 :                         i = h->heaplink.free / h->width;
     239           0 :                         h->heaplink.free = i * nwidth;
     240           0 :                         while (i > 0) {
     241           0 :                                 i--;
     242           0 :                                 BUN2type v = ((BUN2type *) h->Link)[i];
     243           0 :                                 ((BUN8type *) h->Link)[i] = v == BUN2_NONE ? BUN8_NONE : v;
     244             :                         }
     245           0 :                         i = (h->heapbckt.free - HASH_HEADER_SIZE * SIZEOF_SIZE_T) / h->width;
     246           0 :                         h->heapbckt.free = HASH_HEADER_SIZE * SIZEOF_SIZE_T + i * nwidth;
     247           0 :                         while (i > 0) {
     248           0 :                                 i--;
     249           0 :                                 BUN2type v = ((BUN2type *) h->Bckt)[i];
     250           0 :                                 ((BUN8type *) h->Bckt)[i] = v == BUN2_NONE ? BUN8_NONE : v;
     251             :                         }
     252           0 :                         h->heapbckt.dirty = true;
     253           0 :                         h->heaplink.dirty = true;
     254           0 :                         break;
     255             : #endif
     256           0 :                 case BUN4:
     257           0 :                         i = h->heaplink.free / h->width;
     258           0 :                         h->heaplink.free = i * nwidth;
     259           0 :                         while (i > 0) {
     260           0 :                                 i--;
     261           0 :                                 BUN4type v = ((BUN4type *) h->Link)[i];
     262           0 :                                 ((BUN8type *) h->Link)[i] = v == BUN4_NONE ? BUN8_NONE : v;
     263             :                         }
     264           0 :                         i = (h->heapbckt.free - HASH_HEADER_SIZE * SIZEOF_SIZE_T) / h->width;
     265           0 :                         h->heapbckt.free = HASH_HEADER_SIZE * SIZEOF_SIZE_T + i * nwidth;
     266           0 :                         while (i > 0) {
     267           0 :                                 i--;
     268           0 :                                 BUN4type v = ((BUN4type *) h->Bckt)[i];
     269           0 :                                 ((BUN8type *) h->Bckt)[i] = v == BUN4_NONE ? BUN8_NONE : v;
     270             :                         }
     271           0 :                         h->heapbckt.dirty = true;
     272           0 :                         h->heaplink.dirty = true;
     273           0 :                         break;
     274             :                 }
     275             :                 break;
     276             : #endif
     277             :         }
     278           0 :         h->width = nwidth;
     279             : #else
     280             :         (void) b;
     281             : #endif
     282           0 :         return GDK_SUCCEED;
     283             : }
     284             : 
     285             : /* write/remove the bit into/from the hash file that indicates the hash
     286             :  * is good to go; the bit is the last part to be written and the first
     287             :  * to be removed */
     288             : static inline gdk_return
     289      542646 : HASHfix(Hash *h, bool save, bool dosync)
     290             : {
     291      542646 :         if (!h->heapbckt.dirty && !h->heaplink.dirty) {
     292       24285 :                 const size_t mask = (size_t) 1 << 24;
     293       24285 :                 if (((size_t *) h->heapbckt.base)[0] & mask) {
     294       10674 :                         if (save)
     295             :                                 return GDK_SUCCEED;
     296       10674 :                         ((size_t *) h->heapbckt.base)[0] &= ~mask;
     297             :                 } else {
     298       13611 :                         if (!save)
     299             :                                 return GDK_SUCCEED;
     300       13611 :                         ((size_t *) h->heapbckt.base)[0] |= mask;
     301             :                 }
     302       24285 :                 if (h->heapbckt.storage == STORE_MEM) {
     303       24261 :                         gdk_return rc = GDK_FAIL;
     304       24261 :                         int fd = GDKfdlocate(h->heapbckt.farmid, h->heapbckt.filename, "rb+", NULL);
     305       24261 :                         if (fd >= 0) {
     306       24261 :                                 if (write(fd, h->heapbckt.base, SIZEOF_SIZE_T) == SIZEOF_SIZE_T) {
     307       24261 :                                         if (dosync &&
     308       24261 :                                             !(ATOMIC_GET(&GDKdebug) & NOSYNCMASK)) {
     309             : #if defined(NATIVE_WIN32)
     310             :                                                 _commit(fd);
     311             : #elif defined(HAVE_FDATASYNC)
     312          42 :                                                 fdatasync(fd);
     313             : #elif defined(HAVE_FSYNC)
     314             :                                                 fsync(fd);
     315             : #endif
     316             :                                         }
     317             :                                         rc = GDK_SUCCEED;
     318             :                                 }
     319       24261 :                                 close(fd);
     320             :                         }
     321       24261 :                         if (rc != GDK_SUCCEED)
     322           0 :                                 ((size_t *) h->heapbckt.base)[0] &= ~mask;
     323       24261 :                         return rc;
     324             :                 } else {
     325          24 :                         if (dosync &&
     326          48 :                             !(ATOMIC_GET(&GDKdebug) & NOSYNCMASK) &&
     327          24 :                             MT_msync(h->heapbckt.base, SIZEOF_SIZE_T) < 0) {
     328           0 :                                 ((size_t *) h->heapbckt.base)[0] &= ~mask;
     329           0 :                                 return GDK_FAIL;
     330             :                         }
     331             :                 }
     332             :         }
     333             :         return GDK_SUCCEED;
     334             : }
     335             : 
     336             : static gdk_return
     337      490324 : HASHgrowbucket(BAT *b)
     338             : {
     339      490324 :         Hash *h = b->thash;
     340      490324 :         BUN nbucket;
     341      490324 :         BUN onbucket = h->nbucket;
     342      490324 :         lng t0 = 0;
     343             : 
     344      490324 :         TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec();
     345             : 
     346             :         /* only needed to fix hash tables built before this fix was
     347             :          * introduced */
     348      490324 :         if (h->width < SIZEOF_BUN &&
     349      490324 :             ((BUN) 1 << (h->width * 8)) - 1 <= h->mask2 &&
     350           0 :             HASHupgradehashheap(b) != GDK_SUCCEED)
     351             :                 return GDK_FAIL;
     352             : 
     353      490324 :         h->heapbckt.dirty = true;
     354      490324 :         h->heaplink.dirty = true;
     355      747375 :         while (h->nunique >= (nbucket = h->nbucket) * 7 / 8) {
     356      257051 :                 BUN new = h->nbucket;
     357      257051 :                 BUN old = new & h->mask1;
     358      257051 :                 BUN mask = h->mask1 + 1; /* == h->mask2 - h->mask1 */
     359             : 
     360      257051 :                 assert(h->heapbckt.free == nbucket * h->width + HASH_HEADER_SIZE * SIZEOF_SIZE_T);
     361      257051 :                 if (h->heapbckt.free + h->width > h->heapbckt.size) {
     362        4613 :                         if (HEAPextend(&h->heapbckt,
     363             :                                        h->heapbckt.size + GDK_mmap_pagesize,
     364             :                                        true) != GDK_SUCCEED) {
     365           0 :                                 return GDK_FAIL;
     366             :                         }
     367        4613 :                         h->Bckt = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     368             :                 }
     369      257051 :                 assert(h->heapbckt.free + h->width <= h->heapbckt.size);
     370      257051 :                 if (h->nbucket == h->mask2) {
     371         475 :                         h->mask1 = h->mask2;
     372         475 :                         h->mask2 |= h->mask2 << 1;
     373         475 :                         if (h->width < SIZEOF_BUN &&
     374         475 :                             h->mask2 == ((BUN) 1 << (h->width * 8)) - 1) {
     375             :                                 /* time to widen the hash table */
     376           0 :                                 if (HASHupgradehashheap(b) != GDK_SUCCEED)
     377             :                                         return GDK_FAIL;
     378             :                         }
     379             :                 }
     380      257051 :                 h->nbucket++;
     381      257051 :                 h->heapbckt.free += h->width;
     382      257051 :                 BUN lold, lnew, hb;
     383      257051 :                 lold = lnew = BUN_NONE;
     384      257051 :                 BATiter bi = bat_iterator(b);
     385      257051 :                 if ((hb = HASHget(h, old)) != BUN_NONE) {
     386      206028 :                         h->nheads--;
     387      338582 :                         do {
     388      338582 :                                 const void *v = BUNtail(bi, hb);
     389      338582 :                                 BUN hsh = ATOMhash(h->type, v);
     390      338582 :                                 assert((hsh & (mask - 1)) == old);
     391      338582 :                                 if (hsh & mask) {
     392             :                                         /* move to new list */
     393      114078 :                                         if (lnew == BUN_NONE) {
     394      102841 :                                                 HASHput(h, new, hb);
     395      102841 :                                                 h->nheads++;
     396             :                                         } else
     397       11237 :                                                 HASHputlink(h, lnew, hb);
     398             :                                         lnew = hb;
     399             :                                 } else {
     400             :                                         /* move to old list */
     401      224504 :                                         if (lold == BUN_NONE) {
     402      170198 :                                                 h->nheads++;
     403      170198 :                                                 HASHput(h, old, hb);
     404             :                                         } else
     405       54306 :                                                 HASHputlink(h, lold, hb);
     406             :                                         lold = hb;
     407             :                                 }
     408      338582 :                                 hb = HASHgetlink(h, hb);
     409      338582 :                         } while (hb != BUN_NONE);
     410             :                 }
     411      257051 :                 bat_iterator_end(&bi);
     412      257051 :                 if (lnew == BUN_NONE)
     413      154210 :                         HASHput(h, new, BUN_NONE);
     414             :                 else
     415      102841 :                         HASHputlink(h, lnew, BUN_NONE);
     416      257051 :                 if (lold == BUN_NONE)
     417       86853 :                         HASHput(h, old, BUN_NONE);
     418             :                 else
     419      170198 :                         HASHputlink(h, lold, BUN_NONE);
     420             :         }
     421      490324 :         TRC_DEBUG_IF(ACCELERATOR) if (h->nbucket > onbucket) {
     422           0 :                 TRC_DEBUG_ENDIF(ACCELERATOR, ALGOBATFMT " " BUNFMT
     423             :                         " -> " BUNFMT " buckets (" LLFMT " usec)\n",
     424             :                         ALGOBATPAR(b),
     425             :                         onbucket, h->nbucket, GDKusec() - t0);
     426           0 :                 HASHcollisions(b, h, __func__);
     427             :         }
     428             :         return GDK_SUCCEED;
     429             : }
     430             : 
     431             : /* Return TRUE if we have a hash on the tail, even if we need to read
     432             :  * one from disk.
     433             :  *
     434             :  * Note that the b->thash pointer can be NULL, meaning there is no
     435             :  * hash; (Hash *) 1, meaning there is no hash loaded, but it may exist
     436             :  * on disk; or a valid pointer to a loaded hash.  These values are
     437             :  * maintained here, in the HASHdestroy and HASHfree functions, and in
     438             :  * BBPdiskscan during initialization. */
     439             : bool
     440    65290069 : BATcheckhash(BAT *b)
     441             : {
     442    65290069 :         lng t = 0;
     443    65290069 :         Hash *h;
     444             : 
     445    65290069 :         MT_rwlock_rdlock(&b->thashlock);
     446    65309199 :         h = b->thash;
     447    65309199 :         MT_rwlock_rdunlock(&b->thashlock);
     448    65309548 :         if (h == (Hash *) 1) {
     449             :                 /* but when we want to change it, we need the lock */
     450         423 :                 TRC_DEBUG_IF(ACCELERATOR) t = GDKusec();
     451         423 :                 MT_rwlock_wrlock(&b->thashlock);
     452         423 :                 TRC_DEBUG_IF(ACCELERATOR) t = GDKusec() - t;
     453             :                 /* if still 1 now that we have the lock, we can update */
     454         423 :                 if (b->thash == (Hash *) 1) {
     455         423 :                         int fd;
     456             : 
     457         423 :                         assert(!GDKinmemory(b->theap->farmid));
     458         423 :                         b->thash = NULL;
     459         423 :                         if ((h = GDKzalloc(sizeof(*h))) != NULL &&
     460         423 :                             (h->heaplink.farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) >= 0 &&
     461         423 :                             (h->heapbckt.farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) >= 0) {
     462         423 :                                 const char *nme = BBP_physical(b->batCacheid);
     463         423 :                                 strconcat_len(h->heaplink.filename,
     464             :                                               sizeof(h->heaplink.filename),
     465             :                                               nme, ".thashl", NULL);
     466         423 :                                 strconcat_len(h->heapbckt.filename,
     467             :                                               sizeof(h->heapbckt.filename),
     468             :                                               nme, ".thashb", NULL);
     469         423 :                                 h->heaplink.storage = STORE_INVALID;
     470         423 :                                 h->heaplink.newstorage = STORE_INVALID;
     471         423 :                                 h->heapbckt.storage = STORE_INVALID;
     472         423 :                                 h->heapbckt.newstorage = STORE_INVALID;
     473             : 
     474             :                                 /* check whether a persisted hash can be found */
     475         423 :                                 if ((fd = GDKfdlocate(h->heapbckt.farmid, nme, "rb+", "thashb")) >= 0) {
     476         423 :                                         size_t hdata[HASH_HEADER_SIZE];
     477         423 :                                         struct stat st;
     478             : 
     479         423 :                                         if (read(fd, hdata, sizeof(hdata)) == sizeof(hdata) &&
     480         423 :                                             (hdata[0] == (
     481             : #ifdef PERSISTENTHASH
     482             :                                                     ((size_t) 1 << 24) |
     483             : #endif
     484             :                                                     HASH_VERSION)
     485             : #ifdef HASH_VERSION_NOUUID
     486             :                                              /* if not uuid, also allow previous version */
     487          99 :                                              || (hdata[0] == (
     488             : #ifdef PERSISTENTHASH
     489             :                                                          ((size_t) 1 << 24) |
     490             : #endif
     491           0 :                                                          HASH_VERSION_NOUUID) &&
     492           0 :                                                  strcmp(ATOMname(b->ttype), "flt") != 0 &&
     493           0 :                                                  strcmp(ATOMname(b->ttype), "dbl") != 0 &&
     494           0 :                                                  strcmp(ATOMname(b->ttype), "uuid") != 0 &&
     495           0 :                                                  strcmp(ATOMname(b->ttype), "mbr") != 0)
     496             : #endif
     497             : #ifdef HASH_VERSION_NOMBR
     498             :                                              /* if not uuid, also allow previous version */
     499          99 :                                              || (hdata[0] == (
     500             : #ifdef PERSISTENTHASH
     501             :                                                          ((size_t) 1 << 24) |
     502             : #endif
     503          96 :                                                          HASH_VERSION_NOMBR) &&
     504          96 :                                                  strcmp(ATOMname(b->ttype), "flt") != 0 &&
     505          96 :                                                  strcmp(ATOMname(b->ttype), "dbl") != 0 &&
     506          96 :                                                  strcmp(ATOMname(b->ttype), "mbr") != 0)
     507             : #endif
     508             : #ifdef HASH_VERSION_FLOAT
     509             :                                              /* if not floating point, also allow previous version */
     510           3 :                                              || (hdata[0] == (
     511             : #ifdef PERSISTENTHASH
     512             :                                                          ((size_t) 1 << 24) |
     513             : #endif
     514           3 :                                                          HASH_VERSION_FLOAT) &&
     515           3 :                                                  strcmp(ATOMname(b->ttype), "flt") != 0 &&
     516           3 :                                                  strcmp(ATOMname(b->ttype), "dbl") != 0)
     517             : #endif
     518         423 :                                                     ) &&
     519         423 :                                             hdata[1] > 0 &&
     520             :                                             (
     521             : #ifdef BUN2
     522         423 :                                                     hdata[3] == BUN2 ||
     523             : #endif
     524             :                                                     hdata[3] == BUN4
     525             : #ifdef BUN8
     526             :                                                     || hdata[3] == BUN8
     527             : #endif
     528         423 :                                                     ) &&
     529         846 :                                             hdata[4] == (size_t) BATcount(b) &&
     530         423 :                                             fstat(fd, &st) == 0 &&
     531         846 :                                             st.st_size >= (off_t) (h->heapbckt.size = h->heapbckt.free = (h->nbucket = (BUN) hdata[2]) * (BUN) (h->width = (uint8_t) hdata[3]) + HASH_HEADER_SIZE * SIZEOF_SIZE_T) &&
     532         423 :                                             close(fd) == 0 &&
     533         846 :                                             (fd = GDKfdlocate(h->heaplink.farmid, nme, "rb+", "thashl")) >= 0 &&
     534         423 :                                             fstat(fd, &st) == 0 &&
     535         423 :                                             st.st_size > 0 &&
     536         846 :                                             st.st_size >= (off_t) (h->heaplink.size = h->heaplink.free = hdata[1] * h->width) &&
     537         423 :                                             HEAPload(&h->heaplink, nme, "thashl", false) == GDK_SUCCEED) {
     538         423 :                                                 if (HEAPload(&h->heapbckt, nme, "thashb", false) == GDK_SUCCEED) {
     539         423 :                                                         if (h->nbucket & (h->nbucket - 1)) {
     540         159 :                                                                 h->mask2 = hashmask(h->nbucket);
     541         159 :                                                                 h->mask1 = h->mask2 >> 1;
     542             :                                                         } else {
     543         264 :                                                                 h->mask1 = h->nbucket - 1;
     544         264 :                                                                 h->mask2 = h->mask1 << 1 | 1;
     545             :                                                         }
     546         423 :                                                         h->nunique = hdata[5];
     547         423 :                                                         h->nheads = hdata[6];
     548         423 :                                                         h->type = ATOMtype(b->ttype);
     549         423 :                                                         if (h->width < SIZEOF_BUN &&
     550         423 :                                                             ((BUN) 1 << (8 * h->width)) - 1 > h->nbucket) {
     551         422 :                                                                 close(fd);
     552         422 :                                                                 h->Link = h->heaplink.base;
     553         422 :                                                                 h->Bckt = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     554         422 :                                                                 h->heaplink.parentid = b->batCacheid;
     555         422 :                                                                 h->heapbckt.parentid = b->batCacheid;
     556         422 :                                                                 h->heaplink.dirty = false;
     557         422 :                                                                 h->heapbckt.dirty = false;
     558         422 :                                                                 b->thash = h;
     559         422 :                                                                 h->heapbckt.hasfile = true;
     560         422 :                                                                 h->heaplink.hasfile = true;
     561         422 :                                                                 TRC_DEBUG(ACCELERATOR,
     562             :                                                                           ALGOBATFMT ": reusing persisted hash\n", ALGOBATPAR(b));
     563         422 :                                                                 MT_rwlock_wrunlock(&b->thashlock);
     564         422 :                                                                 return true;
     565             :                                                         }
     566             :                                                         /* if h->nbucket
     567             :                                                          * equals the
     568             :                                                          * BUN_NONE
     569             :                                                          * representation
     570             :                                                          * for the
     571             :                                                          * current hash
     572             :                                                          * width (was
     573             :                                                          * possible in
     574             :                                                          * previous
     575             :                                                          * iterations of
     576             :                                                          * the code),
     577             :                                                          * then we can't
     578             :                                                          * use the hash
     579             :                                                          * since we
     580             :                                                          * can't
     581             :                                                          * distinguish
     582             :                                                          * between
     583             :                                                          * end-of-list
     584             :                                                          * and a valid
     585             :                                                          * link */
     586           1 :                                                         HEAPfree(&h->heapbckt, false);
     587             :                                                 }
     588           1 :                                                 HEAPfree(&h->heaplink, false);
     589             :                                         }
     590           1 :                                         close(fd);
     591             :                                         /* unlink unusable file */
     592           1 :                                         GDKunlink(h->heaplink.farmid, BATDIR, nme, "thashl");
     593           1 :                                         GDKunlink(h->heapbckt.farmid, BATDIR, nme, "thashb");
     594           1 :                                         h->heapbckt.hasfile = false;
     595           1 :                                         h->heaplink.hasfile = false;
     596             :                                 }
     597             :                         }
     598           1 :                         GDKfree(h);
     599           1 :                         GDKclrerr();    /* we're not currently interested in errors */
     600             :                 }
     601           1 :                 h = b->thash;
     602           1 :                 MT_rwlock_wrunlock(&b->thashlock);
     603             :         }
     604    65309126 :         if (h != NULL) {
     605     3715763 :                 TRC_DEBUG(ACCELERATOR, ALGOBATFMT ": already has hash, waited " LLFMT " usec\n", ALGOBATPAR(b), t);
     606             :         }
     607    65309126 :         return h != NULL;
     608             : }
     609             : 
     610             : static void
     611       14475 : BAThashsave_intern(BAT *b, bool dosync)
     612             : {
     613       14475 :         Hash *h;
     614       14475 :         lng t0 = 0;
     615             : 
     616       14475 :         TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec();
     617             : 
     618       14475 :         if ((h = b->thash) != NULL) {
     619             : #ifndef PERSISTENTHASH
     620             :                 /* no need to sync if not persistent */
     621             :                 dosync = false;
     622             : #endif
     623             : 
     624             :                 /* only persist if parent BAT hasn't changed in the
     625             :                  * mean time */
     626       14475 :                 if (!b->theap->dirty &&
     627       13611 :                     ((size_t *) h->heapbckt.base)[1] == BATcount(b) &&
     628       27222 :                     ((size_t *) h->heapbckt.base)[4] == BATcount(b) &&
     629       27222 :                     HEAPsave(&h->heaplink, h->heaplink.filename, NULL, dosync, h->heaplink.free, NULL) == GDK_SUCCEED &&
     630       13611 :                     HEAPsave(&h->heapbckt, h->heapbckt.filename, NULL, dosync, h->heapbckt.free, NULL) == GDK_SUCCEED) {
     631       13611 :                         h->heaplink.dirty = false;
     632       13611 :                         h->heapbckt.dirty = false;
     633       13611 :                         h->heaplink.hasfile = true;
     634       13611 :                         h->heapbckt.hasfile = true;
     635       13611 :                         gdk_return rc = HASHfix(h, true, dosync);
     636       13611 :                         TRC_DEBUG(ACCELERATOR,
     637             :                                   ALGOBATFMT ": persisting hash %s%s (" LLFMT " usec)%s\n", ALGOBATPAR(b), h->heapbckt.filename, dosync ? "" : " no sync", GDKusec() - t0, rc == GDK_SUCCEED ? "" : " failed");
     638             :                 }
     639       14475 :                 GDKclrerr();
     640             :         }
     641       14475 : }
     642             : 
     643             : void
     644       14475 : BAThashsave(BAT *b, bool dosync)
     645             : {
     646       14475 :         Hash *h = b->thash;
     647       14475 :         if (h == NULL)
     648             :                 return;
     649       14475 :         ((size_t *) h->heapbckt.base)[0] = (size_t) HASH_VERSION;
     650       14475 :         ((size_t *) h->heapbckt.base)[1] = (size_t) (h->heaplink.free / h->width);
     651       14475 :         ((size_t *) h->heapbckt.base)[2] = (size_t) h->nbucket;
     652       14475 :         ((size_t *) h->heapbckt.base)[3] = (size_t) h->width;
     653       14475 :         ((size_t *) h->heapbckt.base)[4] = (size_t) BATcount(b);
     654       14475 :         ((size_t *) h->heapbckt.base)[5] = (size_t) h->nunique;
     655       14475 :         ((size_t *) h->heapbckt.base)[6] = (size_t) h->nheads;
     656       14475 :         BAThashsave_intern(b, dosync);
     657             : }
     658             : 
     659             : #define EQbte(a, b)     ((a) == (b))
     660             : #define EQsht(a, b)     ((a) == (b))
     661             : #define EQint(a, b)     ((a) == (b))
     662             : #define EQlng(a, b)     ((a) == (b))
     663             : #ifdef HAVE_HGE
     664             : #define EQhge(a, b)     ((a) == (b))
     665             : #endif
     666             : #define EQflt(a, b)     (is_flt_nil(a) ? is_flt_nil(b) : (a) == (b))
     667             : #define EQdbl(a, b)     (is_dbl_nil(a) ? is_dbl_nil(b) : (a) == (b))
     668             : #ifdef HAVE_HGE
     669             : #define EQuuid(a, b)    ((a).h == (b).h)
     670             : #else
     671             : #define EQuuid(a, b)    (memcmp((a).u, (b).u, UUID_SIZE) == 0)
     672             : #endif
     673             : 
     674             : #define starthash(TYPE)                                                 \
     675             :         do {                                                            \
     676             :                 const TYPE *restrict v = (const TYPE *) BUNtloc(bi, 0); \
     677             :                 TIMEOUT_LOOP(p, qry_ctx) {                              \
     678             :                         hget = HASHget(h, c);                           \
     679             :                         if (hget == BUN_NONE) {                         \
     680             :                                 if (h->nheads == maxslots)           \
     681             :                                         TIMEOUT_LOOP_BREAK; /* mask too full */ \
     682             :                                 h->nheads++;                         \
     683             :                                 h->nunique++;                                \
     684             :                         } else {                                        \
     685             :                                 for (hb = hget;                         \
     686             :                                      hb != BUN_NONE;                    \
     687             :                                      hb = HASHgetlink(h, hb)) {         \
     688             :                                         if (EQ##TYPE(v[o - b->hseqbase], v[hb])) \
     689             :                                                 break;                  \
     690             :                                 }                                       \
     691             :                                 h->nunique += hb == BUN_NONE;                \
     692             :                         }                                               \
     693             :                         HASHputlink(h, p, hget);                        \
     694             :                         HASHput(h, c, p);                               \
     695             :                         o = canditer_next(ci);                          \
     696             :                 }                                                       \
     697             :                 TIMEOUT_CHECK(qry_ctx,                                  \
     698             :                               GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); \
     699             :         } while (0)
     700             : #define finishhash(TYPE)                                                \
     701             :         do {                                                            \
     702             :                 const TYPE *restrict v = (const TYPE *) BUNtloc(bi, 0); \
     703             :                 TIMEOUT_LOOP(ci->ncand - p, qry_ctx) {                       \
     704             :                         c = hash_##TYPE(h, v + o - b->hseqbase);     \
     705             :                         hget = HASHget(h, c);                           \
     706             :                         h->nheads += hget == BUN_NONE;                       \
     707             :                         if (!hascand) {                                 \
     708             :                                 for (hb = hget;                         \
     709             :                                      hb != BUN_NONE;                    \
     710             :                                      hb = HASHgetlink(h, hb)) {         \
     711             :                                         if (EQ##TYPE(v[o - b->hseqbase], v[hb])) \
     712             :                                                 break;                  \
     713             :                                 }                                       \
     714             :                                 h->nunique += hb == BUN_NONE;                \
     715             :                                 o = canditer_next_dense(ci);            \
     716             :                         } else {                                        \
     717             :                                 o = canditer_next(ci);                  \
     718             :                         }                                               \
     719             :                         HASHputlink(h, p, hget);                        \
     720             :                         HASHput(h, c, p);                               \
     721             :                         p++;                                            \
     722             :                 }                                                       \
     723             :                 TIMEOUT_CHECK(qry_ctx,                                  \
     724             :                               GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); \
     725             :         } while (0)
     726             : 
     727             : /* Internal function to create a hash table for the given BAT b.
     728             :  * If a candidate list s is also given, the hash table is specific for
     729             :  * the combination of the two: only values from b that are referred to
     730             :  * by s are included in the hash table, so if a result is found when
     731             :  * searching the hash table, the result is a candidate. */
     732             : Hash *
     733       13047 : BAThash_impl(BAT *restrict b, struct canditer *restrict ci, const char *restrict ext)
     734             : {
     735       13047 :         lng t0 = 0;
     736       13047 :         BUN cnt1;
     737       13047 :         BUN mask, maxmask = 0;
     738       13047 :         BUN p, c;
     739       13047 :         oid o;
     740       13047 :         BUN hget, hb;
     741       13047 :         Hash *h = NULL;
     742       13047 :         const char *nme = GDKinmemory(b->theap->farmid) ? ":memory:" : BBP_physical(b->batCacheid);
     743       13047 :         BATiter bi = bat_iterator(b);
     744       13047 :         unsigned int tpe = ATOMbasetype(bi.type);
     745       13047 :         bool hascand = ci->tpe != cand_dense || ci->ncand != bi.count;
     746             : 
     747       13047 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
     748             : 
     749       13047 :         assert(strcmp(ext, "thash") != 0 || !hascand);
     750       13047 :         assert(bi.type != TYPE_msk);
     751             : 
     752       25923 :         MT_thread_setalgorithm(hascand ? "create hash with candidates" : "create hash");
     753       13047 :         TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec();
     754       13047 :         TRC_DEBUG(ACCELERATOR,
     755             :                   ALGOBATFMT ": create hash;\n", ALGOBATPAR(b));
     756       13047 :         if (bi.type == TYPE_void) {
     757           0 :                 if (is_oid_nil(b->tseqbase)) {
     758           0 :                         TRC_DEBUG(ACCELERATOR,
     759             :                                   "cannot create hash-table on void-NIL column.\n");
     760           0 :                         GDKerror("no hash on void/nil column\n");
     761           0 :                         bat_iterator_end(&bi);
     762           0 :                         return NULL;
     763             :                 }
     764           0 :                 TRC_DEBUG(ACCELERATOR,
     765             :                           "creating hash-table on void column..\n");
     766           0 :                 assert(0);
     767             :                 tpe = TYPE_void;
     768             :         }
     769             : 
     770       13047 :         if ((h = GDKzalloc(sizeof(*h))) == NULL ||
     771       13047 :             (h->heaplink.farmid = BBPselectfarm(hascand ? TRANSIENT : b->batRole, bi.type, hashheap)) < 0 ||
     772       13047 :             (h->heapbckt.farmid = BBPselectfarm(hascand ? TRANSIENT : b->batRole, bi.type, hashheap)) < 0) {
     773           0 :                 GDKfree(h);
     774           0 :                 bat_iterator_end(&bi);
     775           0 :                 return NULL;
     776             :         }
     777       13047 :         h->width = HASHwidth(BATcapacity(b));
     778       13047 :         h->heaplink.dirty = true;
     779       13047 :         h->heapbckt.dirty = true;
     780       13047 :         strconcat_len(h->heaplink.filename, sizeof(h->heaplink.filename),
     781             :                       nme, ".", ext, "l", NULL);
     782       13047 :         strconcat_len(h->heapbckt.filename, sizeof(h->heapbckt.filename),
     783             :                       nme, ".", ext, "b", NULL);
     784       13047 :         h->heapbckt.parentid = b->batCacheid;
     785       13047 :         h->heaplink.parentid = b->batCacheid;
     786       13047 :         if (HEAPalloc(&h->heaplink, hascand ? ci->ncand : BATcapacity(b),
     787       13047 :                       h->width) != GDK_SUCCEED) {
     788           0 :                 GDKfree(h);
     789           0 :                 bat_iterator_end(&bi);
     790           0 :                 return NULL;
     791             :         }
     792       13047 :         h->heaplink.free = ci->ncand * h->width;
     793       13047 :         h->Link = h->heaplink.base;
     794             : #ifndef NDEBUG
     795             :         /* clear unused part of Link array */
     796       13047 :         if (h->heaplink.size > h->heaplink.free)
     797       12029 :                 memset(h->heaplink.base + h->heaplink.free, 0,
     798             :                        h->heaplink.size - h->heaplink.free);
     799             : #endif
     800             : 
     801             :         /* determine hash mask size */
     802       13047 :         cnt1 = 0;
     803       13047 :         if (ATOMsize(tpe) == 1) {
     804             :                 /* perfect hash for one-byte sized atoms */
     805             :                 mask = (1 << 8);
     806       13037 :         } else if (ATOMsize(tpe) == 2) {
     807             :                 /* perfect hash for two-byte sized atoms */
     808             :                 mask = (1 << 16);
     809       12981 :         } else if (bi.key || ci->ncand <= 4096) {
     810             :                 /* if key, or if small, don't bother dynamically
     811             :                  * adjusting the hash mask */
     812       12811 :                 mask = HASHmask(ci->ncand);
     813         170 :         } else if (!hascand && bi.unique_est != 0) {
     814         170 :                 mask = (BUN) (bi.unique_est * 1.15); /* about 8/7 */
     815             :         } else {
     816             :                 /* dynamic hash: we start with HASHmask(ci->ncand)/64, or,
     817             :                  * if ci->ncand large enough, HASHmask(ci->ncand)/256; if there
     818             :                  * are too many collisions we try HASHmask(ci->ncand)/64,
     819             :                  * HASHmask(ci->ncand)/16, HASHmask(ci->ncand)/4, and finally
     820             :                  * HASHmask(ci->ncand), but we might skip some of these if
     821             :                  * there are many distinct values.  */
     822           0 :                 maxmask = HASHmask(ci->ncand);
     823           0 :                 mask = maxmask >> 6;
     824           0 :                 while (mask > 4096)
     825           0 :                         mask >>= 2;
     826             :                 /* try out on first 25% of b */
     827           0 :                 cnt1 = ci->ncand >> 2;
     828             :         }
     829             : 
     830       13047 :         o = canditer_next(ci);  /* always one ahead */
     831           0 :         for (;;) {
     832       13047 :                 lng t1 = 0;
     833       13047 :                 TRC_DEBUG_IF(ACCELERATOR) t1 = GDKusec();
     834       13047 :                 BUN maxslots = (mask >> 3) - 1;   /* 1/8 full is too full */
     835             : 
     836       13047 :                 h->nheads = 0;
     837       13047 :                 h->nunique = 0;
     838       13047 :                 p = 0;
     839       13047 :                 HEAPfree(&h->heapbckt, true);
     840             :                 /* create the hash structures */
     841       26094 :                 if (HASHnew(h, ATOMtype(bi.type), BATcapacity(b),
     842             :                             mask, ci->ncand, true) != GDK_SUCCEED) {
     843           0 :                         HEAPfree(&h->heaplink, true);
     844           0 :                         GDKfree(h);
     845           0 :                         bat_iterator_end(&bi);
     846           0 :                         return NULL;
     847             :                 }
     848             : 
     849       13047 :                 switch (tpe) {
     850          10 :                 case TYPE_bte:
     851          20 :                         starthash(bte);
     852             :                         break;
     853          56 :                 case TYPE_sht:
     854         112 :                         starthash(sht);
     855             :                         break;
     856          12 :                 case TYPE_flt:
     857          24 :                         starthash(flt);
     858             :                         break;
     859       11477 :                 case TYPE_int:
     860       22954 :                         starthash(int);
     861             :                         break;
     862           7 :                 case TYPE_dbl:
     863          14 :                         starthash(dbl);
     864             :                         break;
     865         746 :                 case TYPE_lng:
     866        1492 :                         starthash(lng);
     867             :                         break;
     868             : #ifdef HAVE_HGE
     869           7 :                 case TYPE_hge:
     870          14 :                         starthash(hge);
     871             :                         break;
     872             : #endif
     873           1 :                 case TYPE_uuid:
     874           2 :                         starthash(uuid);
     875             :                         break;
     876         731 :                 default: {
     877         731 :                         int (*atomcmp)(const void *, const void *) = ATOMcompare(h->type);
     878        1462 :                         TIMEOUT_LOOP(p, qry_ctx) {
     879           0 :                                 const void *restrict v = BUNtail(bi, o - b->hseqbase);
     880           0 :                                 c = hash_any(h, v);
     881           0 :                                 hget = HASHget(h, c);
     882           0 :                                 if (hget == BUN_NONE) {
     883           0 :                                         if (h->nheads == maxslots)
     884           0 :                                                 TIMEOUT_LOOP_BREAK; /* mask too full */
     885           0 :                                         h->nheads++;
     886           0 :                                         h->nunique++;
     887             :                                 } else {
     888             :                                         for (hb = hget;
     889           0 :                                              hb != BUN_NONE;
     890           0 :                                              hb = HASHgetlink(h, hb)) {
     891           0 :                                                 if (atomcmp(v,
     892           0 :                                                             BUNtail(bi, hb)) == 0)
     893             :                                                         break;
     894             :                                         }
     895           0 :                                         h->nunique += hb == BUN_NONE;
     896             :                                 }
     897           0 :                                 HASHputlink(h, p, hget);
     898           0 :                                 HASHput(h, c, p);
     899           0 :                                 o = canditer_next(ci);
     900             :                         }
     901         731 :                         TIMEOUT_CHECK(qry_ctx,
     902             :                                       GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
     903             :                         break;
     904             :                 }
     905             :                 }
     906       13047 :                 TRC_DEBUG_IF(ACCELERATOR) if (p < cnt1)
     907           0 :                         TRC_DEBUG_ENDIF(ACCELERATOR,
     908             :                                         "%s: abort starthash with "
     909             :                                 "mask " BUNFMT " at " BUNFMT " after " LLFMT " usec\n", BATgetId(b), mask, p, GDKusec() - t1);
     910       13047 :                 if (p == cnt1 || mask == maxmask)
     911             :                         break;
     912           0 :                 mask <<= 2;
     913             :                 /* if we fill up the slots fast (p <= maxslots * 1.2)
     914             :                  * increase mask size a bit more quickly */
     915           0 :                 if (p == h->nunique) {
     916             :                         /* only unique values so far: grow really fast */
     917             :                         mask = maxmask;
     918             :                         cnt1 = 0;
     919           0 :                 } else if (mask < maxmask && p <= maxslots * 1.2)
     920           0 :                         mask <<= 2;
     921           0 :                 canditer_reset(ci);
     922           0 :                 o = canditer_next(ci);
     923             :         }
     924             : 
     925             :         /* finish the hashtable with the current mask */
     926       13047 :         switch (tpe) {
     927          10 :         case TYPE_bte:
     928         343 :                 finishhash(bte);
     929             :                 break;
     930          56 :         case TYPE_sht:
     931         577 :                 finishhash(sht);
     932             :                 break;
     933       11477 :         case TYPE_int:
     934    41065767 :                 finishhash(int);
     935             :                 break;
     936          12 :         case TYPE_flt:
     937         369 :                 finishhash(flt);
     938             :                 break;
     939           7 :         case TYPE_dbl:
     940          58 :                 finishhash(dbl);
     941             :                 break;
     942         746 :         case TYPE_lng:
     943    16618384 :                 finishhash(lng);
     944             :                 break;
     945             : #ifdef HAVE_HGE
     946           7 :         case TYPE_hge:
     947         114 :                 finishhash(hge);
     948             :                 break;
     949             : #endif
     950           1 :         case TYPE_uuid:
     951           2 :                 finishhash(uuid);
     952             :                 break;
     953         731 :         default: {
     954         731 :                 int (*atomcmp)(const void *, const void *) = ATOMcompare(h->type);
     955      324213 :                 TIMEOUT_LOOP(ci->ncand - p, qry_ctx) {
     956      322745 :                         const void *restrict v = BUNtail(bi, o - b->hseqbase);
     957      322746 :                         c = hash_any(h, v);
     958      322764 :                         hget = HASHget(h, c);
     959      322764 :                         h->nheads += hget == BUN_NONE;
     960      322764 :                         if (!hascand) {
     961             :                                 for (hb = hget;
     962      423579 :                                      hb != BUN_NONE;
     963      100810 :                                      hb = HASHgetlink(h, hb)) {
     964      267173 :                                         if (atomcmp(v, BUNtail(bi, hb)) == 0)
     965             :                                                 break;
     966             :                                 }
     967      322757 :                                 h->nunique += hb == BUN_NONE;
     968             :                         }
     969      322752 :                         HASHputlink(h, p, hget);
     970      322741 :                         HASHput(h, c, p);
     971      322737 :                         o = canditer_next(ci);
     972      322745 :                         p++;
     973             :                 }
     974         731 :                 TIMEOUT_CHECK(qry_ctx,
     975             :                               GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
     976             :                 break;
     977             :         }
     978             :         }
     979       13047 :         bat_iterator_end(&bi);
     980             :         /* if the number of unique values is equal to the bat count,
     981             :          * all values are necessarily distinct */
     982       13047 :         MT_lock_set(&b->theaplock);
     983       13047 :         if (h->nunique == BATcount(b) && !b->tkey) {
     984        9453 :                 b->tkey = true;
     985             :         }
     986       13047 :         if (ci->ncand == BATcount(b))
     987       12876 :                 b->tunique_est = (double) h->nunique;
     988       13047 :         MT_lock_unset(&b->theaplock);
     989       13047 :         TRC_DEBUG_IF(ACCELERATOR) {
     990           0 :                 TRC_DEBUG_ENDIF(ACCELERATOR,
     991             :                                 "hash construction " LLFMT " usec\n", GDKusec() - t0);
     992           0 :                 HASHcollisions(b, h, __func__);
     993             :         }
     994             :         return h;
     995             : 
     996           0 :   bailout:
     997           0 :         bat_iterator_end(&bi);
     998           0 :         HEAPfree(&h->heaplink, true);
     999           0 :         HEAPfree(&h->heapbckt, true);
    1000           0 :         GDKfree(h);
    1001           0 :         return NULL;
    1002             : }
    1003             : 
    1004             : gdk_return
    1005     2681137 : BAThash(BAT *b)
    1006             : {
    1007     2681137 :         if (b->ttype == TYPE_void) {
    1008           0 :                 GDKerror("No hash on void type bats\n");
    1009           0 :                 return GDK_FAIL;
    1010             :         }
    1011     2681137 :         if (ATOMstorage(b->ttype) == TYPE_msk) {
    1012           0 :                 GDKerror("No hash on msk type bats\n");
    1013           0 :                 return GDK_FAIL;
    1014             :         }
    1015     2681137 :         if (BATcheckhash(b)) {
    1016             :                 return GDK_SUCCEED;
    1017             :         }
    1018             : #ifdef __COVERITY__
    1019             :         MT_rwlock_wrlock(&b->thashlock);
    1020             : #else
    1021       12878 :         for (;;) {
    1022             :                 /* If multiple threads simultaneously try to build a
    1023             :                  * hash on a bat, e.g. in order to perform a join, it
    1024             :                  * may happen that one thread succeeds in obtaining the
    1025             :                  * write lock, then builds the hash, releases the lock,
    1026             :                  * acquires the read lock, and performs the join.  The
    1027             :                  * other threads may then still be attempting to acquire
    1028             :                  * the write lock.  But now they have to wait until the
    1029             :                  * read lock is released, which can be quite a long
    1030             :                  * time.  Especially if a second thread goes through the
    1031             :                  * same process as the first. */
    1032       12878 :                 if (MT_rwlock_wrtry(&b->thashlock))
    1033             :                         break;
    1034           2 :                 MT_sleep_ms(1);
    1035           2 :                 if (MT_rwlock_rdtry(&b->thashlock)) {
    1036           2 :                         if (b->thash != NULL && b->thash != (Hash *) 1) {
    1037           2 :                                 MT_rwlock_rdunlock(&b->thashlock);
    1038           2 :                                 return GDK_SUCCEED;
    1039             :                         }
    1040           0 :                         MT_rwlock_rdunlock(&b->thashlock);
    1041             :                 }
    1042             :         }
    1043             : #endif
    1044             :         /* we have the write lock */
    1045       12876 :         if (b->thash == NULL) {
    1046       12876 :                 struct canditer ci;
    1047       12876 :                 canditer_init(&ci, b, NULL);
    1048       12876 :                 if ((b->thash = BAThash_impl(b, &ci, "thash")) == NULL) {
    1049           0 :                         MT_rwlock_wrunlock(&b->thashlock);
    1050           0 :                         return GDK_FAIL;
    1051             :                 }
    1052             :         }
    1053       12876 :         MT_rwlock_wrunlock(&b->thashlock);
    1054       12876 :         return GDK_SUCCEED;
    1055             : }
    1056             : 
    1057             : /*
    1058             :  * The entry on which a value hashes can be calculated with the
    1059             :  * routine HASHprobe.
    1060             :  */
    1061             : BUN
    1062   449746124 : HASHprobe(const Hash *h, const void *v)
    1063             : {
    1064   864793926 :         switch (ATOMbasetype(h->type)) {
    1065        4067 :         case TYPE_bte:
    1066        4067 :                 return hash_bte(h, v);
    1067      141656 :         case TYPE_sht:
    1068      141656 :                 return hash_sht(h, v);
    1069   396363518 :         case TYPE_int:
    1070   396363518 :                 return hash_int(h, v);
    1071    34878694 :         case TYPE_lng:
    1072    34878694 :                 return hash_lng(h, v);
    1073             : #ifdef HAVE_HGE
    1074       31593 :         case TYPE_hge:
    1075       31593 :                 return hash_hge(h, v);
    1076             : #endif
    1077      150798 :         case TYPE_flt:
    1078      150798 :                 return hash_flt(h, v);
    1079       41055 :         case TYPE_dbl:
    1080       41055 :                 return hash_dbl(h, v);
    1081        2104 :         case TYPE_uuid:
    1082        2104 :                 return hash_uuid(h, v);
    1083    18132639 :         default:
    1084    18132639 :                 return hash_any(h, v);
    1085             :         }
    1086             : }
    1087             : 
    1088             : void
    1089      490327 : HASHappend_locked(BAT *b, BUN i, const void *v)
    1090             : {
    1091      490327 :         Hash *h = b->thash;
    1092      490327 :         if (h == NULL) {
    1093           0 :                 return;
    1094             :         }
    1095      490327 :         if (h == (Hash *) 1) {
    1096           0 :                 b->thash = NULL;
    1097           0 :                 doHASHdestroy(b, h);
    1098           0 :                 GDKclrerr();
    1099           0 :                 return;
    1100             :         }
    1101      490327 :         assert(i * h->width == h->heaplink.free);
    1102      490327 :         if (h->nunique < b->batCount / hash_destroy_uniques_fraction) {
    1103           0 :                 b->thash = NULL;
    1104           0 :                 doHASHdestroy(b, h);
    1105           0 :                 GDKclrerr();
    1106           0 :                 return;
    1107             :         }
    1108      490327 :         if (HASHfix(h, false, true) != GDK_SUCCEED) {
    1109           0 :                 b->thash = NULL;
    1110           0 :                 doHASHdestroy(b, h);
    1111           0 :                 GDKclrerr();
    1112           0 :                 return;
    1113             :         }
    1114      490327 :         if (HASHwidth(i + 1) > h->width &&
    1115           0 :              HASHupgradehashheap(b) != GDK_SUCCEED) {
    1116           0 :                 GDKclrerr();
    1117           0 :                 return;
    1118             :         }
    1119      980651 :         if ((ATOMsize(b->ttype) > 2 &&
    1120      490324 :              HASHgrowbucket(b) != GDK_SUCCEED) ||
    1121      491461 :             ((i + 1) * h->width > h->heaplink.size &&
    1122        1134 :              HEAPextend(&h->heaplink,
    1123        1134 :                         i * h->width + GDK_mmap_pagesize,
    1124             :                         true) != GDK_SUCCEED)) {
    1125           0 :                 b->thash = NULL;
    1126           0 :                 HEAPfree(&h->heapbckt, true);
    1127           0 :                 HEAPfree(&h->heaplink, true);
    1128           0 :                 GDKfree(h);
    1129           0 :                 GDKclrerr();
    1130           0 :                 return;
    1131             :         }
    1132      490327 :         h->Link = h->heaplink.base;
    1133      490327 :         BUN c = HASHprobe(h, v);
    1134      490327 :         h->heaplink.free += h->width;
    1135      490327 :         BUN hb = HASHget(h, c);
    1136      490327 :         BUN hb2;
    1137      490327 :         BATiter bi = bat_iterator_nolock(b);
    1138      490327 :         int (*atomcmp)(const void *, const void *) = ATOMcompare(h->type);
    1139      490327 :         for (hb2 = hb;
    1140      687832 :              hb2 != BUN_NONE;
    1141      197505 :              hb2 = HASHgetlink(h, hb2)) {
    1142      360067 :                 if (atomcmp(v, BUNtail(bi, hb2)) == 0)
    1143             :                         break;
    1144             :         }
    1145      490327 :         h->nheads += hb == BUN_NONE;
    1146      490327 :         h->nunique += hb2 == BUN_NONE;
    1147      490327 :         HASHputlink(h, i, hb);
    1148      490327 :         HASHput(h, c, i);
    1149      490327 :         h->heapbckt.dirty = true;
    1150      490327 :         h->heaplink.dirty = true;
    1151             : }
    1152             : 
    1153             : BUN
    1154           0 : HASHappend(BAT *b, BUN i, const void *v)
    1155             : {
    1156           0 :         BUN nunique;
    1157           0 :         MT_rwlock_wrlock(&b->thashlock);
    1158           0 :         HASHappend_locked(b, i, v);
    1159           0 :         nunique = b->thash ? b->thash->nunique : 0;
    1160           0 :         MT_rwlock_wrunlock(&b->thashlock);
    1161           0 :         return nunique;
    1162             : }
    1163             : 
    1164             : /* insert value v at position p into the hash table of b */
    1165             : void
    1166   160495767 : HASHinsert_locked(BATiter *bi, BUN p, const void *v)
    1167             : {
    1168   160495767 :         BAT *b = bi->b;
    1169   160495767 :         Hash *h = b->thash;
    1170   160495767 :         if (h == NULL) {
    1171             :                 return;
    1172             :         }
    1173       19354 :         if (h == (Hash *) 1) {
    1174           0 :                 b->thash = NULL;
    1175           0 :                 doHASHdestroy(b, h);
    1176           0 :                 GDKclrerr();
    1177           0 :                 return;
    1178             :         }
    1179       19354 :         assert(p * h->width < h->heaplink.free);
    1180       19354 :         if (h->nunique < b->batCount / hash_destroy_uniques_fraction) {
    1181           0 :                 b->thash = NULL;
    1182           0 :                 doHASHdestroy(b, h);
    1183           0 :                 GDKclrerr();
    1184           0 :                 return;
    1185             :         }
    1186       19354 :         if (HASHfix(h, false, true) != GDK_SUCCEED) {
    1187           0 :                 b->thash = NULL;
    1188           0 :                 doHASHdestroy(b, h);
    1189           0 :                 GDKclrerr();
    1190           0 :                 return;
    1191             :         }
    1192       19354 :         BUN c = HASHprobe(h, v);
    1193       19354 :         BUN hb = HASHget(h, c);
    1194       19354 :         int (*atomcmp)(const void *, const void *) = ATOMcompare(h->type);
    1195       19354 :         if (hb == BUN_NONE || hb < p) {
    1196             :                 /* bucket is empty, or bucket is used by lower numbered
    1197             :                  * position */
    1198        4400 :                 h->heaplink.dirty = true;
    1199        4400 :                 h->heapbckt.dirty = true;
    1200        4400 :                 HASHputlink(h, p, hb);
    1201        4400 :                 HASHput(h, c, p);
    1202        4400 :                 if (hb == BUN_NONE) {
    1203        2077 :                         h->nheads++;
    1204             :                 } else {
    1205        2722 :                         do {
    1206        2722 :                                 if (atomcmp(v, BUNtail(*bi, hb)) == 0) {
    1207             :                                         /* found another row with the
    1208             :                                          * same value, so don't
    1209             :                                          * increment nunique */
    1210             :                                         return;
    1211             :                                 }
    1212        1134 :                                 hb = HASHgetlink(h, hb);
    1213        1134 :                         } while (hb != BUN_NONE);
    1214             :                 }
    1215             :                 /* this is a new value */
    1216        2812 :                 h->nunique++;
    1217        2812 :                 return;
    1218             :         }
    1219             :         bool seen = false;
    1220      132594 :         for (;;) {
    1221      132594 :                 if (!seen)
    1222       15366 :                         seen = atomcmp(v, BUNtail(*bi, hb)) == 0;
    1223      132594 :                 BUN hb2 = HASHgetlink(h, hb);
    1224      132594 :                 if (hb2 == BUN_NONE || hb2 < p) {
    1225       14954 :                         h->heaplink.dirty = true;
    1226       14954 :                         HASHputlink(h, p, hb2);
    1227       14954 :                         HASHputlink(h, hb, p);
    1228       15240 :                         while (!seen && hb2 != BUN_NONE) {
    1229         286 :                                 seen = atomcmp(v, BUNtail(*bi, hb2)) == 0;
    1230         286 :                                 hb2 = HASHgetlink(h, hb2);
    1231             :                         }
    1232       14954 :                         if (!seen)
    1233         292 :                                 h->nunique++;
    1234       14954 :                         return;
    1235             :                 }
    1236             :                 hb = hb2;
    1237             :         }
    1238             : }
    1239             : 
    1240             : BUN
    1241           2 : HASHinsert(BATiter *bi, BUN p, const void *v)
    1242             : {
    1243           2 :         BUN nunique;
    1244           2 :         MT_rwlock_wrlock(&bi->b->thashlock);
    1245           2 :         HASHinsert_locked(bi, p, v);
    1246           2 :         nunique = bi->b->thash ? bi->b->thash->nunique : 0;
    1247           2 :         MT_rwlock_wrunlock(&bi->b->thashlock);
    1248           2 :         return nunique;
    1249             : }
    1250             : 
    1251             : /* delete value v at position p from the hash table of b */
    1252             : void
    1253   160515588 : HASHdelete_locked(BATiter *bi, BUN p, const void *v)
    1254             : {
    1255   160515588 :         BAT *b = bi->b;
    1256   160515588 :         Hash *h = b->thash;
    1257   160515588 :         if (h == NULL) {
    1258             :                 return;
    1259             :         }
    1260       19354 :         if (h == (Hash *) 1) {
    1261           0 :                 b->thash = NULL;
    1262           0 :                 doHASHdestroy(b, h);
    1263           0 :                 GDKclrerr();
    1264           0 :                 return;
    1265             :         }
    1266       19354 :         assert(p * h->width < h->heaplink.free);
    1267       19354 :         if (h->nunique < b->batCount / hash_destroy_uniques_fraction) {
    1268           0 :                 b->thash = NULL;
    1269           0 :                 doHASHdestroy(b, h);
    1270           0 :                 GDKclrerr();
    1271           0 :                 return;
    1272             :         }
    1273       19354 :         if (HASHfix(h, false, true) != GDK_SUCCEED) {
    1274           0 :                 b->thash = NULL;
    1275           0 :                 doHASHdestroy(b, h);
    1276           0 :                 GDKclrerr();
    1277           0 :                 return;
    1278             :         }
    1279       19354 :         BUN c = HASHprobe(h, v);
    1280       19354 :         BUN hb = HASHget(h, c);
    1281       19354 :         int (*atomcmp)(const void *, const void *) = ATOMcompare(h->type);
    1282       19354 :         if (hb == p) {
    1283        3186 :                 BUN hb2 = HASHgetlink(h, p);
    1284        3186 :                 h->heaplink.dirty = true;
    1285        3186 :                 h->heapbckt.dirty = true;
    1286        3186 :                 HASHput(h, c, hb2);
    1287        3186 :                 HASHputlink(h, p, BUN_NONE);
    1288        3186 :                 if (hb2 == BUN_NONE) {
    1289        1452 :                         h->nheads--;
    1290             :                 } else {
    1291        2176 :                         do {
    1292        2176 :                                 if (atomcmp(v, BUNtail(*bi, hb2)) == 0) {
    1293             :                                         /* found another row with the
    1294             :                                          * same value, so don't
    1295             :                                          * decrement nunique below */
    1296             :                                         return;
    1297             :                                 }
    1298        1194 :                                 hb2 = HASHgetlink(h, hb2);
    1299        1194 :                         } while (hb2 != BUN_NONE);
    1300             :                 }
    1301             :                 /* no rows found with the same value, so number of
    1302             :                  * unique values is one lower */
    1303        2204 :                 h->nunique--;
    1304        2204 :                 return;
    1305             :         }
    1306             :         bool seen = false;
    1307             :         BUN links = 0;
    1308      127843 :         for (;;) {
    1309      127843 :                 if (!seen)
    1310       16348 :                         seen = atomcmp(v, BUNtail(*bi, hb)) == 0;
    1311      127843 :                 BUN hb2 = HASHgetlink(h, hb);
    1312      127843 :                 assert(hb2 != BUN_NONE );
    1313      127843 :                 assert(hb2 < hb);
    1314      127843 :                 if (hb2 == p) {
    1315       16168 :                         for (hb2 = HASHgetlink(h, hb2);
    1316       16220 :                              !seen && hb2 != BUN_NONE;
    1317          52 :                              hb2 = HASHgetlink(h, hb2))
    1318          52 :                                 seen = atomcmp(v, BUNtail(*bi, hb2)) == 0;
    1319       16168 :                         break;
    1320             :                 }
    1321      111675 :                 hb = hb2;
    1322      111675 :                 if (++links > hash_destroy_chain_length) {
    1323           0 :                         b->thash = NULL;
    1324           0 :                         doHASHdestroy(b, h);
    1325           0 :                         GDKclrerr();
    1326           0 :                         return;
    1327             :                 }
    1328             :         }
    1329       16168 :         h->heaplink.dirty = true;
    1330       16168 :         HASHputlink(h, hb, HASHgetlink(h, p));
    1331       16168 :         HASHputlink(h, p, BUN_NONE);
    1332       16168 :         if (!seen)
    1333          94 :                 h->nunique--;
    1334             : }
    1335             : 
    1336             : BUN
    1337           6 : HASHdelete(BATiter *bi, BUN p, const void *v)
    1338             : {
    1339           6 :         BUN nunique;
    1340           6 :         MT_rwlock_wrlock(&bi->b->thashlock);
    1341           6 :         HASHdelete_locked(bi, p, v);
    1342           6 :         nunique = bi->b->thash ? bi->b->thash->nunique : 0;
    1343           6 :         MT_rwlock_wrunlock(&bi->b->thashlock);
    1344           6 :         return nunique;
    1345             : }
    1346             : 
    1347             : BUN
    1348           0 : HASHlist(Hash *h, BUN i)
    1349             : {
    1350           0 :         BUN c = 1;
    1351           0 :         BUN j = HASHget(h, i);
    1352             : 
    1353           0 :         if (j == BUN_NONE)
    1354             :                 return 1;
    1355           0 :         while ((j = HASHgetlink(h, i)) != BUN_NONE) {
    1356           0 :                 c++;
    1357           0 :                 i = j;
    1358             :         }
    1359             :         return c;
    1360             : }
    1361             : 
    1362             : void
    1363    18709326 : HASHdestroy(BAT *b)
    1364             : {
    1365    18709326 :         if (b) {
    1366    18709326 :                 Hash *hs;
    1367    18709326 :                 MT_rwlock_wrlock(&b->thashlock);
    1368    18710176 :                 hs = b->thash;
    1369    18710176 :                 b->thash = NULL;
    1370    18710176 :                 MT_rwlock_wrunlock(&b->thashlock);
    1371    18706878 :                 doHASHdestroy(b, hs);
    1372             :         }
    1373    18708345 : }
    1374             : 
    1375             : void
    1376      352924 : HASHfree(BAT *b)
    1377             : {
    1378      352924 :         if (b) {
    1379      352924 :                 Hash *h;
    1380      352924 :                 MT_rwlock_wrlock(&b->thashlock);
    1381      352924 :                 if ((h = b->thash) != NULL && h != (Hash *) 1) {
    1382        1988 :                         bool rmheap = h->heaplink.dirty || h->heapbckt.dirty;
    1383        1988 :                         TRC_DEBUG(ACCELERATOR, ALGOBATFMT " free hash %s\n",
    1384             :                                   ALGOBATPAR(b),
    1385             :                                   rmheap ? "removing" : "keeping");
    1386             : 
    1387        1988 :                         b->thash = rmheap ? NULL : (Hash *) 1;
    1388        1988 :                         HEAPfree(&h->heapbckt, rmheap);
    1389        1988 :                         HEAPfree(&h->heaplink, rmheap);
    1390        1988 :                         GDKfree(h);
    1391             :                 }
    1392      352924 :                 MT_rwlock_wrunlock(&b->thashlock);
    1393             :         }
    1394      352924 : }

Generated by: LCOV version 1.14