LCOV - code coverage report
Current view: top level - gdk - gdk_hash.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 556 770 72.2 %
Date: 2024-04-26 00:35:57 Functions: 19 23 82.6 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : /*
      14             :  * - Hash Table Creation
      15             :  * The hash indexing scheme for BATs reserves a block of memory to
      16             :  * maintain the hash table and a collision list. A one-to-one mapping
      17             :  * exists between the BAT and the collision list using the BUN
      18             :  * index. NOTE: we alloc the link list as a parallel array to the BUN
      19             :  * array; hence the hash link array has the same size as
      20             :  * BATcapacity(b) (not BATcount(b)). This allows us in the BUN insert
      21             :  * and delete to assume that there is hash space if there is BUN
      22             :  * space.
      23             :  *
      24             :  * The hash mask size is a power of two, so we can do bitwise AND on
      25             :  * the hash (integer) number to quickly find the head of the bucket
      26             :  * chain.  Clearly, the hash mask size is a crucial parameter. If we
      27             :  * know that the column is unique (tkey), we use direct hashing (mask
      28             :  * size ~= BATcount). Otherwise we dynamically determine the mask size
      29             :  * by starting out with mask size = BATcount/64 (just 1.5% of memory
      30             :  * storage overhead). Then we start building the hash table on the
      31             :  * first 25% of the BAT. As we aim for max-collisions list length of
      32             :  * 4, the list on 25% should not exceed length 1. So, if a small
      33             :  * number of collisssions occurs (mask/2) then we abandon the attempt
      34             :  * and restart with a mask that is 4 times larger. This converges
      35             :  * after three cycles to direct hashing.
      36             :  */
      37             : 
      38             : #include "monetdb_config.h"
      39             : #include "gdk.h"
      40             : #include "gdk_private.h"
      41             : 
      42             : static inline uint8_t __attribute__((__const__))
      43     1333870 : HASHwidth(BUN hashsize)
      44             : {
      45     1333870 :         (void) hashsize;
      46             : #ifdef BUN2
      47     1333870 :         if (hashsize <= (BUN) BUN2_NONE)
      48             :                 return BUN2;
      49             : #endif
      50             : #ifdef BUN8
      51         678 :         if (hashsize > (BUN) BUN4_NONE)
      52           0 :                 return BUN8;
      53             : #endif
      54             :         return BUN4;
      55             : }
      56             : 
      57             : static inline BUN __attribute__((__const__))
      58        6267 : hashmask(BUN m)
      59             : {
      60        6267 :         m |= m >> 1;
      61        6267 :         m |= m >> 2;
      62        6267 :         m |= m >> 4;
      63        6267 :         m |= m >> 8;
      64        6267 :         m |= m >> 16;
      65             : #if SIZEOF_BUN == 8
      66        6267 :         m |= m >> 32;
      67             : #endif
      68        6267 :         return m;
      69             : }
      70             : 
      71             : static inline void
      72      849866 : HASHclear(Hash *h)
      73             : {
      74             :         /* since BUN2_NONE, BUN4_NONE, BUN8_NONE
      75             :          * are all equal to ~0, i.e., have all bits set,
      76             :          * we can use a simple memset() to clear the Hash,
      77             :          * rather than iteratively assigning individual
      78             :          * BUNi_NONE values in a for-loop
      79             :          */
      80      849866 :         memset(h->Bckt, 0xFF, h->nbucket * h->width);
      81      849866 : }
      82             : 
      83             : #define HASH_VERSION            6
      84             : /* this is only for the change of hash function of the floating point
      85             :  * types, the UUID type and the MBR type; if HASH_VERSION is increased
      86             :  * again from 6, the code associated with HASH_VERSION_NOUUID and
      87             :  * HASH_VERSION_NOMBR must be deleted */
      88             : #define HASH_VERSION_FLOAT      5
      89             : #define HASH_VERSION_NOMBR      4
      90             : #define HASH_VERSION_NOUUID     3
      91             : #define HASH_HEADER_SIZE        7       /* nr of size_t fields in header */
      92             : 
      93             : void
      94        9310 : doHASHdestroy(BAT *b, Hash *hs)
      95             : {
      96        9310 :         if (hs == (Hash *) 1) {
      97           9 :                 GDKunlink(BBPselectfarm(b->batRole, b->ttype, hashheap),
      98             :                           BATDIR,
      99           9 :                           BBP_physical(b->batCacheid),
     100             :                           "thashl");
     101           9 :                 GDKunlink(BBPselectfarm(b->batRole, b->ttype, hashheap),
     102             :                           BATDIR,
     103           9 :                           BBP_physical(b->batCacheid),
     104             :                           "thashb");
     105        9301 :         } else if (hs) {
     106        9301 :                 TRC_DEBUG(ACCELERATOR, ALGOBATFMT ": removing%s hash\n", ALGOBATPAR(b), *(size_t *) hs->heapbckt.base & (1 << 24) ? " persisted" : "");
     107        9301 :                 HEAPfree(&hs->heapbckt, true);
     108        9308 :                 HEAPfree(&hs->heaplink, true);
     109        9311 :                 GDKfree(hs);
     110             :         }
     111        9319 : }
     112             : 
     113             : gdk_return
     114      849806 : HASHnew(Hash *h, int tpe, BUN size, BUN mask, BUN count, bool bcktonly)
     115             : {
     116      849806 :         if (h->width == 0)
     117      839669 :                 h->width = HASHwidth(size);
     118             : 
     119      849806 :         if (!bcktonly) {
     120      839017 :                 if (HEAPalloc(&h->heaplink, size, h->width) != GDK_SUCCEED)
     121             :                         return GDK_FAIL;
     122      839098 :                 h->heaplink.free = size * h->width;
     123      839098 :                 h->heaplink.dirty = true;
     124      839098 :                 h->Link = h->heaplink.base;
     125             :         }
     126      849887 :         if (HEAPalloc(&h->heapbckt, mask + HASH_HEADER_SIZE * SIZEOF_SIZE_T / h->width, h->width) != GDK_SUCCEED) {
     127           0 :                 if (!bcktonly) {
     128           0 :                         HEAPfree(&h->heaplink, true);
     129           0 :                         h->heaplink.free = 0;
     130           0 :                         h->Link = NULL;
     131             :                 }
     132           0 :                 return GDK_FAIL;
     133             :         }
     134      849882 :         h->heapbckt.free = mask * h->width + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     135      849882 :         h->heapbckt.dirty = true;
     136      849882 :         h->nbucket = mask;
     137      849882 :         if (mask & (mask - 1)) {
     138        6083 :                 h->mask2 = hashmask(mask);
     139        6083 :                 h->mask1 = h->mask2 >> 1;
     140             :         } else {
     141             :                 /* mask is a power of two */
     142      843799 :                 h->mask1 = mask - 1;
     143      843799 :                 h->mask2 = h->mask1 << 1 | 1;
     144             :         }
     145      849882 :         h->Bckt = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     146      849882 :         h->type = tpe;
     147      849882 :         HASHclear(h);           /* zero the mask */
     148      849866 :         ((size_t *) h->heapbckt.base)[0] = (size_t) HASH_VERSION;
     149      849866 :         ((size_t *) h->heapbckt.base)[1] = (size_t) size;
     150      849866 :         ((size_t *) h->heapbckt.base)[2] = (size_t) h->nbucket;
     151      849866 :         ((size_t *) h->heapbckt.base)[3] = (size_t) h->width;
     152      849866 :         ((size_t *) h->heapbckt.base)[4] = (size_t) count;
     153      849866 :         ((size_t *) h->heapbckt.base)[5] = (size_t) h->nunique;
     154      849866 :         ((size_t *) h->heapbckt.base)[6] = (size_t) h->nheads;
     155      849866 :         TRC_DEBUG(ACCELERATOR,
     156             :                   "create hash(size " BUNFMT ", mask " BUNFMT ", width %d, total " BUNFMT " bytes);\n", size, mask, h->width, (size + mask) * h->width);
     157             :         return GDK_SUCCEED;
     158             : }
     159             : 
     160             : /* collect HASH statistics for analysis */
     161             : static void
     162           0 : HASHcollisions(BAT *b, Hash *h, const char *func)
     163             : {
     164           0 :         lng cnt, entries = 0, max = 0;
     165           0 :         double total = 0;
     166           0 :         BUN p, i, j;
     167             : 
     168           0 :         if (b == 0 || h == 0)
     169             :                 return;
     170           0 :         for (i = 0, j = h->nbucket; i < j; i++)
     171           0 :                 if ((p = HASHget(h, i)) != BUN_NONE) {
     172           0 :                         entries++;
     173           0 :                         cnt = 0;
     174           0 :                         for (; p != BUN_NONE; p = HASHgetlink(h, p))
     175           0 :                                 cnt++;
     176           0 :                         if (cnt > max)
     177             :                                 max = cnt;
     178           0 :                         total += cnt;
     179             :                 }
     180           0 :         TRC_DEBUG_ENDIF(ACCELERATOR,
     181             :                         "%s(" ALGOBATFMT "): statistics " BUNFMT ", "
     182             :                         "entries " LLFMT ", nunique " BUNFMT ", "
     183             :                         "nbucket " BUNFMT ", max " LLFMT ", avg %2.6f;\n",
     184             :                         func, ALGOBATPAR(b), BATcount(b), entries,
     185             :                         h->nunique, h->nbucket, max,
     186             :                         entries == 0 ? 0 : total / entries);
     187             : }
     188             : 
     189             : static gdk_return
     190           0 : HASHupgradehashheap(BAT *b)
     191             : {
     192             : #if defined(BUN2) || defined(BUN8)
     193           0 :         Hash *h = b->thash;
     194           0 :         int nwidth = h->width << 1;
     195           0 :         BUN i;
     196             : 
     197           0 :         assert(nwidth <= SIZEOF_BUN);
     198           0 :         assert((nwidth & (nwidth - 1)) == 0);
     199             : 
     200           0 :         if (HEAPextend(&h->heaplink, h->heaplink.size * nwidth / h->width, true) != GDK_SUCCEED ||
     201           0 :             HEAPextend(&h->heapbckt, (h->heapbckt.size - HASH_HEADER_SIZE * SIZEOF_SIZE_T) * nwidth / h->width + HASH_HEADER_SIZE * SIZEOF_SIZE_T, true) != GDK_SUCCEED) {
     202           0 :                 b->thash = NULL;
     203           0 :                 doHASHdestroy(b, h);
     204           0 :                 return GDK_FAIL;
     205             :         }
     206           0 :         h->Link = h->heaplink.base;
     207           0 :         h->Bckt = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     208           0 :         switch (nwidth) {
     209           0 :         case BUN4:
     210             : #ifdef BUN2
     211           0 :                 switch (h->width) {
     212           0 :                 case BUN2:
     213           0 :                         i = h->heaplink.free / h->width;
     214           0 :                         h->heaplink.free = i * nwidth;
     215           0 :                         while (i > 0) {
     216           0 :                                 i--;
     217           0 :                                 BUN2type v = ((BUN2type *) h->Link)[i];
     218           0 :                                 ((BUN4type *) h->Link)[i] = v == BUN2_NONE ? BUN4_NONE : v;
     219             :                         }
     220           0 :                         i = (h->heapbckt.free - HASH_HEADER_SIZE * SIZEOF_SIZE_T) / h->width;
     221           0 :                         h->heapbckt.free = HASH_HEADER_SIZE * SIZEOF_SIZE_T + i * nwidth;
     222           0 :                         while (i > 0) {
     223           0 :                                 i--;
     224           0 :                                 BUN2type v = ((BUN2type *) h->Bckt)[i];
     225           0 :                                 ((BUN4type *) h->Bckt)[i] = v == BUN2_NONE ? BUN4_NONE : v;
     226             :                         }
     227           0 :                         h->heapbckt.dirty = true;
     228           0 :                         h->heaplink.dirty = true;
     229           0 :                         break;
     230             :                 }
     231             : #endif
     232             :                 break;
     233             : #ifdef BUN8
     234           0 :         case BUN8:
     235           0 :                 switch (h->width) {
     236             : #ifdef BUN2
     237           0 :                 case BUN2:
     238           0 :                         i = h->heaplink.free / h->width;
     239           0 :                         h->heaplink.free = i * nwidth;
     240           0 :                         while (i > 0) {
     241           0 :                                 i--;
     242           0 :                                 BUN2type v = ((BUN2type *) h->Link)[i];
     243           0 :                                 ((BUN8type *) h->Link)[i] = v == BUN2_NONE ? BUN8_NONE : v;
     244             :                         }
     245           0 :                         i = (h->heapbckt.free - HASH_HEADER_SIZE * SIZEOF_SIZE_T) / h->width;
     246           0 :                         h->heapbckt.free = HASH_HEADER_SIZE * SIZEOF_SIZE_T + i * nwidth;
     247           0 :                         while (i > 0) {
     248           0 :                                 i--;
     249           0 :                                 BUN2type v = ((BUN2type *) h->Bckt)[i];
     250           0 :                                 ((BUN8type *) h->Bckt)[i] = v == BUN2_NONE ? BUN8_NONE : v;
     251             :                         }
     252           0 :                         h->heapbckt.dirty = true;
     253           0 :                         h->heaplink.dirty = true;
     254           0 :                         break;
     255             : #endif
     256           0 :                 case BUN4:
     257           0 :                         i = h->heaplink.free / h->width;
     258           0 :                         h->heaplink.free = i * nwidth;
     259           0 :                         while (i > 0) {
     260           0 :                                 i--;
     261           0 :                                 BUN4type v = ((BUN4type *) h->Link)[i];
     262           0 :                                 ((BUN8type *) h->Link)[i] = v == BUN4_NONE ? BUN8_NONE : v;
     263             :                         }
     264           0 :                         i = (h->heapbckt.free - HASH_HEADER_SIZE * SIZEOF_SIZE_T) / h->width;
     265           0 :                         h->heapbckt.free = HASH_HEADER_SIZE * SIZEOF_SIZE_T + i * nwidth;
     266           0 :                         while (i > 0) {
     267           0 :                                 i--;
     268           0 :                                 BUN4type v = ((BUN4type *) h->Bckt)[i];
     269           0 :                                 ((BUN8type *) h->Bckt)[i] = v == BUN4_NONE ? BUN8_NONE : v;
     270             :                         }
     271           0 :                         h->heapbckt.dirty = true;
     272           0 :                         h->heaplink.dirty = true;
     273           0 :                         break;
     274             :                 }
     275             :                 break;
     276             : #endif
     277             :         }
     278           0 :         h->width = nwidth;
     279             : #else
     280             :         (void) b;
     281             : #endif
     282           0 :         return GDK_SUCCEED;
     283             : }
     284             : 
     285             : /* write/remove the bit into/from the hash file that indicates the hash
     286             :  * is good to go; the bit is the last part to be written and the first
     287             :  * to be removed */
     288             : static inline gdk_return
     289      563201 : HASHfix(Hash *h, bool save, bool dosync)
     290             : {
     291      563201 :         if (!h->heapbckt.dirty && !h->heaplink.dirty) {
     292       22492 :                 const size_t mask = (size_t) 1 << 24;
     293       22492 :                 if (((size_t *) h->heapbckt.base)[0] & mask) {
     294       10169 :                         if (save)
     295             :                                 return GDK_SUCCEED;
     296       10169 :                         ((size_t *) h->heapbckt.base)[0] &= ~mask;
     297             :                 } else {
     298       12323 :                         if (!save)
     299             :                                 return GDK_SUCCEED;
     300       12323 :                         ((size_t *) h->heapbckt.base)[0] |= mask;
     301             :                 }
     302       22492 :                 if (h->heapbckt.storage == STORE_MEM) {
     303       22484 :                         gdk_return rc = GDK_FAIL;
     304       22484 :                         int fd = GDKfdlocate(h->heapbckt.farmid, h->heapbckt.filename, "rb+", NULL);
     305       22484 :                         if (fd >= 0) {
     306       22484 :                                 if (write(fd, h->heapbckt.base, SIZEOF_SIZE_T) == SIZEOF_SIZE_T) {
     307       22484 :                                         if (dosync &&
     308       22484 :                                             !(ATOMIC_GET(&GDKdebug) & NOSYNCMASK)) {
     309             : #if defined(NATIVE_WIN32)
     310             :                                                 _commit(fd);
     311             : #elif defined(HAVE_FDATASYNC)
     312          20 :                                                 fdatasync(fd);
     313             : #elif defined(HAVE_FSYNC)
     314             :                                                 fsync(fd);
     315             : #endif
     316             :                                         }
     317             :                                         rc = GDK_SUCCEED;
     318             :                                 }
     319       22484 :                                 close(fd);
     320             :                         }
     321       22484 :                         if (rc != GDK_SUCCEED)
     322           0 :                                 ((size_t *) h->heapbckt.base)[0] &= ~mask;
     323       22484 :                         return rc;
     324             :                 } else {
     325           8 :                         if (dosync &&
     326          14 :                             !(ATOMIC_GET(&GDKdebug) & NOSYNCMASK) &&
     327           7 :                             MT_msync(h->heapbckt.base, SIZEOF_SIZE_T) < 0) {
     328           0 :                                 ((size_t *) h->heapbckt.base)[0] &= ~mask;
     329           0 :                                 return GDK_FAIL;
     330             :                         }
     331             :                 }
     332             :         }
     333             :         return GDK_SUCCEED;
     334             : }
     335             : 
     336             : static gdk_return
     337      484035 : HASHgrowbucket(BAT *b)
     338             : {
     339      484035 :         Hash *h = b->thash;
     340      484035 :         BUN nbucket;
     341      484035 :         BUN onbucket = h->nbucket;
     342      484035 :         lng t0 = 0;
     343             : 
     344      484035 :         TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec();
     345             : 
     346             :         /* only needed to fix hash tables built before this fix was
     347             :          * introduced */
     348      484035 :         if (h->width < SIZEOF_BUN &&
     349      484036 :             ((BUN) 1 << (h->width * 8)) - 1 <= h->mask2 &&
     350           0 :             HASHupgradehashheap(b) != GDK_SUCCEED)
     351             :                 return GDK_FAIL;
     352             : 
     353      484035 :         h->heapbckt.dirty = true;
     354      484035 :         h->heaplink.dirty = true;
     355      731542 :         while (h->nunique >= (nbucket = h->nbucket) * 7 / 8) {
     356      247507 :                 BUN new = h->nbucket;
     357      247507 :                 BUN old = new & h->mask1;
     358      247507 :                 BUN mask = h->mask1 + 1; /* == h->mask2 - h->mask1 */
     359             : 
     360      247507 :                 assert(h->heapbckt.free == nbucket * h->width + HASH_HEADER_SIZE * SIZEOF_SIZE_T);
     361      247507 :                 if (h->heapbckt.free + h->width > h->heapbckt.size) {
     362        3542 :                         if (HEAPextend(&h->heapbckt,
     363             :                                        h->heapbckt.size + GDK_mmap_pagesize,
     364             :                                        true) != GDK_SUCCEED) {
     365           0 :                                 return GDK_FAIL;
     366             :                         }
     367        3542 :                         h->Bckt = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     368             :                 }
     369      247507 :                 assert(h->heapbckt.free + h->width <= h->heapbckt.size);
     370      247507 :                 if (h->nbucket == h->mask2) {
     371         275 :                         h->mask1 = h->mask2;
     372         275 :                         h->mask2 |= h->mask2 << 1;
     373         275 :                         if (h->width < SIZEOF_BUN &&
     374         275 :                             h->mask2 == ((BUN) 1 << (h->width * 8)) - 1) {
     375             :                                 /* time to widen the hash table */
     376           0 :                                 if (HASHupgradehashheap(b) != GDK_SUCCEED)
     377             :                                         return GDK_FAIL;
     378             :                         }
     379             :                 }
     380      247507 :                 h->nbucket++;
     381      247507 :                 h->heapbckt.free += h->width;
     382      247507 :                 BUN lold, lnew, hb;
     383      247507 :                 lold = lnew = BUN_NONE;
     384      247507 :                 BATiter bi = bat_iterator(b);
     385      247507 :                 if ((hb = HASHget(h, old)) != BUN_NONE) {
     386      217837 :                         h->nheads--;
     387      333727 :                         do {
     388      333727 :                                 const void *v = BUNtail(bi, hb);
     389      333727 :                                 BUN hsh = ATOMhash(h->type, v);
     390      333727 :                                 assert((hsh & (mask - 1)) == old);
     391      333727 :                                 if (hsh & mask) {
     392             :                                         /* move to new list */
     393      160080 :                                         if (lnew == BUN_NONE) {
     394      153967 :                                                 HASHput(h, new, hb);
     395      153967 :                                                 h->nheads++;
     396             :                                         } else
     397        6113 :                                                 HASHputlink(h, lnew, hb);
     398             :                                         lnew = hb;
     399             :                                 } else {
     400             :                                         /* move to old list */
     401      173647 :                                         if (lold == BUN_NONE) {
     402      141803 :                                                 h->nheads++;
     403      141803 :                                                 HASHput(h, old, hb);
     404             :                                         } else
     405       31844 :                                                 HASHputlink(h, lold, hb);
     406             :                                         lold = hb;
     407             :                                 }
     408      333727 :                                 hb = HASHgetlink(h, hb);
     409      333727 :                         } while (hb != BUN_NONE);
     410             :                 }
     411      247507 :                 bat_iterator_end(&bi);
     412      247507 :                 if (lnew == BUN_NONE)
     413       93540 :                         HASHput(h, new, BUN_NONE);
     414             :                 else
     415      153967 :                         HASHputlink(h, lnew, BUN_NONE);
     416      247507 :                 if (lold == BUN_NONE)
     417      105704 :                         HASHput(h, old, BUN_NONE);
     418             :                 else
     419      141803 :                         HASHputlink(h, lold, BUN_NONE);
     420             :         }
     421      484035 :         TRC_DEBUG_IF(ACCELERATOR) if (h->nbucket > onbucket) {
     422           0 :                 TRC_DEBUG_ENDIF(ACCELERATOR, ALGOBATFMT " " BUNFMT
     423             :                         " -> " BUNFMT " buckets (" LLFMT " usec)\n",
     424             :                         ALGOBATPAR(b),
     425             :                         onbucket, h->nbucket, GDKusec() - t0);
     426           0 :                 HASHcollisions(b, h, __func__);
     427             :         }
     428             :         return GDK_SUCCEED;
     429             : }
     430             : 
     431             : /* Return TRUE if we have a hash on the tail, even if we need to read
     432             :  * one from disk.
     433             :  *
     434             :  * Note that the b->thash pointer can be NULL, meaning there is no
     435             :  * hash; (Hash *) 1, meaning there is no hash loaded, but it may exist
     436             :  * on disk; or a valid pointer to a loaded hash.  These values are
     437             :  * maintained here, in the HASHdestroy and HASHfree functions, and in
     438             :  * BBPdiskscan during initialization. */
     439             : bool
     440    70971546 : BATcheckhash(BAT *b)
     441             : {
     442    70971546 :         lng t = 0;
     443    70971546 :         Hash *h;
     444             : 
     445    70971546 :         MT_rwlock_rdlock(&b->thashlock);
     446    71025178 :         h = b->thash;
     447    71025178 :         MT_rwlock_rdunlock(&b->thashlock);
     448    71011976 :         if (h == (Hash *) 1) {
     449             :                 /* but when we want to change it, we need the lock */
     450         419 :                 TRC_DEBUG_IF(ACCELERATOR) t = GDKusec();
     451         419 :                 MT_rwlock_wrlock(&b->thashlock);
     452         419 :                 TRC_DEBUG_IF(ACCELERATOR) t = GDKusec() - t;
     453             :                 /* if still 1 now that we have the lock, we can update */
     454         419 :                 if (b->thash == (Hash *) 1) {
     455         419 :                         int fd;
     456             : 
     457         419 :                         assert(!GDKinmemory(b->theap->farmid));
     458         419 :                         b->thash = NULL;
     459         419 :                         if ((h = GDKzalloc(sizeof(*h))) != NULL &&
     460         419 :                             (h->heaplink.farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) >= 0 &&
     461         419 :                             (h->heapbckt.farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) >= 0) {
     462         419 :                                 const char *nme = BBP_physical(b->batCacheid);
     463         419 :                                 strconcat_len(h->heaplink.filename,
     464             :                                               sizeof(h->heaplink.filename),
     465             :                                               nme, ".thashl", NULL);
     466         419 :                                 strconcat_len(h->heapbckt.filename,
     467             :                                               sizeof(h->heapbckt.filename),
     468             :                                               nme, ".thashb", NULL);
     469         419 :                                 h->heaplink.storage = STORE_INVALID;
     470         419 :                                 h->heaplink.newstorage = STORE_INVALID;
     471         419 :                                 h->heapbckt.storage = STORE_INVALID;
     472         419 :                                 h->heapbckt.newstorage = STORE_INVALID;
     473             : 
     474             :                                 /* check whether a persisted hash can be found */
     475         419 :                                 if ((fd = GDKfdlocate(h->heapbckt.farmid, nme, "rb+", "thashb")) >= 0) {
     476         419 :                                         size_t hdata[HASH_HEADER_SIZE];
     477         419 :                                         struct stat st;
     478             : 
     479         419 :                                         if (read(fd, hdata, sizeof(hdata)) == sizeof(hdata) &&
     480         419 :                                             (hdata[0] == (
     481             : #ifdef PERSISTENTHASH
     482             :                                                     ((size_t) 1 << 24) |
     483             : #endif
     484             :                                                     HASH_VERSION)
     485             : #ifdef HASH_VERSION_NOUUID
     486             :                                              /* if not uuid, also allow previous version */
     487         210 :                                              || (hdata[0] == (
     488             : #ifdef PERSISTENTHASH
     489             :                                                          ((size_t) 1 << 24) |
     490             : #endif
     491           0 :                                                          HASH_VERSION_NOUUID) &&
     492           0 :                                                  strcmp(ATOMname(b->ttype), "flt") != 0 &&
     493           0 :                                                  strcmp(ATOMname(b->ttype), "dbl") != 0 &&
     494           0 :                                                  strcmp(ATOMname(b->ttype), "uuid") != 0 &&
     495           0 :                                                  strcmp(ATOMname(b->ttype), "mbr") != 0)
     496             : #endif
     497             : #ifdef HASH_VERSION_NOMBR
     498             :                                              /* if not uuid, also allow previous version */
     499         210 :                                              || (hdata[0] == (
     500             : #ifdef PERSISTENTHASH
     501             :                                                          ((size_t) 1 << 24) |
     502             : #endif
     503         120 :                                                          HASH_VERSION_NOMBR) &&
     504         120 :                                                  strcmp(ATOMname(b->ttype), "flt") != 0 &&
     505         120 :                                                  strcmp(ATOMname(b->ttype), "dbl") != 0 &&
     506         120 :                                                  strcmp(ATOMname(b->ttype), "mbr") != 0)
     507             : #endif
     508             : #ifdef HASH_VERSION_FLOAT
     509             :                                              /* if not floating point, also allow previous version */
     510          90 :                                              || (hdata[0] == (
     511             : #ifdef PERSISTENTHASH
     512             :                                                          ((size_t) 1 << 24) |
     513             : #endif
     514          89 :                                                          HASH_VERSION_FLOAT) &&
     515          89 :                                                  strcmp(ATOMname(b->ttype), "flt") != 0 &&
     516          89 :                                                  strcmp(ATOMname(b->ttype), "dbl") != 0)
     517             : #endif
     518         418 :                                                     ) &&
     519         418 :                                             hdata[1] > 0 &&
     520             :                                             (
     521             : #ifdef BUN2
     522         418 :                                                     hdata[3] == BUN2 ||
     523             : #endif
     524             :                                                     hdata[3] == BUN4
     525             : #ifdef BUN8
     526             :                                                     || hdata[3] == BUN8
     527             : #endif
     528         418 :                                                     ) &&
     529         836 :                                             hdata[4] == (size_t) BATcount(b) &&
     530         418 :                                             fstat(fd, &st) == 0 &&
     531         836 :                                             st.st_size >= (off_t) (h->heapbckt.size = h->heapbckt.free = (h->nbucket = (BUN) hdata[2]) * (BUN) (h->width = (uint8_t) hdata[3]) + HASH_HEADER_SIZE * SIZEOF_SIZE_T) &&
     532         418 :                                             close(fd) == 0 &&
     533         836 :                                             (fd = GDKfdlocate(h->heaplink.farmid, nme, "rb+", "thashl")) >= 0 &&
     534         418 :                                             fstat(fd, &st) == 0 &&
     535         418 :                                             st.st_size > 0 &&
     536         836 :                                             st.st_size >= (off_t) (h->heaplink.size = h->heaplink.free = hdata[1] * h->width) &&
     537         418 :                                             HEAPload(&h->heaplink, nme, "thashl", false) == GDK_SUCCEED) {
     538         418 :                                                 if (HEAPload(&h->heapbckt, nme, "thashb", false) == GDK_SUCCEED) {
     539         418 :                                                         if (h->nbucket & (h->nbucket - 1)) {
     540         184 :                                                                 h->mask2 = hashmask(h->nbucket);
     541         184 :                                                                 h->mask1 = h->mask2 >> 1;
     542             :                                                         } else {
     543         234 :                                                                 h->mask1 = h->nbucket - 1;
     544         234 :                                                                 h->mask2 = h->mask1 << 1 | 1;
     545             :                                                         }
     546         418 :                                                         h->nunique = hdata[5];
     547         418 :                                                         h->nheads = hdata[6];
     548         418 :                                                         h->type = ATOMtype(b->ttype);
     549         418 :                                                         if (h->width < SIZEOF_BUN &&
     550         418 :                                                             ((BUN) 1 << (8 * h->width)) - 1 > h->nbucket) {
     551         418 :                                                                 close(fd);
     552         418 :                                                                 h->Link = h->heaplink.base;
     553         418 :                                                                 h->Bckt = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     554         418 :                                                                 h->heaplink.parentid = b->batCacheid;
     555         418 :                                                                 h->heapbckt.parentid = b->batCacheid;
     556         418 :                                                                 h->heaplink.dirty = false;
     557         418 :                                                                 h->heapbckt.dirty = false;
     558         418 :                                                                 b->thash = h;
     559         418 :                                                                 h->heapbckt.hasfile = true;
     560         418 :                                                                 h->heaplink.hasfile = true;
     561         418 :                                                                 TRC_DEBUG(ACCELERATOR,
     562             :                                                                           ALGOBATFMT ": reusing persisted hash\n", ALGOBATPAR(b));
     563         418 :                                                                 MT_rwlock_wrunlock(&b->thashlock);
     564         418 :                                                                 return true;
     565             :                                                         }
     566             :                                                         /* if h->nbucket
     567             :                                                          * equals the
     568             :                                                          * BUN_NONE
     569             :                                                          * representation
     570             :                                                          * for the
     571             :                                                          * current hash
     572             :                                                          * width (was
     573             :                                                          * possible in
     574             :                                                          * previous
     575             :                                                          * iterations of
     576             :                                                          * the code),
     577             :                                                          * then we can't
     578             :                                                          * use the hash
     579             :                                                          * since we
     580             :                                                          * can't
     581             :                                                          * distinguish
     582             :                                                          * between
     583             :                                                          * end-of-list
     584             :                                                          * and a valid
     585             :                                                          * link */
     586           0 :                                                         HEAPfree(&h->heapbckt, false);
     587             :                                                 }
     588           0 :                                                 HEAPfree(&h->heaplink, false);
     589             :                                         }
     590           1 :                                         close(fd);
     591             :                                         /* unlink unusable file */
     592           1 :                                         GDKunlink(h->heaplink.farmid, BATDIR, nme, "thashl");
     593           1 :                                         GDKunlink(h->heapbckt.farmid, BATDIR, nme, "thashb");
     594           1 :                                         h->heapbckt.hasfile = false;
     595           1 :                                         h->heaplink.hasfile = false;
     596             :                                 }
     597             :                         }
     598           1 :                         GDKfree(h);
     599           1 :                         GDKclrerr();    /* we're not currently interested in errors */
     600             :                 }
     601           1 :                 h = b->thash;
     602           1 :                 MT_rwlock_wrunlock(&b->thashlock);
     603             :         }
     604    71011558 :         if (h != NULL) {
     605     3827224 :                 TRC_DEBUG(ACCELERATOR, ALGOBATFMT ": already has hash, waited " LLFMT " usec\n", ALGOBATPAR(b), t);
     606             :         }
     607    71011558 :         return h != NULL;
     608             : }
     609             : 
     610             : static void
     611       12861 : BAThashsave_intern(BAT *b, bool dosync)
     612             : {
     613       12861 :         Hash *h;
     614       12861 :         lng t0 = 0;
     615             : 
     616       12861 :         TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec();
     617             : 
     618       12861 :         if ((h = b->thash) != NULL) {
     619             : #ifndef PERSISTENTHASH
     620             :                 /* no need to sync if not persistent */
     621             :                 dosync = false;
     622             : #endif
     623             : 
     624             :                 /* only persist if parent BAT hasn't changed in the
     625             :                  * mean time */
     626       12861 :                 if (!b->theap->dirty &&
     627       12323 :                     ((size_t *) h->heapbckt.base)[1] == BATcount(b) &&
     628       24646 :                     ((size_t *) h->heapbckt.base)[4] == BATcount(b) &&
     629       24646 :                     HEAPsave(&h->heaplink, h->heaplink.filename, NULL, dosync, h->heaplink.free, NULL) == GDK_SUCCEED &&
     630       12323 :                     HEAPsave(&h->heapbckt, h->heapbckt.filename, NULL, dosync, h->heapbckt.free, NULL) == GDK_SUCCEED) {
     631       12323 :                         h->heaplink.dirty = false;
     632       12323 :                         h->heapbckt.dirty = false;
     633       12323 :                         h->heaplink.hasfile = true;
     634       12323 :                         h->heapbckt.hasfile = true;
     635       12323 :                         gdk_return rc = HASHfix(h, true, dosync);
     636       12323 :                         TRC_DEBUG(ACCELERATOR,
     637             :                                   ALGOBATFMT ": persisting hash %s%s (" LLFMT " usec)%s\n", ALGOBATPAR(b), h->heapbckt.filename, dosync ? "" : " no sync", GDKusec() - t0, rc == GDK_SUCCEED ? "" : " failed");
     638             :                 }
     639       12861 :                 GDKclrerr();
     640             :         }
     641       12861 : }
     642             : 
     643             : void
     644       12861 : BAThashsave(BAT *b, bool dosync)
     645             : {
     646       12861 :         Hash *h = b->thash;
     647       12861 :         if (h == NULL)
     648             :                 return;
     649       12861 :         ((size_t *) h->heapbckt.base)[0] = (size_t) HASH_VERSION;
     650       12861 :         ((size_t *) h->heapbckt.base)[1] = (size_t) (h->heaplink.free / h->width);
     651       12861 :         ((size_t *) h->heapbckt.base)[2] = (size_t) h->nbucket;
     652       12861 :         ((size_t *) h->heapbckt.base)[3] = (size_t) h->width;
     653       12861 :         ((size_t *) h->heapbckt.base)[4] = (size_t) BATcount(b);
     654       12861 :         ((size_t *) h->heapbckt.base)[5] = (size_t) h->nunique;
     655       12861 :         ((size_t *) h->heapbckt.base)[6] = (size_t) h->nheads;
     656       12861 :         BAThashsave_intern(b, dosync);
     657             : }
     658             : 
     659             : #define EQbte(a, b)     ((a) == (b))
     660             : #define EQsht(a, b)     ((a) == (b))
     661             : #define EQint(a, b)     ((a) == (b))
     662             : #define EQlng(a, b)     ((a) == (b))
     663             : #ifdef HAVE_HGE
     664             : #define EQhge(a, b)     ((a) == (b))
     665             : #endif
     666             : #define EQflt(a, b)     (is_flt_nil(a) ? is_flt_nil(b) : (a) == (b))
     667             : #define EQdbl(a, b)     (is_dbl_nil(a) ? is_dbl_nil(b) : (a) == (b))
     668             : #ifdef HAVE_HGE
     669             : #define EQuuid(a, b)    ((a).h == (b).h)
     670             : #else
     671             : #define EQuuid(a, b)    (memcmp((a).u, (b).u, UUID_SIZE) == 0)
     672             : #endif
     673             : 
     674             : #define starthash(TYPE)                                                 \
     675             :         do {                                                            \
     676             :                 const TYPE *restrict v = (const TYPE *) BUNtloc(bi, 0); \
     677             :                 TIMEOUT_LOOP(p, qry_ctx) {                              \
     678             :                         hget = HASHget(h, c);                           \
     679             :                         if (hget == BUN_NONE) {                         \
     680             :                                 if (h->nheads == maxslots)           \
     681             :                                         TIMEOUT_LOOP_BREAK; /* mask too full */ \
     682             :                                 h->nheads++;                         \
     683             :                                 h->nunique++;                                \
     684             :                         } else {                                        \
     685             :                                 for (hb = hget;                         \
     686             :                                      hb != BUN_NONE;                    \
     687             :                                      hb = HASHgetlink(h, hb)) {         \
     688             :                                         if (EQ##TYPE(v[o - b->hseqbase], v[hb])) \
     689             :                                                 break;                  \
     690             :                                 }                                       \
     691             :                                 h->nunique += hb == BUN_NONE;                \
     692             :                         }                                               \
     693             :                         HASHputlink(h, p, hget);                        \
     694             :                         HASHput(h, c, p);                               \
     695             :                         o = canditer_next(ci);                          \
     696             :                 }                                                       \
     697             :                 TIMEOUT_CHECK(qry_ctx,                                  \
     698             :                               GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); \
     699             :         } while (0)
     700             : #define finishhash(TYPE)                                                \
     701             :         do {                                                            \
     702             :                 const TYPE *restrict v = (const TYPE *) BUNtloc(bi, 0); \
     703             :                 TIMEOUT_LOOP(ci->ncand - p, qry_ctx) {                       \
     704             :                         c = hash_##TYPE(h, v + o - b->hseqbase);     \
     705             :                         hget = HASHget(h, c);                           \
     706             :                         h->nheads += hget == BUN_NONE;                       \
     707             :                         if (!hascand) {                                 \
     708             :                                 for (hb = hget;                         \
     709             :                                      hb != BUN_NONE;                    \
     710             :                                      hb = HASHgetlink(h, hb)) {         \
     711             :                                         if (EQ##TYPE(v[o - b->hseqbase], v[hb])) \
     712             :                                                 break;                  \
     713             :                                 }                                       \
     714             :                                 h->nunique += hb == BUN_NONE;                \
     715             :                                 o = canditer_next_dense(ci);            \
     716             :                         } else {                                        \
     717             :                                 o = canditer_next(ci);                  \
     718             :                         }                                               \
     719             :                         HASHputlink(h, p, hget);                        \
     720             :                         HASHput(h, c, p);                               \
     721             :                         p++;                                            \
     722             :                 }                                                       \
     723             :                 TIMEOUT_CHECK(qry_ctx,                                  \
     724             :                               GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); \
     725             :         } while (0)
     726             : 
     727             : /* Internal function to create a hash table for the given BAT b.
     728             :  * If a candidate list s is also given, the hash table is specific for
     729             :  * the combination of the two: only values from b that are referred to
     730             :  * by s are included in the hash table, so if a result is found when
     731             :  * searching the hash table, the result is a candidate. */
     732             : Hash *
     733       10789 : BAThash_impl(BAT *restrict b, struct canditer *restrict ci, const char *restrict ext)
     734             : {
     735       10789 :         lng t0 = 0;
     736       10789 :         BUN cnt1;
     737       10789 :         BUN mask, maxmask = 0;
     738       10789 :         BUN p, c;
     739       10789 :         oid o;
     740       10789 :         BUN hget, hb;
     741       10789 :         Hash *h = NULL;
     742       10789 :         const char *nme = GDKinmemory(b->theap->farmid) ? ":memory:" : BBP_physical(b->batCacheid);
     743       10789 :         BATiter bi = bat_iterator(b);
     744       10796 :         unsigned int tpe = ATOMbasetype(bi.type);
     745       10796 :         bool hascand = ci->tpe != cand_dense || ci->ncand != bi.count;
     746             : 
     747       10796 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
     748             : 
     749       10797 :         assert(strcmp(ext, "thash") != 0 || !hascand);
     750       10797 :         assert(bi.type != TYPE_msk);
     751             : 
     752       21457 :         MT_thread_setalgorithm(hascand ? "create hash with candidates" : "create hash");
     753       10801 :         TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec();
     754       10801 :         TRC_DEBUG(ACCELERATOR,
     755             :                   ALGOBATFMT ": create hash;\n", ALGOBATPAR(b));
     756       10792 :         if (bi.type == TYPE_void) {
     757           0 :                 if (is_oid_nil(b->tseqbase)) {
     758           0 :                         TRC_DEBUG(ACCELERATOR,
     759             :                                   "cannot create hash-table on void-NIL column.\n");
     760           0 :                         GDKerror("no hash on void/nil column\n");
     761           0 :                         bat_iterator_end(&bi);
     762           0 :                         return NULL;
     763             :                 }
     764           0 :                 TRC_DEBUG(ACCELERATOR,
     765             :                           "creating hash-table on void column..\n");
     766           0 :                 assert(0);
     767             :                 tpe = TYPE_void;
     768             :         }
     769             : 
     770       10792 :         if ((h = GDKzalloc(sizeof(*h))) == NULL ||
     771       10800 :             (h->heaplink.farmid = BBPselectfarm(hascand ? TRANSIENT : b->batRole, bi.type, hashheap)) < 0 ||
     772       10800 :             (h->heapbckt.farmid = BBPselectfarm(hascand ? TRANSIENT : b->batRole, bi.type, hashheap)) < 0) {
     773           0 :                 GDKfree(h);
     774           0 :                 bat_iterator_end(&bi);
     775           0 :                 return NULL;
     776             :         }
     777       10798 :         h->width = HASHwidth(BATcapacity(b));
     778       10798 :         h->heaplink.dirty = true;
     779       10798 :         h->heapbckt.dirty = true;
     780       10798 :         strconcat_len(h->heaplink.filename, sizeof(h->heaplink.filename),
     781             :                       nme, ".", ext, "l", NULL);
     782       10799 :         strconcat_len(h->heapbckt.filename, sizeof(h->heapbckt.filename),
     783             :                       nme, ".", ext, "b", NULL);
     784       10794 :         h->heapbckt.parentid = b->batCacheid;
     785       10794 :         h->heaplink.parentid = b->batCacheid;
     786       10794 :         if (HEAPalloc(&h->heaplink, hascand ? ci->ncand : BATcapacity(b),
     787       10794 :                       h->width) != GDK_SUCCEED) {
     788           0 :                 GDKfree(h);
     789           0 :                 bat_iterator_end(&bi);
     790           0 :                 return NULL;
     791             :         }
     792       10796 :         h->heaplink.free = ci->ncand * h->width;
     793       10796 :         h->Link = h->heaplink.base;
     794             : #ifndef NDEBUG
     795             :         /* clear unused part of Link array */
     796       10796 :         if (h->heaplink.size > h->heaplink.free)
     797        9585 :                 memset(h->heaplink.base + h->heaplink.free, 0,
     798             :                        h->heaplink.size - h->heaplink.free);
     799             : #endif
     800             : 
     801             :         /* determine hash mask size */
     802       10796 :         cnt1 = 0;
     803       10796 :         if (ATOMsize(tpe) == 1) {
     804             :                 /* perfect hash for one-byte sized atoms */
     805             :                 mask = (1 << 8);
     806       10786 :         } else if (ATOMsize(tpe) == 2) {
     807             :                 /* perfect hash for two-byte sized atoms */
     808             :                 mask = (1 << 16);
     809       10749 :         } else if (bi.key || ci->ncand <= 4096) {
     810             :                 /* if key, or if small, don't bother dynamically
     811             :                  * adjusting the hash mask */
     812       10592 :                 mask = HASHmask(ci->ncand);
     813         157 :         } else if (!hascand && bi.unique_est != 0) {
     814         157 :                 mask = (BUN) (bi.unique_est * 1.15); /* about 8/7 */
     815             :         } else {
     816             :                 /* dynamic hash: we start with HASHmask(ci->ncand)/64, or,
     817             :                  * if ci->ncand large enough, HASHmask(ci->ncand)/256; if there
     818             :                  * are too many collisions we try HASHmask(ci->ncand)/64,
     819             :                  * HASHmask(ci->ncand)/16, HASHmask(ci->ncand)/4, and finally
     820             :                  * HASHmask(ci->ncand), but we might skip some of these if
     821             :                  * there are many distinct values.  */
     822           0 :                 maxmask = HASHmask(ci->ncand);
     823           0 :                 mask = maxmask >> 6;
     824           0 :                 while (mask > 4096)
     825           0 :                         mask >>= 2;
     826             :                 /* try out on first 25% of b */
     827           0 :                 cnt1 = ci->ncand >> 2;
     828             :         }
     829             : 
     830       10796 :         o = canditer_next(ci);  /* always one ahead */
     831           0 :         for (;;) {
     832       10791 :                 lng t1 = 0;
     833       10791 :                 TRC_DEBUG_IF(ACCELERATOR) t1 = GDKusec();
     834       10791 :                 BUN maxslots = (mask >> 3) - 1;   /* 1/8 full is too full */
     835             : 
     836       10791 :                 h->nheads = 0;
     837       10791 :                 h->nunique = 0;
     838       10791 :                 p = 0;
     839       10791 :                 HEAPfree(&h->heapbckt, true);
     840             :                 /* create the hash structures */
     841       21589 :                 if (HASHnew(h, ATOMtype(bi.type), BATcapacity(b),
     842             :                             mask, ci->ncand, true) != GDK_SUCCEED) {
     843           0 :                         HEAPfree(&h->heaplink, true);
     844           0 :                         GDKfree(h);
     845           0 :                         bat_iterator_end(&bi);
     846           0 :                         return NULL;
     847             :                 }
     848             : 
     849       10783 :                 switch (tpe) {
     850          10 :                 case TYPE_bte:
     851          20 :                         starthash(bte);
     852             :                         break;
     853          37 :                 case TYPE_sht:
     854          74 :                         starthash(sht);
     855             :                         break;
     856          17 :                 case TYPE_flt:
     857          34 :                         starthash(flt);
     858             :                         break;
     859        9216 :                 case TYPE_int:
     860       18427 :                         starthash(int);
     861             :                         break;
     862           6 :                 case TYPE_dbl:
     863          12 :                         starthash(dbl);
     864             :                         break;
     865         753 :                 case TYPE_lng:
     866        1506 :                         starthash(lng);
     867             :                         break;
     868             : #ifdef HAVE_HGE
     869           7 :                 case TYPE_hge:
     870          14 :                         starthash(hge);
     871             :                         break;
     872             : #endif
     873           1 :                 case TYPE_uuid:
     874           2 :                         starthash(uuid);
     875             :                         break;
     876             :                 default:
     877        1472 :                         TIMEOUT_LOOP(p, qry_ctx) {
     878           0 :                                 const void *restrict v = BUNtail(bi, o - b->hseqbase);
     879           0 :                                 c = hash_any(h, v);
     880           0 :                                 hget = HASHget(h, c);
     881           0 :                                 if (hget == BUN_NONE) {
     882           0 :                                         if (h->nheads == maxslots)
     883           0 :                                                 TIMEOUT_LOOP_BREAK; /* mask too full */
     884           0 :                                         h->nheads++;
     885           0 :                                         h->nunique++;
     886             :                                 } else {
     887             :                                         for (hb = hget;
     888           0 :                                              hb != BUN_NONE;
     889           0 :                                              hb = HASHgetlink(h, hb)) {
     890           0 :                                                 if (ATOMcmp(h->type,
     891             :                                                             v,
     892             :                                                             BUNtail(bi, hb)) == 0)
     893             :                                                         break;
     894             :                                         }
     895           0 :                                         h->nunique += hb == BUN_NONE;
     896             :                                 }
     897           0 :                                 HASHputlink(h, p, hget);
     898           0 :                                 HASHput(h, c, p);
     899           0 :                                 o = canditer_next(ci);
     900             :                         }
     901         736 :                         TIMEOUT_CHECK(qry_ctx,
     902             :                                       GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
     903             :                         break;
     904             :                 }
     905       10781 :                 TRC_DEBUG_IF(ACCELERATOR) if (p < cnt1)
     906           0 :                         TRC_DEBUG_ENDIF(ACCELERATOR,
     907             :                                         "%s: abort starthash with "
     908             :                                 "mask " BUNFMT " at " BUNFMT " after " LLFMT " usec\n", BATgetId(b), mask, p, GDKusec() - t1);
     909       10781 :                 if (p == cnt1 || mask == maxmask)
     910             :                         break;
     911          10 :                 mask <<= 2;
     912             :                 /* if we fill up the slots fast (p <= maxslots * 1.2)
     913             :                  * increase mask size a bit more quickly */
     914          10 :                 if (p == h->nunique) {
     915             :                         /* only unique values so far: grow really fast */
     916             :                         mask = maxmask;
     917             :                         cnt1 = 0;
     918           0 :                 } else if (mask < maxmask && p <= maxslots * 1.2)
     919           0 :                         mask <<= 2;
     920          10 :                 canditer_reset(ci);
     921           0 :                 o = canditer_next(ci);
     922             :         }
     923             : 
     924             :         /* finish the hashtable with the current mask */
     925       10771 :         switch (tpe) {
     926          10 :         case TYPE_bte:
     927         343 :                 finishhash(bte);
     928             :                 break;
     929          37 :         case TYPE_sht:
     930         381 :                 finishhash(sht);
     931             :                 break;
     932        9208 :         case TYPE_int:
     933    34930565 :                 finishhash(int);
     934             :                 break;
     935          17 :         case TYPE_flt:
     936         354 :                 finishhash(flt);
     937             :                 break;
     938           6 :         case TYPE_dbl:
     939          49 :                 finishhash(dbl);
     940             :                 break;
     941         751 :         case TYPE_lng:
     942    16537946 :                 finishhash(lng);
     943             :                 break;
     944             : #ifdef HAVE_HGE
     945           7 :         case TYPE_hge:
     946         114 :                 finishhash(hge);
     947             :                 break;
     948             : #endif
     949           1 :         case TYPE_uuid:
     950           2 :                 finishhash(uuid);
     951             :                 break;
     952         734 :         default:
     953      308630 :                 TIMEOUT_LOOP(ci->ncand - p, qry_ctx) {
     954      307155 :                         const void *restrict v = BUNtail(bi, o - b->hseqbase);
     955      307075 :                         c = hash_any(h, v);
     956      307261 :                         hget = HASHget(h, c);
     957      307261 :                         h->nheads += hget == BUN_NONE;
     958      307261 :                         if (!hascand) {
     959             :                                 for (hb = hget;
     960      391769 :                                      hb != BUN_NONE;
     961       84260 :                                      hb = HASHgetlink(h, hb)) {
     962      237030 :                                         if (ATOMcmp(h->type, v, BUNtail(bi, hb)) == 0)
     963             :                                                 break;
     964             :                                 }
     965      307229 :                                 h->nunique += hb == BUN_NONE;
     966             :                         }
     967      306981 :                         HASHputlink(h, p, hget);
     968      307127 :                         HASHput(h, c, p);
     969      307482 :                         o = canditer_next(ci);
     970      307158 :                         p++;
     971             :                 }
     972         737 :                 TIMEOUT_CHECK(qry_ctx,
     973             :                               GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
     974             :                 break;
     975             :         }
     976       10797 :         bat_iterator_end(&bi);
     977             :         /* if the number of unique values is equal to the bat count,
     978             :          * all values are necessarily distinct */
     979       10794 :         MT_lock_set(&b->theaplock);
     980       10800 :         if (h->nunique == BATcount(b) && !b->tkey) {
     981        7660 :                 b->tkey = true;
     982             :         }
     983       10800 :         if (ci->ncand == BATcount(b))
     984       10660 :                 b->tunique_est = (double) h->nunique;
     985       10800 :         MT_lock_unset(&b->theaplock);
     986       10800 :         TRC_DEBUG_IF(ACCELERATOR) {
     987           0 :                 TRC_DEBUG_ENDIF(ACCELERATOR,
     988             :                                 "hash construction " LLFMT " usec\n", GDKusec() - t0);
     989           0 :                 HASHcollisions(b, h, __func__);
     990             :         }
     991             :         return h;
     992             : 
     993           0 :   bailout:
     994           0 :         bat_iterator_end(&bi);
     995           0 :         HEAPfree(&h->heaplink, true);
     996           0 :         HEAPfree(&h->heapbckt, true);
     997           0 :         GDKfree(h);
     998           0 :         return NULL;
     999             : }
    1000             : 
    1001             : gdk_return
    1002     2766072 : BAThash(BAT *b)
    1003             : {
    1004     2766072 :         if (ATOMstorage(b->ttype) == TYPE_msk) {
    1005           0 :                 GDKerror("No hash on msk type bats\n");
    1006           0 :                 return GDK_FAIL;
    1007             :         }
    1008     2766072 :         if (BATcheckhash(b)) {
    1009             :                 return GDK_SUCCEED;
    1010             :         }
    1011       10679 :         for (;;) {
    1012             :                 /* If multiple threads simultaneously try to build a
    1013             :                  * hash on a bat, e.g. in order to perform a join, it
    1014             :                  * may happen that one thread succeeds in obtaining the
    1015             :                  * write lock, then builds the hash, releases the lock,
    1016             :                  * acquires the read lock, and performs the join.  The
    1017             :                  * other threads may then still be attempting to acquire
    1018             :                  * the write lock.  But now they have to wait until the
    1019             :                  * read lock is released, which can be quite a long
    1020             :                  * time.  Especially if a second thread goes through the
    1021             :                  * same process as the first. */
    1022       10679 :                 if (MT_rwlock_wrtry(&b->thashlock))
    1023             :                         break;
    1024           4 :                 MT_sleep_ms(1);
    1025           4 :                 if (MT_rwlock_rdtry(&b->thashlock)) {
    1026           4 :                         if (b->thash != NULL && b->thash != (Hash *) 1) {
    1027           4 :                                 MT_rwlock_rdunlock(&b->thashlock);
    1028           4 :                                 return GDK_SUCCEED;
    1029             :                         }
    1030           0 :                         MT_rwlock_rdunlock(&b->thashlock);
    1031             :                 }
    1032             :         }
    1033             :         /* we have the write lock */
    1034       10658 :         if (b->thash == NULL) {
    1035       10656 :                 struct canditer ci;
    1036       10656 :                 canditer_init(&ci, b, NULL);
    1037       10663 :                 if ((b->thash = BAThash_impl(b, &ci, "thash")) == NULL) {
    1038           0 :                         MT_rwlock_wrunlock(&b->thashlock);
    1039           0 :                         return GDK_FAIL;
    1040             :                 }
    1041             :         }
    1042       10654 :         MT_rwlock_wrunlock(&b->thashlock);
    1043       10654 :         return GDK_SUCCEED;
    1044             : }
    1045             : 
    1046             : /*
    1047             :  * The entry on which a value hashes can be calculated with the
    1048             :  * routine HASHprobe.
    1049             :  */
    1050             : BUN
    1051   447115262 : HASHprobe(const Hash *h, const void *v)
    1052             : {
    1053   859925150 :         switch (ATOMbasetype(h->type)) {
    1054        3938 :         case TYPE_bte:
    1055        3938 :                 return hash_bte(h, v);
    1056       89319 :         case TYPE_sht:
    1057       89319 :                 return hash_sht(h, v);
    1058   394903712 :         case TYPE_int:
    1059   394903712 :                 return hash_int(h, v);
    1060    34538152 :         case TYPE_lng:
    1061    34538152 :                 return hash_lng(h, v);
    1062             : #ifdef HAVE_HGE
    1063       31527 :         case TYPE_hge:
    1064       31527 :                 return hash_hge(h, v);
    1065             : #endif
    1066      151745 :         case TYPE_flt:
    1067      151745 :                 return hash_flt(h, v);
    1068       55302 :         case TYPE_dbl:
    1069       55302 :                 return hash_dbl(h, v);
    1070        2115 :         case TYPE_uuid:
    1071        2115 :                 return hash_uuid(h, v);
    1072    17339452 :         default:
    1073    17339452 :                 return hash_any(h, v);
    1074             :         }
    1075             : }
    1076             : 
    1077             : void
    1078      484039 : HASHappend_locked(BAT *b, BUN i, const void *v)
    1079             : {
    1080      484039 :         Hash *h = b->thash;
    1081      484039 :         if (h == NULL) {
    1082           0 :                 return;
    1083             :         }
    1084      484039 :         if (h == (Hash *) 1) {
    1085           0 :                 b->thash = NULL;
    1086           0 :                 doHASHdestroy(b, h);
    1087           0 :                 GDKclrerr();
    1088           0 :                 return;
    1089             :         }
    1090      484039 :         assert(i * h->width == h->heaplink.free);
    1091      484039 :         if (h->nunique < b->batCount / hash_destroy_uniques_fraction) {
    1092           0 :                 b->thash = NULL;
    1093           0 :                 doHASHdestroy(b, h);
    1094           0 :                 GDKclrerr();
    1095           0 :                 return;
    1096             :         }
    1097      484039 :         if (HASHfix(h, false, true) != GDK_SUCCEED) {
    1098           0 :                 b->thash = NULL;
    1099           0 :                 doHASHdestroy(b, h);
    1100           0 :                 GDKclrerr();
    1101           0 :                 return;
    1102             :         }
    1103      484039 :         if (HASHwidth(i + 1) > h->width &&
    1104           0 :              HASHupgradehashheap(b) != GDK_SUCCEED) {
    1105           0 :                 GDKclrerr();
    1106           0 :                 return;
    1107             :         }
    1108      968075 :         if ((ATOMsize(b->ttype) > 2 &&
    1109      484036 :              HASHgrowbucket(b) != GDK_SUCCEED) ||
    1110      484706 :             ((i + 1) * h->width > h->heaplink.size &&
    1111         667 :              HEAPextend(&h->heaplink,
    1112         667 :                         i * h->width + GDK_mmap_pagesize,
    1113             :                         true) != GDK_SUCCEED)) {
    1114           0 :                 b->thash = NULL;
    1115           0 :                 HEAPfree(&h->heapbckt, true);
    1116           0 :                 HEAPfree(&h->heaplink, true);
    1117           0 :                 GDKfree(h);
    1118           0 :                 GDKclrerr();
    1119           0 :                 return;
    1120             :         }
    1121      484039 :         h->Link = h->heaplink.base;
    1122      484039 :         BUN c = HASHprobe(h, v);
    1123      484037 :         h->heaplink.free += h->width;
    1124      484037 :         BUN hb = HASHget(h, c);
    1125      484037 :         BUN hb2;
    1126      484037 :         BATiter bi = bat_iterator_nolock(b);
    1127      484037 :         int (*atomcmp)(const void *, const void *) = ATOMcompare(h->type);
    1128      484037 :         for (hb2 = hb;
    1129      637372 :              hb2 != BUN_NONE;
    1130      153335 :              hb2 = HASHgetlink(h, hb2)) {
    1131      317247 :                 if (atomcmp(v, BUNtail(bi, hb2)) == 0)
    1132             :                         break;
    1133             :         }
    1134      484038 :         h->nheads += hb == BUN_NONE;
    1135      484038 :         h->nunique += hb2 == BUN_NONE;
    1136      484038 :         HASHputlink(h, i, hb);
    1137      484037 :         HASHput(h, c, i);
    1138      484039 :         h->heapbckt.dirty = true;
    1139      484039 :         h->heaplink.dirty = true;
    1140             : }
    1141             : 
    1142             : BUN
    1143           0 : HASHappend(BAT *b, BUN i, const void *v)
    1144             : {
    1145           0 :         BUN nunique;
    1146           0 :         MT_rwlock_wrlock(&b->thashlock);
    1147           0 :         HASHappend_locked(b, i, v);
    1148           0 :         nunique = b->thash ? b->thash->nunique : 0;
    1149           0 :         MT_rwlock_wrunlock(&b->thashlock);
    1150           0 :         return nunique;
    1151             : }
    1152             : 
    1153             : /* insert value v at position p into the hash table of b */
    1154             : void
    1155   164690134 : HASHinsert_locked(BATiter *bi, BUN p, const void *v)
    1156             : {
    1157   164690134 :         BAT *b = bi->b;
    1158   164690134 :         Hash *h = b->thash;
    1159   164690134 :         if (h == NULL) {
    1160             :                 return;
    1161             :         }
    1162       33463 :         if (h == (Hash *) 1) {
    1163           0 :                 b->thash = NULL;
    1164           0 :                 doHASHdestroy(b, h);
    1165           0 :                 GDKclrerr();
    1166           0 :                 return;
    1167             :         }
    1168       33463 :         assert(p * h->width < h->heaplink.free);
    1169       33463 :         if (h->nunique < b->batCount / hash_destroy_uniques_fraction) {
    1170           0 :                 b->thash = NULL;
    1171           0 :                 doHASHdestroy(b, h);
    1172           0 :                 GDKclrerr();
    1173           0 :                 return;
    1174             :         }
    1175       33463 :         if (HASHfix(h, false, true) != GDK_SUCCEED) {
    1176           0 :                 b->thash = NULL;
    1177           0 :                 doHASHdestroy(b, h);
    1178           0 :                 GDKclrerr();
    1179           0 :                 return;
    1180             :         }
    1181       33462 :         BUN c = HASHprobe(h, v);
    1182       33487 :         BUN hb = HASHget(h, c);
    1183       33487 :         int (*atomcmp)(const void *, const void *) = ATOMcompare(h->type);
    1184       33487 :         if (hb == BUN_NONE || hb < p) {
    1185             :                 /* bucket is empty, or bucket is used by lower numbered
    1186             :                  * position */
    1187        9248 :                 h->heaplink.dirty = true;
    1188        9248 :                 h->heapbckt.dirty = true;
    1189        9248 :                 HASHputlink(h, p, hb);
    1190        9248 :                 HASHput(h, c, p);
    1191        9248 :                 if (hb == BUN_NONE) {
    1192        2995 :                         h->nheads++;
    1193             :                 } else {
    1194        7007 :                         do {
    1195        7007 :                                 if (atomcmp(v, BUNtail(*bi, hb)) == 0) {
    1196             :                                         /* found another row with the
    1197             :                                          * same value, so don't
    1198             :                                          * increment nunique */
    1199             :                                         return;
    1200             :                                 }
    1201        1876 :                                 hb = HASHgetlink(h, hb);
    1202        1876 :                         } while (hb != BUN_NONE);
    1203             :                 }
    1204             :                 /* this is a new value */
    1205        4117 :                 h->nunique++;
    1206        4117 :                 return;
    1207             :         }
    1208             :         bool seen = false;
    1209      322565 :         for (;;) {
    1210      322565 :                 if (!seen)
    1211       25745 :                         seen = atomcmp(v, BUNtail(*bi, hb)) == 0;
    1212      322569 :                 BUN hb2 = HASHgetlink(h, hb);
    1213      322569 :                 if (hb2 == BUN_NONE || hb2 < p) {
    1214       24243 :                         h->heaplink.dirty = true;
    1215       24243 :                         HASHputlink(h, p, hb2);
    1216       24242 :                         HASHputlink(h, hb, p);
    1217       24854 :                         while (!seen && hb2 != BUN_NONE) {
    1218         611 :                                 seen = atomcmp(v, BUNtail(*bi, hb2)) == 0;
    1219         612 :                                 hb2 = HASHgetlink(h, hb2);
    1220             :                         }
    1221       24240 :                         if (!seen)
    1222         720 :                                 h->nunique++;
    1223       24240 :                         return;
    1224             :                 }
    1225             :                 hb = hb2;
    1226             :         }
    1227             : }
    1228             : 
    1229             : BUN
    1230           2 : HASHinsert(BATiter *bi, BUN p, const void *v)
    1231             : {
    1232           2 :         BUN nunique;
    1233           2 :         MT_rwlock_wrlock(&bi->b->thashlock);
    1234           2 :         HASHinsert_locked(bi, p, v);
    1235           2 :         nunique = bi->b->thash ? bi->b->thash->nunique : 0;
    1236           2 :         MT_rwlock_wrunlock(&bi->b->thashlock);
    1237           2 :         return nunique;
    1238             : }
    1239             : 
    1240             : /* delete value v at position p from the hash table of b */
    1241             : void
    1242   157330645 : HASHdelete_locked(BATiter *bi, BUN p, const void *v)
    1243             : {
    1244   157330645 :         BAT *b = bi->b;
    1245   157330645 :         Hash *h = b->thash;
    1246   157330645 :         if (h == NULL) {
    1247             :                 return;
    1248             :         }
    1249       33439 :         if (h == (Hash *) 1) {
    1250           0 :                 b->thash = NULL;
    1251           0 :                 doHASHdestroy(b, h);
    1252           0 :                 GDKclrerr();
    1253           0 :                 return;
    1254             :         }
    1255       33439 :         assert(p * h->width < h->heaplink.free);
    1256       33439 :         if (h->nunique < b->batCount / hash_destroy_uniques_fraction) {
    1257           1 :                 b->thash = NULL;
    1258           1 :                 doHASHdestroy(b, h);
    1259           1 :                 GDKclrerr();
    1260           1 :                 return;
    1261             :         }
    1262       33438 :         if (HASHfix(h, false, true) != GDK_SUCCEED) {
    1263           0 :                 b->thash = NULL;
    1264           0 :                 doHASHdestroy(b, h);
    1265           0 :                 GDKclrerr();
    1266           0 :                 return;
    1267             :         }
    1268       33422 :         BUN c = HASHprobe(h, v);
    1269       33453 :         BUN hb = HASHget(h, c);
    1270       33453 :         int (*atomcmp)(const void *, const void *) = ATOMcompare(h->type);
    1271       33453 :         if (hb == p) {
    1272        5048 :                 BUN hb2 = HASHgetlink(h, p);
    1273        5048 :                 h->heaplink.dirty = true;
    1274        5048 :                 h->heapbckt.dirty = true;
    1275        5048 :                 HASHput(h, c, hb2);
    1276        5047 :                 HASHputlink(h, p, BUN_NONE);
    1277        5046 :                 if (hb2 == BUN_NONE) {
    1278        2547 :                         h->nheads--;
    1279             :                 } else {
    1280        3153 :                         do {
    1281        3153 :                                 if (atomcmp(v, BUNtail(*bi, hb2)) == 0) {
    1282             :                                         /* found another row with the
    1283             :                                          * same value, so don't
    1284             :                                          * decrement nunique below */
    1285             :                                         return;
    1286             :                                 }
    1287        1689 :                                 hb2 = HASHgetlink(h, hb2);
    1288        1689 :                         } while (hb2 != BUN_NONE);
    1289             :                 }
    1290             :                 /* no rows found with the same value, so number of
    1291             :                  * unique values is one lower */
    1292        3582 :                 h->nunique--;
    1293        3582 :                 return;
    1294             :         }
    1295             :         bool seen = false;
    1296             :         BUN links = 0;
    1297      319784 :         for (;;) {
    1298      319784 :                 if (!seen)
    1299       29200 :                         seen = atomcmp(v, BUNtail(*bi, hb)) == 0;
    1300      319820 :                 BUN hb2 = HASHgetlink(h, hb);
    1301      319820 :                 assert(hb2 != BUN_NONE );
    1302      319820 :                 assert(hb2 < hb);
    1303      319820 :                 if (hb2 == p) {
    1304       28439 :                         for (hb2 = HASHgetlink(h, hb2);
    1305       28880 :                              !seen && hb2 != BUN_NONE;
    1306         441 :                              hb2 = HASHgetlink(h, hb2))
    1307         449 :                                 seen = atomcmp(v, BUNtail(*bi, hb2)) == 0;
    1308       28431 :                         break;
    1309             :                 }
    1310      291381 :                 hb = hb2;
    1311      291381 :                 if (++links > hash_destroy_chain_length) {
    1312           2 :                         b->thash = NULL;
    1313           2 :                         doHASHdestroy(b, h);
    1314           2 :                         GDKclrerr();
    1315           2 :                         return;
    1316             :                 }
    1317             :         }
    1318       28431 :         h->heaplink.dirty = true;
    1319       28431 :         HASHputlink(h, hb, HASHgetlink(h, p));
    1320       28409 :         HASHputlink(h, p, BUN_NONE);
    1321       28410 :         if (!seen)
    1322         452 :                 h->nunique--;
    1323             : }
    1324             : 
    1325             : BUN
    1326           6 : HASHdelete(BATiter *bi, BUN p, const void *v)
    1327             : {
    1328           6 :         BUN nunique;
    1329           6 :         MT_rwlock_wrlock(&bi->b->thashlock);
    1330           6 :         HASHdelete_locked(bi, p, v);
    1331           6 :         nunique = bi->b->thash ? bi->b->thash->nunique : 0;
    1332           6 :         MT_rwlock_wrunlock(&bi->b->thashlock);
    1333           6 :         return nunique;
    1334             : }
    1335             : 
    1336             : BUN
    1337           0 : HASHlist(Hash *h, BUN i)
    1338             : {
    1339           0 :         BUN c = 1;
    1340           0 :         BUN j = HASHget(h, i);
    1341             : 
    1342           0 :         if (j == BUN_NONE)
    1343             :                 return 1;
    1344           0 :         while ((j = HASHgetlink(h, i)) != BUN_NONE) {
    1345           0 :                 c++;
    1346           0 :                 i = j;
    1347             :         }
    1348             :         return c;
    1349             : }
    1350             : 
    1351             : void
    1352    21435762 : HASHdestroy(BAT *b)
    1353             : {
    1354    21435762 :         if (b && b->thash) {
    1355        9312 :                 Hash *hs;
    1356        9312 :                 MT_rwlock_wrlock(&b->thashlock);
    1357        9321 :                 hs = b->thash;
    1358        9321 :                 b->thash = NULL;
    1359        9321 :                 MT_rwlock_wrunlock(&b->thashlock);
    1360        9321 :                 doHASHdestroy(b, hs);
    1361             :         }
    1362    21435767 : }
    1363             : 
    1364             : void
    1365      521397 : HASHfree(BAT *b)
    1366             : {
    1367      521397 :         if (b && b->thash) {
    1368        1927 :                 Hash *h;
    1369        1927 :                 MT_rwlock_wrlock(&b->thashlock);
    1370        1927 :                 if ((h = b->thash) != NULL && h != (Hash *) 1) {
    1371        1760 :                         bool rmheap = h->heaplink.dirty || h->heapbckt.dirty;
    1372        1760 :                         TRC_DEBUG(ACCELERATOR, ALGOBATFMT " free hash %s\n",
    1373             :                                   ALGOBATPAR(b),
    1374             :                                   rmheap ? "removing" : "keeping");
    1375             : 
    1376        1760 :                         b->thash = rmheap ? NULL : (Hash *) 1;
    1377        1760 :                         HEAPfree(&h->heapbckt, rmheap);
    1378        1760 :                         HEAPfree(&h->heaplink, rmheap);
    1379        1760 :                         GDKfree(h);
    1380             :                 }
    1381        1927 :                 MT_rwlock_wrunlock(&b->thashlock);
    1382             :         }
    1383      521397 : }

Generated by: LCOV version 1.14