LCOV - code coverage report
Current view: top level - gdk - gdk_bat.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 1262 1702 74.1 %
Date: 2024-11-13 19:37:10 Functions: 36 41 87.8 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : /*
      14             :  * @a M. L. Kersten, P. Boncz, N. Nes
      15             :  * @* BAT Module
      16             :  * In this Chapter we describe the BAT implementation in more detail.
      17             :  * The routines mentioned are primarily meant to simplify the library
      18             :  * implementation.
      19             :  *
      20             :  * @+ BAT Construction
      21             :  * BATs are implemented in several blocks of memory, prepared for disk
      22             :  * storage and easy shipment over a network.
      23             :  *
      24             :  * The BAT starts with a descriptor, which indicates the required BAT
      25             :  * library version and the BAT administration details.  In particular,
      26             :  * it describes the binary relationship maintained and the location of
      27             :  * fields required for storage.
      28             :  *
      29             :  * The general layout of the BAT in this implementation is as follows.
      30             :  * Each BAT comes with a heap for the loc-size buns and, optionally,
      31             :  * with heaps to manage the variable-sized data items of both
      32             :  * dimensions.  The buns are assumed to be stored as loc-size objects.
      33             :  * This is essentially an array of structs to store the associations.
      34             :  * The size is determined at BAT creation time using an upper bound on
      35             :  * the number of elements to be accommodated.  In case of overflow,
      36             :  * its storage space is extended automatically.
      37             :  *
      38             :  * The capacity of a BAT places an upper limit on the number of BUNs
      39             :  * to be stored initially. The actual space set aside may be quite
      40             :  * large.  Moreover, the size is aligned to int boundaries to speed up
      41             :  * access and avoid some machine limitations.
      42             :  *
      43             :  * Initialization of the variable parts relies on type-specific routines
      44             :  * called atomHeap.
      45             :  */
      46             : #include "monetdb_config.h"
      47             : #include "gdk.h"
      48             : #include "gdk_private.h"
      49             : #include "mutils.h"
      50             : 
      51             : #ifdef ALIGN
      52             : #undef ALIGN
      53             : #endif
      54             : #define ALIGN(n,b)      ((b)?(b)*(1+(((n)-1)/(b))):n)
      55             : 
      56             : #define ATOMneedheap(tpe) (BATatoms[tpe].atomHeap != NULL)
      57             : 
      58             : BAT *
      59    23380398 : BATcreatedesc(oid hseq, int tt, bool heapnames, role_t role, uint16_t width)
      60             : {
      61    23380398 :         bat bid;
      62    23380398 :         BAT *bn;
      63    23380398 :         Heap *h = NULL, *vh = NULL;
      64             : 
      65             :         /*
      66             :          * Alloc space for the BAT and its dependent records.
      67             :          */
      68    23380398 :         assert(tt >= 0);
      69             : 
      70    23380398 :         if (heapnames) {
      71    12019286 :                 if ((h = GDKmalloc(sizeof(Heap))) == NULL) {
      72             :                         return NULL;
      73             :                 }
      74    24156520 :                 *h = (Heap) {
      75    12081322 :                         .farmid = BBPselectfarm(role, tt, offheap),
      76             :                         .dirty = true,
      77             :                         .refs = ATOMIC_VAR_INIT(1),
      78             :                 };
      79             : 
      80    12075198 :                 if (ATOMneedheap(tt)) {
      81     1044625 :                         if ((vh = GDKmalloc(sizeof(Heap))) == NULL) {
      82           0 :                                 GDKfree(h);
      83           0 :                                 return NULL;
      84             :                         }
      85     1052487 :                         *vh = (Heap) {
      86     1052158 :                                 .farmid = BBPselectfarm(role, tt, varheap),
      87             :                                 .dirty = true,
      88             :                                 .refs = ATOMIC_VAR_INIT(1),
      89             :                         };
      90             :                 }
      91             :         }
      92             : 
      93    23444172 :         bid = BBPallocbat(tt);
      94    23511211 :         if (bid == 0) {
      95           0 :                 GDKfree(h);
      96           0 :                 GDKfree(vh);
      97           0 :                 return NULL;
      98             :         }
      99    23511211 :         bn = BBP_desc(bid);
     100             : 
     101             :         /*
     102             :          * Fill in basic column info
     103             :          */
     104    46970865 :         *bn = (BAT) {
     105             :                 .batCacheid = bid,
     106             :                 .hseqbase = hseq,
     107             : 
     108             :                 .ttype = tt,
     109             :                 .tkey = true,
     110             :                 .tnonil = true,
     111             :                 .tnil = false,
     112    23459654 :                 .tsorted = ATOMlinear(tt),
     113             :                 .trevsorted = ATOMlinear(tt),
     114    23459654 :                 .tascii = tt == TYPE_str,
     115             :                 .tseqbase = oid_nil,
     116             :                 .tminpos = BUN_NONE,
     117             :                 .tmaxpos = BUN_NONE,
     118             :                 .tunique_est = 0.0,
     119             : 
     120             :                 .batRole = role,
     121             :                 .batTransient = true,
     122             :                 .batRestricted = BAT_WRITE,
     123             :                 .theap = h,
     124             :                 .tvheap = vh,
     125    23511211 :                 .creator_tid = MT_getpid(),
     126             :         };
     127             : 
     128    23459654 :         if (bn->theap) {
     129    12054604 :                 bn->theap->parentid = bn->batCacheid;
     130    12054604 :                 const char *nme = BBP_physical(bn->batCacheid);
     131    12054604 :                 settailname(bn->theap, nme, tt, width);
     132             : 
     133    12030768 :                 if (bn->tvheap) {
     134     1038660 :                         bn->tvheap->parentid = bn->batCacheid;
     135     1038660 :                         strconcat_len(bn->tvheap->filename,
     136             :                                       sizeof(bn->tvheap->filename),
     137             :                                       nme, ".theap", NULL);
     138             :                 }
     139             :         }
     140    23448286 :         char name[MT_NAME_LEN];
     141    23448286 :         snprintf(name, sizeof(name), "heaplock%d", bn->batCacheid); /* fits */
     142    23448286 :         MT_lock_init(&bn->theaplock, name);
     143    23413529 :         snprintf(name, sizeof(name), "BATlock%d", bn->batCacheid); /* fits */
     144    23413529 :         MT_lock_init(&bn->batIdxLock, name);
     145    23465922 :         snprintf(name, sizeof(name), "hashlock%d", bn->batCacheid); /* fits */
     146    23465922 :         MT_rwlock_init(&bn->thashlock, name);
     147    23401126 :         return bn;
     148             : }
     149             : 
     150             : uint8_t
     151    24026491 : ATOMelmshift(int sz)
     152             : {
     153    24026491 :         uint8_t sh;
     154    24026491 :         int i = sz >> 1;
     155             : 
     156    44505481 :         for (sh = 0; i != 0; sh++) {
     157    20478990 :                 i >>= 1;
     158             :         }
     159    24026491 :         return sh;
     160             : }
     161             : 
     162             : 
     163             : void
     164    12049033 : BATsetdims(BAT *b, uint16_t width)
     165             : {
     166    12049033 :         b->twidth = b->ttype == TYPE_str ? width > 0 ? width : 1 : ATOMsize(b->ttype);
     167    12049033 :         b->tshift = ATOMelmshift(b->twidth);
     168    12049033 :         assert_shift_width(b->tshift, b->twidth);
     169    12049033 : }
     170             : 
     171             : const char *
     172        1260 : BATtailname(const BAT *b)
     173             : {
     174        1260 :         if (b->ttype == TYPE_str) {
     175         340 :                 switch (b->twidth) {
     176         250 :                 case 1:
     177         250 :                         return "tail1";
     178          75 :                 case 2:
     179          75 :                         return "tail2";
     180          15 :                 case 4:
     181             : #if SIZEOF_VAR_T == 8
     182          15 :                         return "tail4";
     183             :                 case 8:
     184             : #endif
     185             :                         break;
     186             :                 default:
     187           0 :                         MT_UNREACHABLE();
     188             :                 }
     189             :         }
     190             :         return "tail";
     191             : }
     192             : 
     193             : void
     194    12117688 : settailname(Heap *restrict tail, const char *restrict physnme, int tt, int width)
     195             : {
     196    12117688 :         if (tt == TYPE_str) {
     197     1072505 :                 switch (width) {
     198     1015713 :                 case 0:
     199             :                 case 1:
     200     1015713 :                         strconcat_len(tail->filename,
     201             :                                       sizeof(tail->filename), physnme,
     202             :                                       ".tail1", NULL);
     203     1015713 :                         return;
     204       52182 :                 case 2:
     205       52182 :                         strconcat_len(tail->filename,
     206             :                                       sizeof(tail->filename), physnme,
     207             :                                       ".tail2", NULL);
     208       52182 :                         return;
     209        4610 :                 case 4:
     210             : #if SIZEOF_VAR_T == 8
     211        4610 :                         strconcat_len(tail->filename,
     212             :                                       sizeof(tail->filename), physnme,
     213             :                                       ".tail4", NULL);
     214        4610 :                         return;
     215             :                 case 8:
     216             : #endif
     217             :                         break;
     218             :                 default:
     219           0 :                         MT_UNREACHABLE();
     220             :                 }
     221             :         }
     222    11045183 :         strconcat_len(tail->filename, sizeof(tail->filename), physnme,
     223             :                       ".tail", NULL);
     224             : }
     225             : 
     226             : /*
     227             :  * @- BAT allocation
     228             :  * Allocate BUN heap and variable-size atomheaps (see e.g. strHeap).
     229             :  * We now initialize new BATs with their heapname such that the
     230             :  * modified HEAPalloc/HEAPextend primitives can possibly use memory
     231             :  * mapped files as temporary heap storage.
     232             :  *
     233             :  * In case of huge bats, we want HEAPalloc to write a file to disk,
     234             :  * and memory map it. To make this possible, we must provide it with
     235             :  * filenames.
     236             :  */
     237             : BAT *
     238    12034412 : COLnew2(oid hseq, int tt, BUN cap, role_t role, uint16_t width)
     239             : {
     240    12034412 :         BAT *bn;
     241             : 
     242    12034412 :         assert(cap <= BUN_MAX);
     243    12034412 :         assert(hseq <= oid_nil);
     244    12034412 :         ERRORcheck((tt < 0) || (tt > GDKatomcnt), "tt error\n", NULL);
     245             : 
     246             :         /* round up to multiple of BATTINY */
     247    12034412 :         if (cap < BUN_MAX - BATTINY)
     248    12016237 :                 cap = (cap + BATTINY - 1) & ~(BATTINY - 1);
     249    12034412 :         if (ATOMstorage(tt) == TYPE_msk) {
     250      152454 :                 if (cap < 8*BATTINY)
     251             :                         cap = 8*BATTINY;
     252             :                 else
     253       50763 :                         cap = (cap + 31) & ~(BUN)31;
     254    11881958 :         } else if (cap < BATTINY)
     255             :                 cap = BATTINY;
     256             :         /* limit the size */
     257     3503254 :         if (cap > BUN_MAX)
     258             :                 cap = BUN_MAX;
     259             : 
     260    12034412 :         bn = BATcreatedesc(hseq, tt, true, role, width);
     261    12037213 :         if (bn == NULL)
     262             :                 return NULL;
     263             : 
     264    12037213 :         BATsetdims(bn, width);
     265    12024398 :         bn->batCapacity = cap;
     266             : 
     267    12024398 :         if (ATOMstorage(tt) == TYPE_msk)
     268      152419 :                 cap /= 8;       /* 8 values per byte */
     269             : 
     270             :         /* alloc the main heaps */
     271    12024398 :         if (tt && HEAPalloc(bn->theap, cap, bn->twidth) != GDK_SUCCEED) {
     272           0 :                 goto bailout;
     273             :         }
     274             : 
     275    12067573 :         if (bn->tvheap && width == 0 && ATOMheap(tt, bn->tvheap, cap) != GDK_SUCCEED) {
     276           0 :                 HEAPfree(bn->theap, true);
     277           0 :                 goto bailout;
     278             :         }
     279    12061141 :         DELTAinit(bn);
     280    12056778 :         if (BBPcacheit(bn, true) != GDK_SUCCEED) {
     281             :                 /* cannot happen, function always returns success */
     282           0 :                 goto bailout;
     283             :         }
     284    12098955 :         TRC_DEBUG(ALGO, "-> " ALGOBATFMT "\n", ALGOBATPAR(bn));
     285             :         return bn;
     286           0 :   bailout:
     287           0 :         BBPclear(bn->batCacheid);
     288           0 :         return NULL;
     289             : }
     290             : 
     291             : BAT *
     292    11762941 : COLnew(oid hseq, int tt, BUN cap, role_t role)
     293             : {
     294    11762941 :         return COLnew2(hseq, tt, cap, role, 0);
     295             : }
     296             : 
     297             : BAT *
     298     5608183 : BATdense(oid hseq, oid tseq, BUN cnt)
     299             : {
     300     5608183 :         BAT *bn;
     301             : 
     302     5608183 :         bn = COLnew(hseq, TYPE_void, 0, TRANSIENT);
     303     5618463 :         if (bn != NULL) {
     304     5618463 :                 BATtseqbase(bn, tseq);
     305     5616009 :                 BATsetcount(bn, cnt);
     306     5615748 :                 TRC_DEBUG(ALGO, OIDFMT "," OIDFMT "," BUNFMT
     307             :                           "-> " ALGOBATFMT "\n", hseq, tseq, cnt,
     308             :                           ALGOBATPAR(bn));
     309             :         }
     310     5615748 :         return bn;
     311             : }
     312             : 
     313             : BAT *
     314           0 : BATattach(int tt, const char *heapfile, role_t role)
     315             : {
     316           0 :         BAT *bn;
     317           0 :         char *p;
     318           0 :         size_t m;
     319           0 :         FILE *f;
     320             : 
     321           0 :         ERRORcheck(tt <= 0 , "bad tail type (<=0)\n", NULL);
     322           0 :         ERRORcheck(ATOMvarsized(tt) && ATOMstorage(tt) != TYPE_str, "bad tail type (varsized and not str)\n", NULL);
     323           0 :         ERRORcheck(heapfile == NULL, "bad heapfile name\n", NULL);
     324             : 
     325           0 :         if ((f = MT_fopen(heapfile, "rb")) == NULL) {
     326           0 :                 GDKsyserror("BATattach: cannot open %s\n", heapfile);
     327           0 :                 return NULL;
     328             :         }
     329           0 :         if (ATOMstorage(tt) == TYPE_str) {
     330           0 :                 size_t n;
     331           0 :                 char *s;
     332           0 :                 int c, u;
     333             : 
     334           0 :                 if ((bn = COLnew(0, tt, 0, role)) == NULL) {
     335           0 :                         fclose(f);
     336           0 :                         return NULL;
     337             :                 }
     338           0 :                 m = 4096;
     339           0 :                 n = 0;
     340           0 :                 u = 0;
     341           0 :                 s = p = GDKmalloc(m);
     342           0 :                 if (p == NULL) {
     343           0 :                         fclose(f);
     344           0 :                         BBPreclaim(bn);
     345           0 :                         return NULL;
     346             :                 }
     347           0 :                 while ((c = getc(f)) != EOF) {
     348           0 :                         if (n == m) {
     349           0 :                                 m += 4096;
     350           0 :                                 s = GDKrealloc(p, m);
     351           0 :                                 if (s == NULL) {
     352           0 :                                         GDKfree(p);
     353           0 :                                         BBPreclaim(bn);
     354           0 :                                         fclose(f);
     355           0 :                                         return NULL;
     356             :                                 }
     357           0 :                                 p = s;
     358           0 :                                 s = p + n;
     359             :                         }
     360           0 :                         if (c == '\n' && n > 0 && s[-1] == '\r') {
     361             :                                 /* deal with CR-LF sequence */
     362           0 :                                 s[-1] = c;
     363             :                         } else {
     364           0 :                                 *s++ = c;
     365           0 :                                 n++;
     366             :                         }
     367           0 :                         if (u) {
     368           0 :                                 if ((c & 0xC0) == 0x80)
     369           0 :                                         u--;
     370             :                                 else
     371           0 :                                         goto notutf8;
     372           0 :                         } else if ((c & 0xF8) == 0xF0)
     373             :                                 u = 3;
     374           0 :                         else if ((c & 0xF0) == 0xE0)
     375             :                                 u = 2;
     376           0 :                         else if ((c & 0xE0) == 0xC0)
     377             :                                 u = 1;
     378           0 :                         else if ((c & 0x80) == 0x80)
     379           0 :                                 goto notutf8;
     380           0 :                         else if (c == 0) {
     381           0 :                                 if (BUNappend(bn, p, false) != GDK_SUCCEED) {
     382           0 :                                         BBPreclaim(bn);
     383           0 :                                         fclose(f);
     384           0 :                                         GDKfree(p);
     385           0 :                                         return NULL;
     386             :                                 }
     387             :                                 s = p;
     388             :                                 n = 0;
     389             :                         }
     390             :                 }
     391           0 :                 fclose(f);
     392           0 :                 GDKfree(p);
     393           0 :                 if (n > 0) {
     394           0 :                         BBPreclaim(bn);
     395           0 :                         GDKerror("last string is not null-terminated\n");
     396           0 :                         return NULL;
     397             :                 }
     398             :         } else {
     399           0 :                 struct stat st;
     400           0 :                 int atomsize;
     401           0 :                 BUN cap;
     402           0 :                 lng n;
     403             : 
     404           0 :                 if (fstat(fileno(f), &st) < 0) {
     405           0 :                         GDKsyserror("BATattach: cannot stat %s\n", heapfile);
     406           0 :                         fclose(f);
     407           0 :                         return NULL;
     408             :                 }
     409           0 :                 atomsize = ATOMsize(tt);
     410           0 :                 if (st.st_size % atomsize != 0) {
     411           0 :                         fclose(f);
     412           0 :                         GDKerror("heapfile size not integral number of atoms\n");
     413           0 :                         return NULL;
     414             :                 }
     415           0 :                 if (ATOMstorage(tt) == TYPE_msk ?
     416             :                     (st.st_size > (off_t) (BUN_MAX / 8)) :
     417           0 :                     ((size_t) (st.st_size / atomsize) > (size_t) BUN_MAX)) {
     418           0 :                         fclose(f);
     419           0 :                         GDKerror("heapfile too large\n");
     420           0 :                         return NULL;
     421             :                 }
     422           0 :                 cap = (BUN) (ATOMstorage(tt) == TYPE_msk ?
     423           0 :                              st.st_size * 8 :
     424           0 :                              st.st_size / atomsize);
     425           0 :                 bn = COLnew(0, tt, cap, role);
     426           0 :                 if (bn == NULL) {
     427           0 :                         fclose(f);
     428           0 :                         return NULL;
     429             :                 }
     430           0 :                 p = Tloc(bn, 0);
     431           0 :                 n = (lng) st.st_size;
     432           0 :                 while (n > 0 && (m = fread(p, 1, (size_t) MIN(1024*1024, n), f)) > 0) {
     433           0 :                         p += m;
     434           0 :                         n -= m;
     435             :                 }
     436           0 :                 fclose(f);
     437           0 :                 if (n > 0) {
     438           0 :                         GDKerror("couldn't read the complete file\n");
     439           0 :                         BBPreclaim(bn);
     440           0 :                         return NULL;
     441             :                 }
     442           0 :                 BATsetcount(bn, cap);
     443           0 :                 bn->tnonil = cap == 0;
     444           0 :                 bn->tnil = false;
     445           0 :                 bn->tseqbase = oid_nil;
     446           0 :                 if (cap > 1) {
     447           0 :                         bn->tsorted = false;
     448           0 :                         bn->trevsorted = false;
     449           0 :                         bn->tkey = false;
     450             :                 } else {
     451           0 :                         bn->tsorted = ATOMlinear(tt);
     452           0 :                         bn->trevsorted = ATOMlinear(tt);
     453           0 :                         bn->tkey = true;
     454             :                 }
     455             :         }
     456             :         return bn;
     457             : 
     458           0 :   notutf8:
     459           0 :         fclose(f);
     460           0 :         BBPreclaim(bn);
     461           0 :         GDKfree(p);
     462           0 :         GDKerror("input is not UTF-8\n");
     463           0 :         return NULL;
     464             : }
     465             : 
     466             : /*
     467             :  * If the BAT runs out of storage for BUNs it will reallocate space.
     468             :  * For memory-mapped BATs we simply extend the administration after
     469             :  * having an assurance that the BAT still can be safely stored away.
     470             :  */
     471             : BUN
     472       21515 : BATgrows(BAT *b)
     473             : {
     474       21515 :         BUN oldcap, newcap;
     475             : 
     476       21515 :         BATcheck(b, 0);
     477             : 
     478       21515 :         newcap = oldcap = BATcapacity(b);
     479       21515 :         if (newcap < BATTINY)
     480             :                 newcap = 2 * BATTINY;
     481       21585 :         else if (newcap < 10 * BATTINY)
     482       20316 :                 newcap = 4 * newcap;
     483        1269 :         else if (newcap < 50 * BATTINY)
     484         762 :                 newcap = 2 * newcap;
     485         507 :         else if ((double) newcap * BATMARGIN <= (double) BUN_MAX)
     486         506 :                 newcap = (BUN) ((double) newcap * BATMARGIN);
     487             :         else
     488             :                 newcap = BUN_MAX;
     489       21585 :         if (newcap == oldcap) {
     490           0 :                 if (newcap <= BUN_MAX - 10)
     491           0 :                         newcap += 10;
     492             :                 else
     493             :                         newcap = BUN_MAX;
     494             :         }
     495       21515 :         if (ATOMstorage(b->ttype) == TYPE_msk) /* round up to multiple of 32 */
     496           2 :                 newcap = (newcap + 31) & ~(BUN)31;
     497             :         return newcap;
     498             : }
     499             : 
     500             : /*
     501             :  * The routine should ensure that the BAT keeps its location in the
     502             :  * BAT buffer.
     503             :  *
     504             :  * Overflow in the other heaps is dealt with in the atom routines.
     505             :  * Here we merely copy their references into the new administration
     506             :  * space.
     507             :  */
     508             : gdk_return
     509      181790 : BATextend(BAT *b, BUN newcap)
     510             : {
     511      181790 :         size_t theap_size;
     512      181790 :         gdk_return rc = GDK_SUCCEED;
     513             : 
     514      181790 :         assert(newcap <= BUN_MAX);
     515      181790 :         BATcheck(b, GDK_FAIL);
     516             :         /*
     517             :          * The main issue is to properly predict the new BAT size and avoid
     518             :          * storage overflow. The assumption taken is that capacity
     519             :          * overflow is rare. It is changed only when the position of
     520             :          * the next available BUN surpasses the free area marker.  Be
     521             :          * aware that the newcap should be greater than the old value,
     522             :          * otherwise you may easily corrupt the administration of
     523             :          * malloc.
     524             :          */
     525      181790 :         if (newcap <= BATcapacity(b)) {
     526             :                 return GDK_SUCCEED;
     527             :         }
     528             : 
     529       31977 :         if (ATOMstorage(b->ttype) == TYPE_msk) {
     530        1098 :                 newcap = (newcap + 31) & ~(BUN)31; /* round up to multiple of 32 */
     531        1098 :                 theap_size = (size_t) (newcap / 8); /* in bytes */
     532             :         } else {
     533       30879 :                 theap_size = (size_t) newcap << b->tshift;
     534             :         }
     535             : 
     536       31977 :         MT_lock_set(&b->theaplock);
     537       32172 :         if (b->theap->base) {
     538       32172 :                 TRC_DEBUG(HEAP, "HEAPgrow in BATextend %s %zu %zu\n",
     539             :                           b->theap->filename, b->theap->size, theap_size);
     540       32172 :                 rc = HEAPgrow(&b->theap, theap_size, b->batRestricted == BAT_READ);
     541       32227 :                 if (rc == GDK_SUCCEED)
     542       32227 :                         b->batCapacity = newcap;
     543             :         } else {
     544           0 :                 b->batCapacity = newcap;
     545             :         }
     546       32227 :         MT_lock_unset(&b->theaplock);
     547             : 
     548       32224 :         return rc;
     549             : }
     550             : 
     551             : 
     552             : 
     553             : /*
     554             :  * @+ BAT destruction
     555             :  * BATclear quickly removes all elements from a BAT. It must respect
     556             :  * the transaction rules; so stable elements must be moved to the
     557             :  * "deleted" section of the BAT (they cannot be fully deleted
     558             :  * yet). For the elements that really disappear, we must free
     559             :  * heapspace. As an optimization, in the case of no stable elements, we quickly empty
     560             :  * the heaps by copying a standard small empty image over them.
     561             :  */
     562             : gdk_return
     563         423 : BATclear(BAT *b, bool force)
     564             : {
     565         423 :         BUN p, q;
     566             : 
     567         423 :         BATcheck(b, GDK_FAIL);
     568             : 
     569         423 :         if (!force && b->batInserted > 0) {
     570           0 :                 GDKerror("cannot clear committed BAT\n");
     571           0 :                 return GDK_FAIL;
     572             :         }
     573             : 
     574         423 :         TRC_DEBUG(ALGO, ALGOBATFMT "\n", ALGOBATPAR(b));
     575             : 
     576             :         /* kill all search accelerators */
     577         423 :         HASHdestroy(b);
     578         423 :         IMPSdestroy(b);
     579         423 :         OIDXdestroy(b);
     580         423 :         STRMPdestroy(b);
     581         423 :         RTREEdestroy(b);
     582         423 :         PROPdestroy(b);
     583             : 
     584         423 :         bat tvp = 0;
     585             : 
     586             :         /* we must dispose of all inserted atoms */
     587         423 :         MT_lock_set(&b->theaplock);
     588         423 :         if (force && BATatoms[b->ttype].atomDel == NULL) {
     589         416 :                 assert(b->tvheap == NULL || b->tvheap->parentid == b->batCacheid);
     590             :                 /* no stable elements: we do a quick heap clean */
     591             :                 /* need to clean heap which keeps data even though the
     592             :                    BUNs got removed. This means reinitialize when
     593             :                    free > 0
     594             :                 */
     595         416 :                 if (b->tvheap && b->tvheap->free > 0) {
     596          21 :                         Heap *th = GDKmalloc(sizeof(Heap));
     597             : 
     598          21 :                         if (th == NULL) {
     599           0 :                                 MT_lock_unset(&b->theaplock);
     600           0 :                                 return GDK_FAIL;
     601             :                         }
     602          21 :                         *th = (Heap) {
     603          21 :                                 .farmid = b->tvheap->farmid,
     604          21 :                                 .parentid = b->tvheap->parentid,
     605             :                                 .dirty = true,
     606          21 :                                 .hasfile = b->tvheap->hasfile,
     607             :                                 .refs = ATOMIC_VAR_INIT(1),
     608             :                         };
     609          21 :                         strcpy_len(th->filename, b->tvheap->filename, sizeof(th->filename));
     610          21 :                         if (ATOMheap(b->ttype, th, 0) != GDK_SUCCEED) {
     611           0 :                                 MT_lock_unset(&b->theaplock);
     612           0 :                                 return GDK_FAIL;
     613             :                         }
     614          21 :                         tvp = b->tvheap->parentid;
     615          21 :                         HEAPdecref(b->tvheap, false);
     616          21 :                         b->tvheap = th;
     617             :                 }
     618             :         } else {
     619             :                 /* do heap-delete of all inserted atoms */
     620           7 :                 void (*tatmdel)(Heap*,var_t*) = BATatoms[b->ttype].atomDel;
     621             : 
     622             :                 /* TYPE_str has no del method, so we shouldn't get here */
     623           7 :                 assert(tatmdel == NULL || b->twidth == sizeof(var_t));
     624           0 :                 if (tatmdel) {
     625           0 :                         BATiter bi = bat_iterator_nolock(b);
     626             : 
     627           0 :                         for (p = b->batInserted, q = BATcount(b); p < q; p++)
     628           0 :                                 (*tatmdel)(b->tvheap, (var_t*) BUNtloc(bi,p));
     629           0 :                         b->tvheap->dirty = true;
     630             :                 }
     631             :         }
     632             : 
     633         423 :         b->batInserted = 0;
     634         423 :         b->batCount = 0;
     635         423 :         if (b->ttype == TYPE_void)
     636           0 :                 b->batCapacity = 0;
     637         423 :         b->theap->free = 0;
     638         423 :         BAThseqbase(b, 0);
     639         423 :         BATtseqbase(b, ATOMtype(b->ttype) == TYPE_oid ? 0 : oid_nil);
     640         423 :         b->theap->dirty = true;
     641         423 :         b->tnonil = true;
     642         423 :         b->tnil = false;
     643         423 :         b->tsorted = b->trevsorted = ATOMlinear(b->ttype);
     644         423 :         b->tnosorted = b->tnorevsorted = 0;
     645         423 :         b->tkey = true;
     646         423 :         b->tnokey[0] = b->tnokey[1] = 0;
     647         423 :         b->tminpos = b->tmaxpos = BUN_NONE;
     648         423 :         b->tunique_est = 0;
     649         423 :         MT_lock_unset(&b->theaplock);
     650         423 :         if (tvp != 0 && tvp != b->batCacheid)
     651           0 :                 BBPrelease(tvp);
     652             :         return GDK_SUCCEED;
     653             : }
     654             : 
     655             : /* free a cached BAT; leave the bat descriptor cached */
     656             : void
     657      566548 : BATfree(BAT *b)
     658             : {
     659      566548 :         if (b == NULL)
     660             :                 return;
     661             : 
     662             :         /* deallocate all memory for a bat */
     663      566548 :         MT_rwlock_rdlock(&b->thashlock);
     664      566548 :         BUN nunique = BUN_NONE;
     665      566548 :         if (b->thash && b->thash != (Hash *) 1) {
     666        1980 :                 nunique = b->thash->nunique;
     667             :         }
     668      566548 :         MT_rwlock_rdunlock(&b->thashlock);
     669      566548 :         HASHfree(b);
     670      566548 :         IMPSfree(b);
     671      566548 :         OIDXfree(b);
     672      566548 :         STRMPfree(b);
     673      566548 :         RTREEfree(b);
     674      566548 :         MT_lock_set(&b->theaplock);
     675      566548 :         if (nunique != BUN_NONE) {
     676        1980 :                 b->tunique_est = (double) nunique;
     677             :         }
     678             :         /* wait until there are no other references to the heap; a
     679             :          * reference is possible in e.g. BBPsync that uses a
     680             :          * bat_iterator directly on the BBP_desc, i.e. without fix */
     681      566548 :         while (b->theap && (ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1) {
     682           0 :                 MT_lock_unset(&b->theaplock);
     683           0 :                 MT_sleep_ms(1);
     684      566548 :                 MT_lock_set(&b->theaplock);
     685             :         }
     686      566548 :         if (b->theap) {
     687      566548 :                 assert((ATOMIC_GET(&b->theap->refs) & HEAPREFS) == 1);
     688      566548 :                 assert(b->theap->parentid == b->batCacheid);
     689      566548 :                 HEAPfree(b->theap, false);
     690             :         }
     691             :         /* wait until there are no other references to the heap; a
     692             :          * reference is possible in e.g. BBPsync that uses a
     693             :          * bat_iterator directly on the BBP_desc, i.e. without fix */
     694       30226 :         while (b->tvheap && (ATOMIC_GET(&b->tvheap->refs) & HEAPREFS) > 1) {
     695           0 :                 MT_lock_unset(&b->theaplock);
     696           0 :                 MT_sleep_ms(1);
     697      566548 :                 MT_lock_set(&b->theaplock);
     698             :         }
     699      566548 :         if (b->tvheap) {
     700       30226 :                 assert((ATOMIC_GET(&b->tvheap->refs) & HEAPREFS) == 1);
     701       30226 :                 assert(b->tvheap->parentid == b->batCacheid);
     702       30226 :                 HEAPfree(b->tvheap, false);
     703             :         }
     704      566548 :         MT_lock_unset(&b->theaplock);
     705             : }
     706             : 
     707             : /* free a cached BAT descriptor */
     708             : void
     709    23525006 : BATdestroy(BAT *b)
     710             : {
     711    23525006 :         if (b->tvheap) {
     712       29066 :                 GDKfree(b->tvheap);
     713             :         }
     714    23525006 :         PROPdestroy_nolock(b);
     715    23507511 :         MT_lock_destroy(&b->theaplock);
     716    23528620 :         MT_lock_destroy(&b->batIdxLock);
     717    23537003 :         MT_rwlock_destroy(&b->thashlock);
     718    23536905 :         if (b->theap) {
     719      562694 :                 GDKfree(b->theap);
     720             :         }
     721    23546746 :         if (b->oldtail) {
     722           0 :                 ATOMIC_AND(&b->oldtail->refs, ~DELAYEDREMOVE);
     723             :                 /* the bat has not been committed, so we cannot remove
     724             :                  * the old tail file */
     725           0 :                 HEAPdecref(b->oldtail, false);
     726           0 :                 b->oldtail = NULL;
     727             :         }
     728    23546746 :         *b = (BAT) {
     729             :                 .batCacheid = 0,
     730             :         };
     731    23546746 : }
     732             : 
     733             : /*
     734             :  * @+ BAT copying
     735             :  *
     736             :  * BAT copying is an often used operation. So it deserves attention.
     737             :  * When making a copy of a BAT, the following aspects are of
     738             :  * importance:
     739             :  *
     740             :  * - the requested head and tail types. The purpose of the copy may be
     741             :  *   to slightly change these types (e.g. void <-> oid). We may also
     742             :  *   remap between types as long as they share the same
     743             :  *   ATOMstorage(type), i.e. the types have the same physical
     744             :  *   implementation. We may even want to allow 'dirty' tricks such as
     745             :  *   suddenly viewing a flt-column as int.
     746             :  *
     747             :  *   To allow such changes, the desired column type is a
     748             :  *   parameter of COLcopy.
     749             :  *
     750             :  * - access mode. If we want a read-only copy of a read-only BAT, a
     751             :  *   VIEW may do (in this case, the user may be after just an
     752             :  *   independent BAT header and id). This is indicated by the
     753             :  *   parameter (writable = FALSE).
     754             :  *
     755             :  *   In other cases, we really want an independent physical copy
     756             :  *   (writable = TRUE).  Changing the mode to BAT_WRITE will be a
     757             :  *   zero-cost operation if the BAT was copied with (writable = TRUE).
     758             :  *
     759             :  * In GDK, the result is a BAT that is BAT_WRITE iff (writable ==
     760             :  * TRUE).
     761             :  *
     762             :  * When a VIEW is returned, the copy is a logical view on the original,
     763             :  * which ensures that the original cannot be modified or destroyed
     764             :  * (which could affect the shared heaps).
     765             :  */
     766             : static bool
     767         431 : wrongtype(int t1, int t2)
     768             : {
     769             :         /* check if types are compatible. be extremely forgiving */
     770         431 :         if (t1 != TYPE_void) {
     771         431 :                 t1 = ATOMtype(ATOMstorage(t1));
     772         431 :                 t2 = ATOMtype(ATOMstorage(t2));
     773         431 :                 if (t1 != t2) {
     774         364 :                         if (ATOMvarsized(t1) ||
     775         364 :                             ATOMvarsized(t2) ||
     776         364 :                             t1 == TYPE_msk || t2 == TYPE_msk ||
     777         364 :                             ATOMsize(t1) != ATOMsize(t2))
     778           0 :                                 return true;
     779             :                 }
     780             :         }
     781             :         return false;
     782             : }
     783             : 
     784             : /*
                     :  * COLcopy(b, tt, writable, role): produce a copy of b with tail
                     :  * type tt.
                     :  *
                     :  * - b: the source BAT (validated with BATcheck(b, NULL)).
                     :  * - tt: the requested tail type.  It is replaced by TYPE_void when
                     :  *   b is void and a read-only copy is requested; otherwise, if it
                     :  *   differs from b->ttype, it must pass the wrongtype() check.
                     :  * - writable: when false the result may be a view on b, and a
                     :  *   result that is not a view gets batRestricted = BAT_READ; when
                     :  *   true a separate BAT is always created with COLnew2.
                     :  * - role: passed on to COLnew2; a view is only considered when
                     :  *   role == TRANSIENT.
                     :  *
                     :  * Returns the new BAT, or NULL on error: an incompatible tail type
                     :  * (reported through GDKerror), a failure to create the result, or a
                     :  * failure (presumably including a query timeout) during the
                     :  * BUN-by-BUN copy.
                     :  *
     785             :  * There are four main implementation cases:
     786             :  * (1) we are allowed to return a view (zero effort),
     787             :  * (2) the result is void,void (zero effort),
     788             :  * (3) we can copy the heaps (memcopy, or even VM page sharing)
     789             :  * (4) we must insert BUN-by-BUN into the result (fallback)
     790             :  * The latter case is still optimized for the case that the result
     791             :  * is bat[void,T] for a simple fixed-size type T. In that case we
     792             :  * do inline array[T] inserts.
     793             :  */
     794             : BAT *
     795       48301 : COLcopy(BAT *b, int tt, bool writable, role_t role)
     796             : {
     797       48301 :         bool slowcopy = false;
     798       48301 :         BAT *bn = NULL;
     799       48301 :         BATiter bi;
     800       48301 :         char strhash[GDK_STRHASHSIZE];
     801             : 
     802       48301 :         BATcheck(b, NULL);
     803             : 
     804             :         /* maybe a bit ugly to change the requested bat type??
                     :          * a read-only copy of a void column stays void */
     805       48301 :         if (b->ttype == TYPE_void && !writable)
     806       48301 :                 tt = TYPE_void;
     807             : 
     808       48301 :         if (tt != b->ttype && wrongtype(tt, b->ttype)) {
     809           0 :                 GDKerror("wrong tail-type requested\n");
     810           0 :                 return NULL;
     811             :         }
     812             : 
     813             :         /* in case of a string bat, we save the string heap hash table
     814             :          * while we have the lock so that we can restore it in the copy;
     815             :          * this is because during our operation, a parallel thread could
     816             :          * be adding strings to the vheap which would modify the hash
     817             :          * table and that would result in buckets containing values
     818             :          * beyond the original vheap that we're copying
                     :          *
                     :          * if b shares its tail heap and/or vheap with a parent, the
                     :          * theaplock of each distinct parent is taken as well while
                     :          * the iterator is set up; the locks are released again in
                     :          * the reverse order */
     819       48301 :         MT_lock_set(&b->theaplock);
     820       48336 :         BAT *pb = NULL, *pvb = NULL;
     821       48336 :         if (b->theap->parentid != b->batCacheid) {
     822       10791 :                 pb = BBP_desc(b->theap->parentid);
     823       10791 :                 MT_lock_set(&pb->theaplock);
     824             :         }
     825       48343 :         if (b->tvheap &&
     826       16448 :             b->tvheap->parentid != b->batCacheid &&
     827       11499 :             b->tvheap->parentid != b->theap->parentid) {
     828       10834 :                 pvb = BBP_desc(b->tvheap->parentid);
     829       10834 :                 MT_lock_set(&pvb->theaplock);
     830             :         }
     831       48341 :         bi = bat_iterator_nolock(b);
     832       48341 :         if (ATOMstorage(b->ttype) == TYPE_str && b->tvheap->free >= GDK_STRHASHSIZE)
     833       12195 :                 memcpy(strhash, b->tvheap->base, GDK_STRHASHSIZE);
     834             : 
     835       48341 :         bat_iterator_incref(&bi); /* matched by bat_iterator_end() on every exit path below */
     836       48340 :         if (pvb)
     837       10838 :                 MT_lock_unset(&pvb->theaplock);
     838       48341 :         if (pb)
     839       10791 :                 MT_lock_unset(&pb->theaplock);
     840       48347 :         MT_lock_unset(&b->theaplock);
     841             : 
     842             :         /* first try case (1); create a view, possibly with different
     843             :          * atom-types */
     844       48333 :         if (!writable &&
     845       48333 :             role == TRANSIENT &&
     846       20283 :             bi.restricted == BAT_READ &&
     847       16159 :             ATOMstorage(b->ttype) != TYPE_msk && /* no view on TYPE_msk */
     848       16159 :             (bi.h == NULL ||
     849       16159 :              bi.h->parentid == b->batCacheid ||
     850        3624 :              BBP_desc(bi.h->parentid)->batRestricted == BAT_READ)) {
     851       16159 :                 bn = VIEWcreate(b->hseqbase, b, 0, BUN_MAX);
     852       16167 :                 if (bn == NULL) {
     853           0 :                         goto bunins_failed;
     854             :                 }
     855       16167 :                 if (tt != bn->ttype) {
     856          64 :                         bn->ttype = tt;
     857          64 :                         if (bn->tvheap && !ATOMvarsized(tt)) {
     858           0 :                                 if (bn->tvheap->parentid != bn->batCacheid)
     859           0 :                                         BBPrelease(bn->tvheap->parentid);
     860           0 :                                 HEAPdecref(bn->tvheap, false);
     861           0 :                                 bn->tvheap = NULL;
     862             :                         }
     863          64 :                         bn->tseqbase = ATOMtype(tt) == TYPE_oid ? bi.tseq : oid_nil;
     864             :                 }
     865       16167 :                 bat_iterator_end(&bi);
     866       16167 :                 return bn;
     867             :         } else {
     868             :                 /* check whether we need case (4); BUN-by-BUN copy (by
     869             :                  * setting slowcopy to true) */
     870       32174 :                 if (ATOMsize(tt) != ATOMsize(bi.type)) {
     871             :                         /* oops, void materialization */
     872             :                         slowcopy = true;
     873       31808 :                 } else if (bi.h && bi.h->parentid != b->batCacheid &&
     874        7166 :                            BATcapacity(BBP_desc(bi.h->parentid)) > bi.count + bi.count) {
     875             :                         /* reduced slice view: do not copy too much
     876             :                          * garbage */
     877             :                         slowcopy = true;
     878       24832 :                 } else if (bi.vh && bi.vh->parentid != b->batCacheid &&
     879       10264 :                            BATcount(BBP_desc(bi.vh->parentid)) > bi.count + bi.count) {
     880             :                         /* reduced vheap view: do not copy too much
     881             :                          * garbage; this really is a heuristic since the
     882             :                          * vheap could be used completely, even if the
     883             :                          * offset heap is only (less than) half the size
     884             :                          * of the parent's offset heap */
     885        9529 :                         slowcopy = true;
     886             :                 }
     887             : 
     888       32174 :                 bn = COLnew2(b->hseqbase, tt, bi.count, role, bi.width);
     889       32141 :                 if (bn == NULL) {
     890           0 :                         goto bunins_failed;
     891             :                 }
     892       32141 :                 if (bn->tvheap != NULL && bn->tvheap->base == NULL) {
     893             :                         /* this combination can happen since the last
     894             :                          * argument of COLnew2 not being zero triggers a
     895             :                          * skip in the allocation of the tvheap */
     896       12434 :                         if (ATOMheap(bn->ttype, bn->tvheap, bn->batCapacity) != GDK_SUCCEED) {
     897           0 :                                 goto bunins_failed;
     898             :                         }
     899             :                 }
     900             : 
     901       32142 :                 if (tt == TYPE_void) {
     902             :                         /* case (2): a void,void result => nothing to
     903             :                          * copy! */
     904        1128 :                         bn->theap->free = 0;
     905       31014 :                 } else if (!slowcopy) {
     906             :                         /* case (3): just copy the heaps */
     907       21503 :                         if (bn->tvheap && HEAPextend(bn->tvheap, bi.vhfree, true) != GDK_SUCCEED) {
     908           0 :                                 goto bunins_failed;
     909             :                         }
     910       21514 :                         memcpy(bn->theap->base, bi.base, bi.hfree);
     911       21514 :                         bn->theap->free = bi.hfree;
     912       21514 :                         bn->theap->dirty = true;
     913       21514 :                         if (bn->tvheap) {
     914        9549 :                                 memcpy(bn->tvheap->base, bi.vh->base, bi.vhfree);
     915        9549 :                                 bn->tvheap->free = bi.vhfree;
     916        9549 :                                 bn->tvheap->dirty = true;
     917        9549 :                                 bn->tascii = bi.ascii;
     918        9549 :                                 if (ATOMstorage(b->ttype) == TYPE_str && bi.vhfree >= GDK_STRHASHSIZE)
     919        8718 :                                         memcpy(bn->tvheap->base, strhash, GDK_STRHASHSIZE);
     920             :                         }
     921             : 
     922             :                         /* make sure we use the correct capacity */
     923       21514 :                         if (ATOMstorage(bn->ttype) == TYPE_msk)
     924           0 :                                 bn->batCapacity = (BUN) (bn->theap->size * 8);
     925       21514 :                         else if (bn->ttype)
     926       21514 :                                 bn->batCapacity = (BUN) (bn->theap->size >> bn->tshift);
     927             :                         else
     928           0 :                                 bn->batCapacity = 0;
     929       19035 :                 } else if (tt != TYPE_void || ATOMextern(tt)) {
     930             :                         /* case (4): one-by-one BUN insert (really slow)
                     :                          *
                     :                          * NOTE(review): tt == TYPE_void was already handled
                     :                          * by the first branch of this chain, so the condition
                     :                          * above is always true here and the three "optimized"
                     :                          * branches below look unreachable -- confirm before
                     :                          * relying on them */
     931        9511 :                         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
     932             : 
     933     7775208 :                         TIMEOUT_LOOP_IDX_DECL(p, bi.count, qry_ctx) {
     934     7755705 :                                 const void *t = BUNtail(bi, p);
     935             : 
     936     7750209 :                                 if (bunfastapp_nocheck(bn, t) != GDK_SUCCEED) {
     937           0 :                                         goto bunins_failed;
     938             :                                 }
     939             :                         }
     940        9529 :                         TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed, qry_ctx));
     941        9524 :                         bn->theap->dirty |= bi.count > 0;
     942             :                 } else if (tt != TYPE_void && bi.type == TYPE_void) {
     943             :                         /* case (4): optimized for unary void
     944             :                          * materialization */
     945             :                         oid cur = bi.tseq, *dst = (oid *) Tloc(bn, 0);
     946             :                         const oid inc = !is_oid_nil(cur);
     947             : 
     948             :                         bn->theap->free = bi.count * sizeof(oid);
     949             :                         bn->theap->dirty |= bi.count > 0;
     950             :                         for (BUN p = 0; p < bi.count; p++) {
     951             :                                 dst[p] = cur;
     952             :                                 cur += inc;
     953             :                         }
     954             :                 } else if (ATOMstorage(bi.type) == TYPE_msk) {
     955             :                         /* convert number of bits to number of bytes,
     956             :                          * and round the latter up to a multiple of
     957             :                          * 4 (copy in units of 4 bytes) */
     958             :                         bn->theap->free = ((bi.count + 31) / 32) * 4;
     959             :                         bn->theap->dirty |= bi.count > 0;
     960             :                         memcpy(Tloc(bn, 0), bi.base, bn->theap->free);
     961             :                 } else {
     962             :                         /* case (4): optimized for simple array copy */
     963             :                         bn->theap->free = bi.count << bn->tshift;
     964             :                         bn->theap->dirty |= bi.count > 0;
     965             :                         memcpy(Tloc(bn, 0), bi.base, bn->theap->free);
     966             :                 }
     967             :                 /* copy all properties (size+other) from the source bat */
     968       32166 :                 BATsetcount(bn, bi.count);
     969             :         }
     970             :         /* set properties (note that types may have changed in the copy);
                     :          * the view case returned above, so this is only reached for a
                     :          * BAT created with COLnew2 */
     971       63949 :         if (ATOMtype(tt) == ATOMtype(bi.type)) {
     972       32155 :                 if (ATOMtype(tt) == TYPE_oid) {
     973        8025 :                         BATtseqbase(bn, bi.tseq);
     974             :                 } else {
     975       24130 :                         BATtseqbase(bn, oid_nil);
     976             :                 }
     977       32102 :                 BATkey(bn, bi.key);
     978       32091 :                 bn->tsorted = bi.sorted;
     979       32091 :                 bn->trevsorted = bi.revsorted;
     980       32091 :                 bn->tnorevsorted = bi.norevsorted;
     981       32091 :                 if (bi.nokey[0] != bi.nokey[1]) {
     982        2685 :                         bn->tnokey[0] = bi.nokey[0];
     983        2685 :                         bn->tnokey[1] = bi.nokey[1];
     984             :                 } else {
     985       29406 :                         bn->tnokey[0] = bn->tnokey[1] = 0;
     986             :                 }
     987       32091 :                 bn->tnosorted = bi.nosorted;
     988       32091 :                 bn->tnonil = bi.nonil;
     989       32091 :                 bn->tnil = bi.nil;
     990       32091 :                 bn->tminpos = bi.minpos;
     991       32091 :                 bn->tmaxpos = bi.maxpos;
     992       32091 :                 bn->tunique_est = bi.unique_est;
     993           3 :         } else if (ATOMstorage(tt) == ATOMstorage(b->ttype) &&
     994           3 :                    ATOMcompare(tt) == ATOMcompare(b->ttype)) {
     995           3 :                 BUN h = bi.count;
     996           3 :                 bn->tsorted = bi.sorted;
     997           3 :                 bn->trevsorted = bi.revsorted;
     998           3 :                 BATkey(bn, bi.key);
     999           3 :                 bn->tnonil = bi.nonil;
    1000           3 :                 bn->tnil = bi.nil;
    1001           3 :                 if (bi.nosorted > 0 && bi.nosorted < h)
    1002           1 :                         bn->tnosorted = bi.nosorted;
    1003             :                 else
    1004           2 :                         bn->tnosorted = 0;
    1005           3 :                 if (bi.norevsorted > 0 && bi.norevsorted < h)
    1006           3 :                         bn->tnorevsorted = bi.norevsorted;
    1007             :                 else
    1008           0 :                         bn->tnorevsorted = 0;
    1009           3 :                 if (bi.nokey[0] < h &&
    1010           3 :                     bi.nokey[1] < h &&
    1011             :                     bi.nokey[0] != bi.nokey[1]) {
    1012           0 :                         bn->tnokey[0] = bi.nokey[0];
    1013           0 :                         bn->tnokey[1] = bi.nokey[1];
    1014             :                 } else {
    1015           3 :                         bn->tnokey[0] = bn->tnokey[1] = 0;
    1016             :                 }
    1017           3 :                 bn->tminpos = bi.minpos;
    1018           3 :                 bn->tmaxpos = bi.maxpos;
    1019           3 :                 bn->tunique_est = bi.unique_est;
    1020             :         } else {
    1021           0 :                 bn->tsorted = bn->trevsorted = false; /* set based on count later */
    1022           0 :                 bn->tnonil = bn->tnil = false;
    1023           0 :                 bn->tkey = false;
    1024           0 :                 bn->tnosorted = bn->tnorevsorted = 0;
    1025           0 :                 bn->tnokey[0] = bn->tnokey[1] = 0;
    1026             :         }
    1027       32094 :         if (BATcount(bn) <= 1) { /* at most one value: key, and sorted both ways if the source type is linear */
    1028        5984 :                 bn->tsorted = ATOMlinear(b->ttype);
    1029        5984 :                 bn->trevsorted = ATOMlinear(b->ttype);
    1030        5984 :                 bn->tkey = true;
    1031             :         }
    1032       32094 :         bat_iterator_end(&bi);
    1033       32163 :         if (!writable)
    1034        4116 :                 bn->batRestricted = BAT_READ;
    1035       32163 :         TRC_DEBUG(ALGO, ALGOBATFMT " -> " ALGOBATFMT "\n",
    1036             :                   ALGOBATPAR(b), ALGOBATPAR(bn));
    1037             :         return bn;
    1038           0 :       bunins_failed: /* common error exit; bn may still be NULL when we get here */
    1039           0 :         bat_iterator_end(&bi);
    1040           0 :         BBPreclaim(bn);
    1041             :         return NULL;
    1042             : }
    1043             : 
    1044             : /* Append an array of values of length count to the bat.  For
    1045             :  * fixed-sized values, `values' is an array of values, for
    1046             :  * variable-sized values, `values' is an array of pointers to values.
    1047             :  * If values equals NULL, count times nil will be appended (an msk
                     :  * bat gets the value false instead, see the initialisation of t).
                     :  *
                     :  * `force' is only handed to ALIGNapp in this function.
                     :  *
                     :  * Returns GDK_SUCCEED when count is 0 or after a successful append.
                     :  * Returns GDK_FAIL with "bat too large" when the old count is
                     :  * BUN_MAX or old count + count >= BUN_MAX, and when BATmaterialize
                     :  * or unshare_varsized_heap do not succeed; the return code of a
                     :  * failing BATextend or tfastins_* call is passed on.  BATcheck and
                     :  * ALIGNapp are defined elsewhere and are given GDK_FAIL here --
                     :  * presumably their early-return value, confirm at the definition.
                     :  *
                     :  * Outline:
                     :  *  1. a void bat for which BATtdense holds only has its count
                     :  *     raised when the new values continue the sequence, otherwise
                     :  *     it is materialized first;
                     :  *  2. capacity is extended to at least old count + count;
                     :  *  3. with thashlock write-locked the values are written and an
                     :  *     existing hash is appended to; for variable-sized and
                     :  *     fixed-width values the min/max positions and tnil/tnonil
                     :  *     are tracked as well;
                     :  *  4. with theaplock held too, tminpos/tmaxpos, the sortedness
                     :  *     and key flags, tseqbase (oid bats) and the count are set;
                     :  *  5. after both locks are released the IMPS, OIDX, STRMP and
                     :  *     RTREE destroy functions are called on b.
                     :  *
                     :  * NOTE(review): the not-null and min/max bound properties are only
                     :  * enforced in the variable-sized branch; the fixed-width and nil
                     :  * branches neither check them nor call VALclear on minprop and
                     :  * maxprop (also not on their error returns).  Confirm that VALcopy
                     :  * cannot allocate in those cases.
                     :  * NOTE(review): when a tfastins_* call fails, the function returns
                     :  * without BATsetcount and without undoing tnil/tnonil changes made
                     :  * earlier in the loop.
                     :  */
    1048             : gdk_return
    1049    54569071 : BUNappendmulti(BAT *b, const void *values, BUN count, bool force)
    1050             : {
    1051    54569071 :         BUN p; /* position at which the next value is written */
    1052    54569071 :         BUN nunique = 0; /* unique count taken from the hash, 0 if none */
    1053             : 
    1054    54569071 :         BATcheck(b, GDK_FAIL);
    1055             : 
    1056    54569071 :         assert(!VIEWtparent(b));
    1057             : 
    1058    54569071 :         if (count == 0) /* nothing to append */
    1059             :                 return GDK_SUCCEED;
    1060             : 
    1061    54565404 :         TRC_DEBUG(ALGO, ALGOBATFMT " appending " BUNFMT " values%s\n", ALGOBATPAR(b), count, values ? "" : " (all nil)");
    1062             : 
    1063    54556875 :         p = BATcount(b);                /* insert at end */
    1064    54556875 :         if (p == BUN_MAX || BATcount(b) + count >= BUN_MAX) { /* new count must stay below BUN_MAX */
    1065           0 :                 GDKerror("bat too large\n");
    1066           0 :                 return GDK_FAIL;
    1067             :         }
    1068             : 
    1069    54556875 :         ALIGNapp(b, force, GDK_FAIL); /* only use of `force' in this function */
    1070             :         /* load hash so that we can maintain it */
    1071    54599167 :         (void) BATcheckhash(b);
    1072             : 
    1073    54593880 :         if (b->ttype == TYPE_void && BATtdense(b)) { /* void bat flagged dense: try to avoid writing values */
    1074           0 :                 const oid *ovals = values;
    1075           0 :                 bool dense = b->batCount == 0 || (ovals != NULL && b->tseqbase + 1 == ovals[0]); /* NOTE(review): compares against tseqbase + 1, not tseqbase + batCount; looks right only for batCount == 1 -- confirm */
    1076           0 :                 if (ovals) {
    1077           0 :                         for (BUN i = 1; dense && i < count; i++) { /* new values must be consecutive among themselves */
    1078           0 :                                 dense = ovals[i - 1] + 1 == ovals[i];
    1079             :                         }
    1080             :                 }
    1081           0 :                 if (dense) { /* still dense: only seqbase (if empty) and count change */
    1082           0 :                         MT_lock_set(&b->theaplock);
    1083           0 :                         if (b->batCount == 0)
    1084           0 :                                 b->tseqbase = ovals ? ovals[0] : oid_nil;
    1085           0 :                         BATsetcount(b, BATcount(b) + count);
    1086           0 :                         MT_lock_unset(&b->theaplock);
    1087           0 :                         return GDK_SUCCEED;
    1088             :                 } else {
    1089             :                         /* we need to materialize b; allocate enough capacity */
    1090           0 :                         if (BATmaterialize(b, BATcount(b) + count) != GDK_SUCCEED)
    1091             :                                 return GDK_FAIL;
    1092             :                 }
    1093             :         }
    1094             : 
    1095    54593880 :         if (unshare_varsized_heap(b) != GDK_SUCCEED) {
    1096             :                 return GDK_FAIL;
    1097             :         }
    1098             : 
    1099    54578831 :         if (BATcount(b) + count > BATcapacity(b)) {
    1100             :                 /* if needed space exceeds a normal growth extend just
    1101             :                  * with what's needed */
    1102       10606 :                 BUN ncap = BATcount(b) + count;
    1103       10606 :                 BUN grows = BATgrows(b);
    1104             : 
    1105       10602 :                 if (ncap > grows) /* i.e. grows = max(BATgrows(b), ncap) */
    1106             :                         grows = ncap;
    1107       10602 :                 gdk_return rc = BATextend(b, grows);
    1108       10611 :                 if (rc != GDK_SUCCEED)
    1109             :                         return rc;
    1110             :         }
    1111             : 
    1112    54578836 :         const void *t = b->ttype == TYPE_msk ? &(msk){false} : ATOMnilptr(b->ttype); /* value used when the caller's values are not read */
    1113    54578836 :         MT_lock_set(&b->theaplock);
    1114    54601371 :         BATiter bi = bat_iterator_nolock(b); /* iterator filled in while theaplock is held */
    1115    54601371 :         const ValRecord *prop;
    1116    54601371 :         ValRecord minprop, maxprop; /* private copies of the bound properties */
    1117    54601371 :         const void *minbound = NULL, *maxbound = NULL; /* stay NULL when there is no such bound (or the copy failed) */
    1118    54601382 :         if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL &&
    1119          11 :             VALcopy(&minprop, prop) != NULL)
    1120          11 :                 minbound = VALptr(&minprop);
    1121    54503403 :         if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL &&
    1122          10 :             VALcopy(&maxprop, prop) != NULL)
    1123          10 :                 maxbound = VALptr(&maxprop);
    1124    54563067 :         const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
    1125    54593273 :         MT_lock_unset(&b->theaplock);
    1126    54623034 :         MT_rwlock_wrlock(&b->thashlock); /* held until just before the index destroy calls at the end */
    1127    54622632 :         if (values && b->ttype) { /* caller supplied values and ttype is non-zero */
    1128    54614163 :                 int (*atomcmp) (const void *, const void *) = ATOMcompare(b->ttype);
    1129    54614163 :                 const void *atomnil = ATOMnilptr(b->ttype);
    1130    54614163 :                 const void *minvalp = NULL, *maxvalp = NULL; /* current min/max values, valid while bi.minpos/bi.maxpos != BUN_NONE */
    1131    54614163 :                 if (b->tvheap) { /* variable-sized: values is read as an array of pointers */
    1132    21995453 :                         if (bi.minpos != BUN_NONE)
    1133    19110418 :                                 minvalp = BUNtvar(bi, bi.minpos);
    1134    21989834 :                         if (bi.maxpos != BUN_NONE)
    1135    19105134 :                                 maxvalp = BUNtvar(bi, bi.maxpos);
    1136    21993494 :                         const void *vbase = b->tvheap->base; /* remembered to notice the var heap moving */
    1137    44000464 :                         for (BUN i = 0; i < count; i++) {
    1138    22011887 :                                 t = ((void **) values)[i];
    1139    22011887 :                                 bool isnil = atomcmp(t, atomnil) == 0;
    1140    22006536 :                                 gdk_return rc;
    1141    22006536 :                                 if (notnull && isnil) {
    1142           0 :                                         assert(0); /* with assertions enabled this fires before GDKerror is reached */
    1143             :                                         GDKerror("NULL value not within bounds\n");
    1144             :                                         rc = GDK_FAIL;
    1145    22006536 :                                 } else if (minbound &&
    1146    22006536 :                                            !isnil &&
    1147           0 :                                            atomcmp(t, minbound) < 0) { /* below the minimum bound */
    1148           0 :                                         assert(0);
    1149             :                                         GDKerror("value not within bounds\n");
    1150             :                                         rc = GDK_FAIL;
    1151    22006536 :                                 } else if (maxbound &&
    1152           0 :                                            !isnil &&
    1153           0 :                                            atomcmp(t, maxbound) >= 0) { /* at or above the maximum bound */
    1154           0 :                                         assert(0);
    1155             :                                         GDKerror("value not within bounds\n");
    1156             :                                         rc = GDK_FAIL;
    1157             :                                 } else {
    1158    22006536 :                                         rc = tfastins_nocheckVAR(b, p, t);
    1159             :                                 }
    1160    22006956 :                                 if (rc != GDK_SUCCEED) { /* release the lock and the bound copies before bailing out */
    1161           0 :                                         MT_rwlock_wrunlock(&b->thashlock);
    1162           0 :                                         if (minbound)
    1163           0 :                                                 VALclear(&minprop);
    1164           0 :                                         if (maxbound)
    1165           0 :                                                 VALclear(&maxprop);
    1166           0 :                                         return rc;
    1167             :                                 }
    1168    22006956 :                                 if (vbase != b->tvheap->base) {
    1169             :                                         /* tvheap changed location, so
    1170             :                                          * pointers may need to be
    1171             :                                          * updated (not if they were
    1172             :                                          * initialized from t below, but
    1173             :                                          * we don't know) */
    1174        3390 :                                         BUN minpos = bi.minpos; /* keep the positions tracked so far across the iterator refresh */
    1175        3390 :                                         BUN maxpos = bi.maxpos;
    1176        3390 :                                         MT_lock_set(&b->theaplock);
    1177        3390 :                                         bi = bat_iterator_nolock(b);
    1178        3390 :                                         MT_lock_unset(&b->theaplock);
    1179        3390 :                                         bi.minpos = minpos;
    1180        3390 :                                         bi.maxpos = maxpos;
    1181        3390 :                                         vbase = b->tvheap->base;
    1182        3390 :                                         if (bi.minpos != BUN_NONE)
    1183        2643 :                                                 minvalp = BUNtvar(bi, bi.minpos);
    1184        3390 :                                         if (bi.maxpos != BUN_NONE)
    1185        2642 :                                                 maxvalp = BUNtvar(bi, bi.maxpos);
    1186             :                                 }
    1187    22006956 :                                 if (!isnil) {
    1188    20333640 :                                         if (p == 0) { /* first value of the bat is both min and max */
    1189      202996 :                                                 bi.minpos = bi.maxpos = 0;
    1190      202996 :                                                 minvalp = maxvalp = t;
    1191             :                                         } else {
    1192    39195115 :                                                 if (bi.minpos != BUN_NONE &&
    1193    19064641 :                                                     atomcmp(minvalp, t) > 0) { /* new minimum; an unknown position stays unknown */
    1194       82446 :                                                         bi.minpos = p;
    1195       82446 :                                                         minvalp = t;
    1196             :                                                 }
    1197    39195868 :                                                 if (bi.maxpos != BUN_NONE &&
    1198    19065210 :                                                     atomcmp(maxvalp, t) < 0) { /* new maximum */
    1199     2247378 :                                                         bi.maxpos = p;
    1200     2247378 :                                                         maxvalp = t;
    1201             :                                                 }
    1202             :                                         }
    1203             :                                 } else {
    1204     1673316 :                                         b->tnil = true;
    1205     1673316 :                                         b->tnonil = false;
    1206             :                                 }
    1207    22006970 :                                 p++;
    1208             :                         }
    1209    21988577 :                         if (minbound)
    1210           0 :                                 VALclear(&minprop);
    1211    21989352 :                         if (maxbound)
    1212           0 :                                 VALclear(&maxprop);
    1213    21985536 :                         if (b->thash) { /* hash is appended to only after all values are in */
    1214        5811 :                                 p -= count; /* rewind to the first appended position */
    1215       11622 :                                 for (BUN i = 0; i < count; i++) {
    1216        5811 :                                         t = ((void **) values)[i];
    1217        5811 :                                         HASHappend_locked(b, p, t);
    1218        5811 :                                         p++;
    1219             :                                 }
    1220        5811 :                                 nunique = b->thash ? b->thash->nunique : 0; /* thash is re-tested after the appends */
    1221             :                         }
    1222    32618710 :                 } else if (ATOMstorage(b->ttype) == TYPE_msk) { /* msk storage: no min/max, no nil */
    1223        7881 :                         bi.minpos = bi.maxpos = BUN_NONE;
    1224        7881 :                         minvalp = maxvalp = NULL;
    1225        7881 :                         b->tnil = false;
    1226        7881 :                         b->tnonil = true;
    1227       15762 :                         for (BUN i = 0; i < count; i++) {
    1228        7881 :                                 t = (void *) ((char *) values + (i << b->tshift));
    1229        7881 :                                 mskSetVal(b, p, *(msk *) t);
    1230        7881 :                                 p++;
    1231             :                         }
    1232             :                 } else { /* fixed-width: values is read at byte offset i << tshift */
    1233    32610829 :                         if (bi.minpos != BUN_NONE)
    1234    31213789 :                                 minvalp = BUNtloc(bi, bi.minpos);
    1235    32610829 :                         if (bi.maxpos != BUN_NONE)
    1236    31260514 :                                 maxvalp = BUNtloc(bi, bi.maxpos);
    1237    65262012 :                         for (BUN i = 0; i < count; i++) {
    1238    32658782 :                                 t = (void *) ((char *) values + (i << b->tshift));
    1239    32658782 :                                 gdk_return rc = tfastins_nocheckFIX(b, p, t);
    1240    32652170 :                                 if (rc != GDK_SUCCEED) {
    1241           0 :                                         MT_rwlock_wrunlock(&b->thashlock);
    1242           0 :                                         return rc;
    1243             :                                 }
    1244    32652170 :                                 if (b->thash) { /* here the hash is maintained per value, inside the loop */
    1245      482125 :                                         HASHappend_locked(b, p, t);
    1246             :                                 }
    1247    32652170 :                                 if (atomcmp(t, atomnil) != 0) {
    1248    31751015 :                                         if (p == 0) {
    1249      370674 :                                                 bi.minpos = bi.maxpos = 0;
    1250      370674 :                                                 minvalp = maxvalp = t;
    1251             :                                         } else {
    1252    62620632 :                                                 if (bi.minpos != BUN_NONE &&
    1253    31238590 :                                                     atomcmp(minvalp, t) > 0) {
    1254       40184 :                                                         bi.minpos = p;
    1255       40184 :                                                         minvalp = t;
    1256             :                                                 }
    1257    62666736 :                                                 if (bi.maxpos != BUN_NONE &&
    1258    31285456 :                                                     atomcmp(maxvalp, t) < 0) {
    1259     7667580 :                                                         bi.maxpos = p;
    1260     7667580 :                                                         maxvalp = t;
    1261             :                                                 }
    1262             :                                         }
    1263             :                                 } else {
    1264      899229 :                                         b->tnil = true;
    1265      899229 :                                         b->tnonil = false;
    1266             :                                 }
    1267    32651183 :                                 p++;
    1268             :                         }
    1269    32603230 :                         nunique = b->thash ? b->thash->nunique : 0;
    1270             :                 }
    1271             :         } else {
    1272             :                 /* inserting nils, unless it's msk */
    1273       18531 :                 for (BUN i = 0; i < count; i++) { /* t still has its initial value; values is not read here */
    1274       10061 :                         gdk_return rc = tfastins_nocheck(b, p, t);
    1275       10062 :                         if (rc != GDK_SUCCEED) {
    1276           0 :                                 MT_rwlock_wrunlock(&b->thashlock);
    1277           0 :                                 return rc;
    1278             :                         }
    1279       10062 :                         if (b->thash) {
    1280           0 :                                 HASHappend_locked(b, p, t);
    1281             :                         }
    1282       10062 :                         p++;
    1283             :                 }
    1284        8470 :                 nunique = b->thash ? b->thash->nunique : 0;
    1285        8470 :                 b->tnil = b->ttype != TYPE_msk;
    1286        8470 :                 b->tnonil = false;
    1287             :         }
    1288    54605117 :         MT_lock_set(&b->theaplock); /* property and count updates below happen with theaplock held */
    1289    54582467 :         b->tminpos = bi.minpos;
    1290    54582467 :         b->tmaxpos = bi.maxpos;
    1291    54582467 :         if (count > BATcount(b) / gdk_unique_estimate_keep_fraction) /* BATsetcount has not run yet at this point */
    1292    15981535 :                 b->tunique_est = 0;
    1293             : 
    1294    54582467 :         if (b->ttype == TYPE_oid) {
    1295             :                 /* spend extra effort on oid (possible candidate list) */
    1296      534652 :                 if (values == NULL || is_oid_nil(((oid *) values)[0])) { /* a nil was appended: drop all order knowledge */
    1297         158 :                         b->tsorted = false;
    1298         158 :                         b->trevsorted = false;
    1299         158 :                         b->tkey = false;
    1300         158 :                         b->tseqbase = oid_nil;
    1301             :                 } else {
    1302      534494 :                         if (b->batCount == 0) { /* batCount is the pre-append count until BATsetcount below */
    1303        6336 :                                 b->tsorted = true;
    1304        6336 :                                 b->trevsorted = true;
    1305        6336 :                                 b->tkey = true;
    1306        6336 :                                 b->tseqbase = count == 1 ? ((oid *) values)[0] : oid_nil;
    1307             :                         } else {
    1308      528158 :                                 if (!is_oid_nil(b->tseqbase) &&
    1309      320689 :                                     (count > 1 ||
    1310      320689 :                                      b->tseqbase + b->batCount != ((oid *) values)[0])) /* seqbase survives only a single value that continues the sequence */
    1311        2368 :                                         b->tseqbase = oid_nil;
    1312      528158 :                                 if (b->tsorted && !is_oid_nil(((oid *) b->theap->base)[b->batCount - 1]) && ((oid *) b->theap->base)[b->batCount - 1] > ((oid *) values)[0]) { /* old last value > first new value */
    1313          51 :                                         b->tsorted = false;
    1314          51 :                                         if (b->tnosorted == 0)
    1315          51 :                                                 b->tnosorted = b->batCount;
    1316             :                                 }
    1317      528158 :                                 if (b->trevsorted && !is_oid_nil(((oid *) values)[0]) && ((oid *) b->theap->base)[b->batCount - 1] < ((oid *) values)[0]) { /* old last value < first new value */
    1318        5527 :                                         b->trevsorted = false;
    1319        5527 :                                         if (b->tnorevsorted == 0)
    1320        5527 :                                                 b->tnorevsorted = b->batCount;
    1321             :                                 }
    1322      528158 :                                 if (b->tkey) {
    1323      525538 :                                         if (((oid *) b->theap->base)[b->batCount - 1] == ((oid *) values)[0]) { /* duplicate of the old last value: record the pair */
    1324          17 :                                                 b->tkey = false;
    1325          17 :                                                 if (b->tnokey[1] == 0) {
    1326          17 :                                                         b->tnokey[0] = b->batCount - 1;
    1327          17 :                                                         b->tnokey[1] = b->batCount;
    1328             :                                                 }
    1329      525521 :                                         } else if (!b->tsorted && !b->trevsorted) /* without an ordering the key flag is dropped */
    1330          37 :                                                 b->tkey = false;
    1331             :                                 }
    1332             :                         }
    1333      534494 :                         for (BUN i = 1; i < count; i++) { /* compare each new value with the new value before it */
    1334           0 :                                 if (is_oid_nil(((oid *) values)[i])) {
    1335           0 :                                         b->tsorted = false;
    1336           0 :                                         b->trevsorted = false;
    1337           0 :                                         b->tkey = false;
    1338           0 :                                         b->tseqbase = oid_nil;
    1339           0 :                                         break;
    1340             :                                 }
    1341           0 :                                 if (((oid *) values)[i - 1] == ((oid *) values)[i]) { /* equal neighbours */
    1342           0 :                                         b->tkey = false;
    1343           0 :                                         if (b->tnokey[1] == 0) {
    1344           0 :                                                 b->tnokey[0] = b->batCount + i - 1;
    1345           0 :                                                 b->tnokey[1] = b->batCount + i;
    1346             :                                         }
    1347           0 :                                 } else if (((oid *) values)[i - 1] > ((oid *) values)[i]) { /* descending step */
    1348           0 :                                         b->tsorted = false;
    1349           0 :                                         if (b->tnosorted == 0)
    1350           0 :                                                 b->tnosorted = b->batCount + i;
    1351           0 :                                         if (!b->trevsorted)
    1352           0 :                                                 b->tkey = false;
    1353             :                                 } else { /* ascending step */
    1354           0 :                                         if (((oid *) values)[i - 1] + 1 != ((oid *) values)[i])
    1355           0 :                                                 b->tseqbase = oid_nil;
    1356           0 :                                         b->trevsorted = false;
    1357           0 :                                         if (b->tnorevsorted == 0)
    1358           0 :                                                 b->tnorevsorted = b->batCount + i;
    1359           0 :                                         if (!b->tsorted)
    1360           0 :                                                 b->tkey = false;
    1361             :                                 }
    1362             :                         }
    1363             :                 }
    1364    54047815 :         } else if (!ATOMlinear(b->ttype)) { /* type without linear order: no sortedness or key claims */
    1365        7881 :                 b->tsorted = b->trevsorted = b->tkey = false;
    1366    54039934 :         } else if (b->batCount == 0) { /* first values of a previously empty bat */
    1367      574214 :                 if (values == NULL) { /* only nils were appended */
    1368           0 :                         b->tsorted = b->trevsorted = true;
    1369           0 :                         b->tkey = count == 1;
    1370           0 :                         b->tunique_est = 1;
    1371             :                 } else {
    1372      574214 :                         int c;
    1373      574214 :                         switch (count) {
    1374      573893 :                         case 1:
    1375      573893 :                                 b->tsorted = b->trevsorted = b->tkey = true;
    1376      573893 :                                 b->tunique_est = 1;
    1377      573893 :                                 break;
    1378         122 :                         case 2: /* one comparison of the two values decides every flag */
    1379         122 :                                 if (b->tvheap)
    1380          41 :                                         c = ATOMcmp(b->ttype,
    1381             :                                                     ((void **) values)[0],
    1382             :                                                     ((void **) values)[1]);
    1383             :                                 else
    1384          81 :                                         c = ATOMcmp(b->ttype,
    1385             :                                                     values,
    1386             :                                                     (char *) values + b->twidth);
    1387         122 :                                 b->tsorted = c <= 0;
    1388         122 :                                 b->tnosorted = !b->tsorted;
    1389         122 :                                 b->trevsorted = c >= 0;
    1390         122 :                                 b->tnorevsorted = !b->trevsorted;
    1391         122 :                                 b->tkey = c != 0;
    1392         122 :                                 b->tnokey[0] = 0;
    1393         122 :                                 b->tnokey[1] = !b->tkey;
    1394         122 :                                 b->tunique_est = (double) (1 + b->tkey);
    1395         122 :                                 break;
    1396         199 :                         default: /* more than two values are not inspected */
    1397         199 :                                 b->tsorted = b->trevsorted = b->tkey = false;
    1398         199 :                                 break;
    1399             :                         }
    1400             :                 }
    1401    53465720 :         } else if (b->batCount == 1 && count == 1) { /* second value: compare it with the existing one */
    1402      427417 :                 bi = bat_iterator_nolock(b);
    1403      427417 :                 t = b->ttype == TYPE_msk ? &(msk){false} : ATOMnilptr(b->ttype);
    1404      427417 :                 if (values != NULL) {
    1405      424843 :                         if (b->tvheap)
    1406      159671 :                                 t = ((void **) values)[0];
    1407             :                         else
    1408             :                                 t = values;
    1409             :                 }
    1410      427417 :                 int c = ATOMcmp(b->ttype, BUNtail(bi, 0), t);
    1411      423858 :                 b->tsorted = c <= 0;
    1412      423858 :                 b->tnosorted = !b->tsorted;
    1413      423858 :                 b->trevsorted = c >= 0;
    1414      423858 :                 b->tnorevsorted = !b->trevsorted;
    1415      423858 :                 b->tkey = c != 0;
    1416      423858 :                 b->tnokey[0] = 0;
    1417      423858 :                 b->tnokey[1] = !b->tkey;
    1418      423858 :                 b->tunique_est = (double) (1 + b->tkey);
    1419             :         } else { /* general case: the flags are cleared without inspecting the values */
    1420    53038303 :                 b->tsorted = b->trevsorted = b->tkey = false;
    1421             :         }
    1422    54578908 :         BATsetcount(b, p); /* p was advanced once per written value */
    1423    54578966 :         if (nunique != 0) /* a unique count from the hash overrides the estimate */
    1424      487936 :                 b->tunique_est = (double) nunique;
    1425    54578966 :         MT_lock_unset(&b->theaplock);
    1426    54616017 :         MT_rwlock_wrunlock(&b->thashlock);
    1427             : 
    1428    54613262 :         IMPSdestroy(b);         /* no support for inserts in imprints yet */
    1429    54622318 :         OIDXdestroy(b);
    1430    54619366 :         STRMPdestroy(b);        /* TODO: use STRMPappendBitstring */
    1431    54613430 :         RTREEdestroy(b);
    1432    54613430 :         return GDK_SUCCEED;
    1433             : }
    1434             : 
    1435             : /* Append a single value to the bat.
                     :  *
                     :  * Forwards to BUNappendmulti with a count of 1 and the caller's
                     :  * `force', and returns its result.  When b->ttype is non-zero and
                     :  * b has a tvheap, the address of the local pointer t is passed, so
                     :  * BUNappendmulti sees a one-element array of pointers; otherwise t
                     :  * itself is passed as the value array (a NULL t is passed through
                     :  * as NULL in that case).
                     :  *
                     :  * NOTE(review): b is dereferenced here to build the argument, i.e.
                     :  * before the BATcheck in BUNappendmulti can look at it, so callers
                     :  * presumably must not pass a NULL b -- confirm. */
    1436             : gdk_return
    1437    39761957 : BUNappend(BAT *b, const void *t, bool force)
    1438             : {
    1439    39761957 :         return BUNappendmulti(b, b->ttype && b->tvheap ? (const void *) &t : (const void *) t, 1, force);
    1440             : }
    1441             : 
    1442             : gdk_return
    1443           4 : BUNdelete(BAT *b, oid o)
    1444             : {
    1445           4 :         BUN p;
    1446           4 :         BATiter bi = bat_iterator_nolock(b);
    1447           4 :         const void *val;
    1448           4 :         bool locked = false;
    1449           4 :         BUN nunique;
    1450             : 
    1451           4 :         assert(!is_oid_nil(b->hseqbase) || BATcount(b) == 0);
    1452           4 :         if (o < b->hseqbase || o >= b->hseqbase + BATcount(b)) {
    1453             :                 /* value already not there */
    1454             :                 return GDK_SUCCEED;
    1455             :         }
    1456           4 :         assert(BATcount(b) > 0); /* follows from "if" above */
    1457           4 :         p = o - b->hseqbase;
    1458           4 :         if (p < b->batInserted) {
    1459           0 :                 GDKerror("cannot delete committed value\n");
    1460           0 :                 return GDK_FAIL;
    1461             :         }
    1462           4 :         TRC_DEBUG(ALGO, ALGOBATFMT " deleting oid " OIDFMT "\n", ALGOBATPAR(b), o);
    1463             :         /* load hash so that we can maintain it */
    1464           4 :         (void) BATcheckhash(b);
    1465             : 
    1466           4 :         val = BUNtail(bi, p);
    1467             :         /* writing the values should be locked, reading could be done
    1468             :          * unlocked (since we're the only thread that should be changing
    1469             :          * anything) */
    1470           4 :         MT_lock_set(&b->theaplock);
    1471           4 :         if (b->tmaxpos == p)
    1472           1 :                 b->tmaxpos = BUN_NONE;
    1473           4 :         if (b->tminpos == p)
    1474           0 :                 b->tminpos = BUN_NONE;
    1475           4 :         MT_lock_unset(&b->theaplock);
    1476           4 :         nunique = HASHdelete(&bi, p, val);
    1477           4 :         ATOMdel(b->ttype, b->tvheap, (var_t *) BUNtloc(bi, p));
    1478           4 :         if (p != BATcount(b) - 1 &&
    1479           2 :             (b->ttype != TYPE_void || BATtdense(b))) {
    1480             :                 /* replace to-be-delete BUN with last BUN; materialize
    1481             :                  * void column before doing so */
    1482           2 :                 if (b->ttype == TYPE_void &&
    1483           0 :                     BATmaterialize(b, BUN_NONE) != GDK_SUCCEED)
    1484             :                         return GDK_FAIL;
    1485           2 :                 if (ATOMstorage(b->ttype) == TYPE_msk) {
    1486           0 :                         msk mval = mskGetVal(b, BATcount(b) - 1);
    1487           0 :                         assert(b->thash == NULL);
    1488           0 :                         mskSetVal(b, p, mval);
    1489             :                         /* don't leave garbage */
    1490           0 :                         mskClr(b, BATcount(b) - 1);
    1491             :                 } else {
    1492           2 :                         val = Tloc(b, BATcount(b) - 1);
    1493           2 :                         nunique = HASHdelete(&bi, BATcount(b) - 1, val);
    1494           2 :                         memcpy(Tloc(b, p), val, b->twidth);
    1495           2 :                         nunique = HASHinsert(&bi, p, val);
    1496           2 :                         MT_lock_set(&b->theaplock);
    1497           2 :                         locked = true;
    1498           2 :                         if (b->tminpos == BATcount(b) - 1)
    1499           0 :                                 b->tminpos = p;
    1500           2 :                         if (b->tmaxpos == BATcount(b) - 1)
    1501           1 :                                 b->tmaxpos = p;
    1502             :                 }
    1503             :                 /* no longer sorted */
    1504           2 :                 if (!locked) {
    1505           0 :                         MT_lock_set(&b->theaplock);
    1506           0 :                         locked = true;
    1507             :                 }
    1508           2 :                 b->tsorted = b->trevsorted = false;
    1509           2 :                 b->theap->dirty = true;
    1510             :         }
    1511           4 :         if (!locked)
    1512           2 :                 MT_lock_set(&b->theaplock);
    1513           4 :         if (b->tnosorted >= p)
    1514           0 :                 b->tnosorted = 0;
    1515           4 :         if (b->tnorevsorted >= p)
    1516           1 :                 b->tnorevsorted = 0;
    1517           4 :         b->batCount--;
    1518           4 :         if (nunique != 0)
    1519           0 :                 b->tunique_est = (double) nunique;
    1520           4 :         else if (BATcount(b) < gdk_unique_estimate_keep_fraction)
    1521           4 :                 b->tunique_est = 0;
    1522           4 :         if (b->batCount <= 1) {
    1523             :                 /* some trivial properties */
    1524           0 :                 b->tkey = true;
    1525           0 :                 b->tsorted = b->trevsorted = true;
    1526           0 :                 b->tnosorted = b->tnorevsorted = 0;
    1527           0 :                 if (b->batCount == 0) {
    1528           0 :                         b->tnil = false;
    1529           0 :                         b->tnonil = true;
    1530             :                 }
    1531             :         }
    1532           4 :         MT_lock_unset(&b->theaplock);
    1533           4 :         IMPSdestroy(b);
    1534           4 :         OIDXdestroy(b);
    1535           4 :         return GDK_SUCCEED;
    1536             : }
    1537             : 
    1538             : /* @-  BUN replace
    1539             :  * The last operation in this context is BUN replace. It assumes that
    1540             :  * the header denotes a key. The old value association is destroyed
    1541             :  * (if it exists in the first place) and the new value takes its
    1542             :  * place.
    1543             :  *
    1544             :  * In order to make updates on void columns workable, replaces on them
    1545             :  * are always done in-place. Performing them without bun-movements
    1546             :  * greatly simplifies the problem. The 'downside' is that when
    1547             :  * transaction management has to be performed, replaced values should
    1548             :  * be saved explicitly.
                     :  *
                     :  * BUNinplacemulti overwrites `count' tail values of `b' in place.
                     :  * With `autoincr' set, the values go to the consecutive positions
                     :  * starting at positions[0]; otherwise the i-th value goes to
                     :  * positions[i].  Positions are oids and are turned into BUN indexes
                     :  * by subtracting b->hseqbase.  When b->ttype && b->tvheap holds,
                     :  * `values' is read as an array of pointers to the new values;
                     :  * otherwise it is read as a packed array whose elements are
                     :  * (1 << b->tshift) bytes apart.
                     :  *
                     :  * Unless `force' is set, b->batRestricted must be BAT_WRITE and
                     :  * (theap->refs & HEAPREFS) must not exceed 1; otherwise an error is
                     :  * raised and GDK_FAIL is returned.  A new value that is nil while the
                     :  * GDK_NOT_NULL property is set, or that is non-nil and smaller than
                     :  * GDK_MIN_BOUND or not smaller than GDK_MAX_BOUND, also ends in
                     :  * GDK_FAIL.  A failure in the middle of the loop does not undo the
                     :  * replacements that were already carried out.
                     :  *
                     :  * A new value that compares equal to the old one is skipped.  For
                     :  * every other value the old entry is removed from and the new one
                     :  * added to the hash, the order index, imprints, strimps and rtree are
                     :  * destroyed, and the min/max positions and the sorted, revsorted,
                     :  * key, nil and nonil properties are adjusted.  GDK_SUCCEED is returned
                     :  * when all values have been processed.
                     :  *
                     :  * NOTE(review): minprop and maxprop, filled in by VALcopy, are only
                     :  * handed to VALclear on the bailout path and not before the
                     :  * successful return; if VALcopy can allocate memory for the value
                     :  * this looks like a leak -- TODO confirm.
    1549             :  */
    1550             : static gdk_return
    1551     1044280 : BUNinplacemulti(BAT *b, const oid *positions, const void *values, BUN count, bool force, bool autoincr)
    1552             : {
    1553     1044280 :         BUN prv, nxt;
    1554     1044280 :         const void *val;
    1555     1044280 :         int (*atomcmp) (const void *, const void *) = ATOMcompare(b->ttype);
    1556     1044280 :         const void *atomnil = ATOMnilptr(b->ttype);
    1557             : 
    1558     1044280 :         MT_lock_set(&b->theaplock);
    1559     1043473 :         BUN last = BATcount(b) - 1;
    1560     1043473 :         BATiter bi = bat_iterator_nolock(b);
    1561             :         /* unless forced, require BAT_WRITE access and (refs & HEAPREFS) <= 1 */
    1562     1043473 :         if (!force && (b->batRestricted != BAT_WRITE ||
    1563      561151 :                        ((ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1))) {
    1564           0 :                 MT_lock_unset(&b->theaplock);
    1565           0 :                 GDKerror("access denied to %s, aborting.\n",
    1566             :                          BATgetId(b));
    1567           0 :                 assert(0);
    1568             :                 return GDK_FAIL;
    1569             :         }
    1570     1043473 :         TRC_DEBUG(ALGO, ALGOBATFMT " replacing " BUNFMT " values\n", ALGOBATPAR(b), count);
    1571     1037711 :         if (b->ttype == TYPE_void) {
    1572           0 :                 PROPdestroy(b);
    1573           0 :                 b->tminpos = BUN_NONE;
    1574           0 :                 b->tmaxpos = BUN_NONE;
    1575           0 :                 b->tunique_est = 0.0;
    1576     1037711 :         } else if (count > BATcount(b) / gdk_unique_estimate_keep_fraction) {
    1577      382689 :                 b->tunique_est = 0;
    1578             :         }
    1579     1037711 :         const ValRecord *prop;
    1580     1037711 :         ValRecord minprop, maxprop;
    1581     1037711 :         const void *minbound = NULL, *maxbound = NULL;
    1582     1037711 :         if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL &&
    1583           0 :             VALcopy(&minprop, prop) != NULL)
    1584           0 :                 minbound = VALptr(&minprop);
    1585     1038075 :         if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL &&
    1586           0 :             VALcopy(&maxprop, prop) != NULL)
    1587           0 :                 maxbound = VALptr(&maxprop);
    1588     1038944 :         const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
    1589     1046762 :         MT_lock_unset(&b->theaplock);
    1590             :         /* load hash so that we can maintain it */
    1591     1050835 :         (void) BATcheckhash(b);
    1592     1050402 :         MT_rwlock_wrlock(&b->thashlock);
    1593     2101337 :         for (BUN i = 0; i < count; i++) {
    1594     1051148 :                 BUN p = autoincr ? positions[0] - b->hseqbase + i : positions[i] - b->hseqbase;
    1595     1051101 :                 const void *t = b->ttype && b->tvheap ?
    1596     1190924 :                         ((const void **) values)[i] :
    1597      911372 :                         (const void *) ((const char *) values + (i << b->tshift));
    1598     1051148 :                 bool isnil = atomnil && atomcmp(t, atomnil) == 0;
    1599     1049393 :                 if (notnull && isnil) {
    1600           0 :                         assert(0);
    1601             :                         GDKerror("NULL value not within bounds\n");
    1602             :                         MT_rwlock_wrunlock(&b->thashlock);
    1603             :                         goto bailout;
    1604     1049393 :                 } else if (!isnil &&
    1605           0 :                            ((minbound &&
    1606     1001346 :                              atomcmp(t, minbound) < 0) ||
    1607           0 :                             (maxbound &&
    1608           0 :                              atomcmp(t, maxbound) >= 0))) {
    1609           0 :                         assert(0);
    1610             :                         GDKerror("value not within bounds\n");
    1611             :                         MT_rwlock_wrunlock(&b->thashlock);
    1612             :                         goto bailout;
    1613             :                 }
    1614             : 
    1615             :                 /* retrieve old value, but if this comes from the
    1616             :                  * logger, we need to deal with offsets that point
    1617             :                  * outside of the valid vheap */
    1618     1049036 :                 if (b->ttype == TYPE_void) {
    1619           0 :                         val = BUNtpos(bi, p);
    1620     1049036 :                 } else if (bi.type == TYPE_msk) {
    1621         118 :                         val = BUNtmsk(bi, p);
    1622     1048918 :                 } else if (b->tvheap) {
    1623      138986 :                         size_t off = BUNtvaroff(bi, p);
    1624      139522 :                         if (off < bi.vhfree)
    1625      139696 :                                 val = bi.vh->base + off;
    1626             :                         else
    1627             :                                 val = NULL; /* bad offset */
    1628             :                 } else {
    1629      909932 :                         val = BUNtloc(bi, p);
    1630             :                 }
    1631             : 
    1632     1049746 :                 if (val) {
    1633     1049746 :                         if (atomcmp(val, t) == 0)
    1634      233079 :                                 continue; /* nothing to do */
    1635      817521 :                         if (!isnil &&
    1636      165943 :                             b->tnil &&
    1637      165944 :                             atomcmp(val, atomnil) == 0) {
    1638             :                                 /* if old value is nil and new value
    1639             :                                  * isn't, we're not sure anymore about
    1640             :                                  * the nil property, so we must clear
    1641             :                                  * it */
    1642      165783 :                                 MT_lock_set(&b->theaplock);
    1643      165784 :                                 b->tnil = false;
    1644      165784 :                                 MT_lock_unset(&b->theaplock);
    1645             :                         }
    1646      817525 :                         if (b->ttype != TYPE_void) {
    1647      817050 :                                 if (bi.maxpos != BUN_NONE) {
    1648      526970 :                                         if (!isnil && atomcmp(BUNtail(bi, bi.maxpos), t) < 0) {
    1649             :                                                 /* new value is larger
    1650             :                                                  * than previous
    1651             :                                                  * largest */
    1652       56168 :                                                 bi.maxpos = p;
    1653      470800 :                                         } else if (bi.maxpos == p && atomcmp(BUNtail(bi, bi.maxpos), t) != 0) {
    1654             :                                                 /* old value is equal to
    1655             :                                                  * largest and new value
    1656             :                                                  * is smaller or nil (see
    1657             :                                                  * above), so we don't
    1658             :                                                  * know anymore which is
    1659             :                                                  * the largest */
    1660         499 :                                                 bi.maxpos = BUN_NONE;
    1661             :                                         }
    1662             :                                 }
    1663      817048 :                                 if (bi.minpos != BUN_NONE) {
    1664      366380 :                                         if (!isnil && atomcmp(BUNtail(bi, bi.minpos), t) > 0) {
    1665             :                                                 /* new value is smaller
    1666             :                                                  * than previous
    1667             :                                                  * smallest */
    1668         423 :                                                 bi.minpos = p;
    1669      365957 :                                         } else if (bi.minpos == p && atomcmp(BUNtail(bi, bi.minpos), t) != 0) {
    1670             :                                                 /* old value is equal to
    1671             :                                                  * smallest and new value
    1672             :                                                  * is larger or nil (see
    1673             :                                                  * above), so we don't
    1674             :                                                  * know anymore which is
    1675             :                                                  * the smallest */
    1676         618 :                                                 bi.minpos = BUN_NONE;
    1677             :                                         }
    1678             :                                 }
    1679             :                         }
    1680      817523 :                         HASHdelete_locked(&bi, p, val);     /* first delete old value from hash */
    1681             :                 } else {
    1682             :                         /* out of range old value, so the properties and
    1683             :                          * hash cannot be trusted */
    1684           0 :                         PROPdestroy(b);
    1685           0 :                         Hash *hs = b->thash;
    1686           0 :                         if (hs) {
    1687           0 :                                 b->thash = NULL;
    1688           0 :                                 doHASHdestroy(b, hs);
    1689             :                         }
    1690           0 :                         MT_lock_set(&b->theaplock);
    1691           0 :                         bi.minpos = BUN_NONE;
    1692           0 :                         bi.maxpos = BUN_NONE;
    1693           0 :                         b->tunique_est = 0.0;
    1694           0 :                         MT_lock_unset(&b->theaplock);
    1695             :                 }
    1696      816235 :                 OIDXdestroy(b);
    1697      817341 :                 IMPSdestroy(b);
    1698      817866 :                 STRMPdestroy(b);
    1699      817647 :                 RTREEdestroy(b);
    1700             : 
    1701      817727 :                 if (b->tvheap && b->ttype) {
    1702       66818 :                         var_t _d;
    1703       66818 :                         ptr _ptr;
    1704       66818 :                         _ptr = BUNtloc(bi, p);
    1705       66818 :                         switch (b->twidth) {
    1706        8751 :                         case 1:
    1707        8751 :                                 _d = (var_t) * (uint8_t *) _ptr + GDK_VAROFFSET;
    1708        8751 :                                 break;
    1709       53214 :                         case 2:
    1710       53214 :                                 _d = (var_t) * (uint16_t *) _ptr + GDK_VAROFFSET;
    1711       53214 :                                 break;
    1712        4853 :                         case 4:
    1713        4853 :                                 _d = (var_t) * (uint32_t *) _ptr;
    1714        4853 :                                 break;
    1715             : #if SIZEOF_VAR_T == 8
    1716           0 :                         case 8:
    1717           0 :                                 _d = (var_t) * (uint64_t *) _ptr;
    1718           0 :                                 break;
    1719             : #endif
    1720             :                         default:
    1721           0 :                                 MT_UNREACHABLE();
    1722             :                         }
    1723       66818 :                         MT_lock_set(&b->theaplock);
    1724       66842 :                         if (ATOMreplaceVAR(b, &_d, t) != GDK_SUCCEED) {
    1725           0 :                                 MT_lock_unset(&b->theaplock);
    1726           0 :                                 MT_rwlock_wrunlock(&b->thashlock);
    1727           0 :                                 goto bailout;
    1728             :                         }
    1729       66938 :                         MT_lock_unset(&b->theaplock);
    1730       66548 :                         if (b->twidth < SIZEOF_VAR_T &&
    1731       66991 :                             (b->twidth <= 2 ? _d - GDK_VAROFFSET : _d) >= ((size_t) 1 << (8 << b->tshift))) {
    1732             :                                 /* doesn't fit in current heap, upgrade it */
    1733          84 :                                 if (GDKupgradevarheap(b, _d, 0, bi.count) != GDK_SUCCEED) {
    1734           0 :                                         MT_rwlock_wrunlock(&b->thashlock);
    1735           0 :                                         goto bailout;
    1736             :                                 }
    1737             :                         }
    1738             :                         /* reinitialize iterator after possible heap upgrade */
    1739             :                         {
    1740             :                                 /* save and restore minpos/maxpos */
    1741       66548 :                                 BUN minpos = bi.minpos;
    1742       66548 :                                 BUN maxpos = bi.maxpos;
    1743       66548 :                                 bi = bat_iterator_nolock(b);
    1744       66548 :                                 bi.minpos = minpos;
    1745       66548 :                                 bi.maxpos = maxpos;
    1746             :                         }
    1747       66548 :                         _ptr = BUNtloc(bi, p);
    1748       66548 :                         switch (b->twidth) {
    1749        8676 :                         case 1:
    1750        8676 :                                 * (uint8_t *) _ptr = (uint8_t) (_d - GDK_VAROFFSET);
    1751        8676 :                                 break;
    1752       53010 :                         case 2:
    1753       53010 :                                 * (uint16_t *) _ptr = (uint16_t) (_d - GDK_VAROFFSET);
    1754       53010 :                                 break;
    1755        4862 :                         case 4:
    1756        4862 :                                 * (uint32_t *) _ptr = (uint32_t) _d;
    1757        4862 :                                 break;
    1758             : #if SIZEOF_VAR_T == 8
    1759           0 :                         case 8:
    1760           0 :                                 * (uint64_t *) _ptr = (uint64_t) _d;
    1761           0 :                                 break;
    1762             : #endif
    1763             :                         default:
    1764       66548 :                                 MT_UNREACHABLE();
    1765             :                         }
    1766      750909 :                 } else if (ATOMstorage(b->ttype) == TYPE_msk) {
    1767         118 :                         mskSetVal(b, p, * (msk *) t);
    1768             :                 } else {
    1769      750791 :                         assert(BATatoms[b->ttype].atomPut == NULL);
    1770      750791 :                         switch (ATOMsize(b->ttype)) {
    1771             :                         case 0:      /* void */
    1772             :                                 break;
    1773       13081 :                         case 1:
    1774       13081 :                                 ((bte *) b->theap->base)[p] = * (bte *) t;
    1775       13081 :                                 break;
    1776        6789 :                         case 2:
    1777        6789 :                                 ((sht *) b->theap->base)[p] = * (sht *) t;
    1778        6789 :                                 break;
    1779      168956 :                         case 4:
    1780      168956 :                                 ((int *) b->theap->base)[p] = * (int *) t;
    1781      168956 :                                 break;
    1782      561965 :                         case 8:
    1783      561965 :                                 ((lng *) b->theap->base)[p] = * (lng *) t;
    1784      561965 :                                 break;
    1785           0 :                         case 16:
    1786             : #ifdef HAVE_HGE
    1787           0 :                                 ((hge *) b->theap->base)[p] = * (hge *) t;
    1788             : #else
    1789             :                                 ((uuid *) b->theap->base)[p] = * (uuid *) t;
    1790             : #endif
    1791           0 :                                 break;
    1792           0 :                         default:
    1793           0 :                                 memcpy(BUNtloc(bi, p), t, ATOMsize(b->ttype));
    1794           0 :                                 break;
    1795             :                         }
    1796             :                 }
    1797             : 
    1798      817457 :                 HASHinsert_locked(&bi, p, t);       /* insert new value into hash */
    1799             : 
    1800      815884 :                 prv = p > 0 ? p - 1 : BUN_NONE;
    1801      815884 :                 nxt = p < last ? p + 1 : BUN_NONE;
    1802             : 
    1803      815884 :                 MT_lock_set(&b->theaplock);
    1804      817036 :                 if (b->tsorted) {
    1805        2956 :                         if (prv != BUN_NONE &&
    1806        1167 :                             atomcmp(t, BUNtail(bi, prv)) < 0) {
    1807          26 :                                 b->tsorted = false;
    1808          26 :                                 b->tnosorted = p;
    1809        2488 :                         } else if (nxt != BUN_NONE &&
    1810         725 :                                    atomcmp(t, BUNtail(bi, nxt)) > 0) {
    1811         648 :                                 b->tsorted = false;
    1812         648 :                                 b->tnosorted = nxt;
    1813        1115 :                         } else if (b->ttype != TYPE_void && BATtdense(b)) {
    1814           0 :                                 if (prv != BUN_NONE &&
    1815           0 :                                     1 + * (oid *) BUNtloc(bi, prv) != * (oid *) t) {
    1816           0 :                                         b->tseqbase = oid_nil;
    1817           0 :                                 } else if (nxt != BUN_NONE &&
    1818           0 :                                            * (oid *) BUNtloc(bi, nxt) != 1 + * (oid *) t) {
    1819           0 :                                         b->tseqbase = oid_nil;
    1820           0 :                                 } else if (prv == BUN_NONE &&
    1821           0 :                                            nxt == BUN_NONE) {
    1822           0 :                                         b->tseqbase = * (oid *) t;
    1823             :                                 }
    1824             :                         }
    1825      815247 :                 } else if (b->tnosorted >= p)
    1826        3316 :                         b->tnosorted = 0;
    1827      817036 :                 if (b->trevsorted) {
    1828        1377 :                         if (prv != BUN_NONE &&
    1829         406 :                             atomcmp(t, BUNtail(bi, prv)) > 0) {
    1830          52 :                                 b->trevsorted = false;
    1831          52 :                                 b->tnorevsorted = p;
    1832        1057 :                         } else if (nxt != BUN_NONE &&
    1833         138 :                                    atomcmp(t, BUNtail(bi, nxt)) < 0) {
    1834          27 :                                 b->trevsorted = false;
    1835          27 :                                 b->tnorevsorted = nxt;
    1836             :                         }
    1837      816065 :                 } else if (b->tnorevsorted >= p)
    1838        1696 :                         b->tnorevsorted = 0;
    1839      817036 :                 if (((b->ttype != TYPE_void) & b->tkey) && b->batCount > 1) {
    1840         938 :                         BATkey(b, false);
    1841      816098 :                 } else if (!b->tkey && (b->tnokey[0] == p || b->tnokey[1] == p))
    1842         865 :                         b->tnokey[0] = b->tnokey[1] = 0;
    1843      817036 :                 if (b->tnonil && ATOMstorage(b->ttype) != TYPE_msk)
    1844      592867 :                         b->tnonil = t && atomcmp(t, atomnil) != 0;
    1845     1050107 :                 MT_lock_unset(&b->theaplock);
    1846             :         }
    1847     1049518 :         BUN nunique = b->thash ? b->thash->nunique : 0;
    1848     1049518 :         MT_rwlock_wrunlock(&b->thashlock);
    1849     1050693 :         MT_lock_set(&b->theaplock);
    1850     1047877 :         if (nunique != 0)
    1851       21248 :                 b->tunique_est = (double) nunique;
    1852     1047877 :         b->tminpos = bi.minpos;
    1853     1047877 :         b->tmaxpos = bi.maxpos;
    1854     1047877 :         b->theap->dirty = true;
    1855     1047877 :         if (b->tvheap)
    1856      138910 :                 b->tvheap->dirty = true;
    1857     1047877 :         MT_lock_unset(&b->theaplock);
    1858             : 
    1859     1048565 :         return GDK_SUCCEED;
    1860             : 
    1861           0 :   bailout:
    1862           0 :         if (minbound)
    1863           0 :                 VALclear(&minprop);
    1864           0 :         if (maxbound)
    1865           0 :                 VALclear(&maxprop);
    1866             :         return GDK_FAIL;
    1867             : }
    1868             : 
    1869             : /* Replace multiple values given by their positions with the given values.
                     :  * The i-th of the `count' values replaces the value at oid
                     :  * positions[i].  A void tail column is first materialized with
                     :  * BATmaterialize; if that fails GDK_FAIL is returned.  The actual work
                     :  * is done by BUNinplacemulti (called with autoincr == false), whose
                     :  * result is returned; `force' is passed on unchanged.
                     :  * BATcheck(b, GDK_FAIL) presumably rejects a NULL b -- TODO confirm
                     :  * against the macro definition. */
    1870             : gdk_return
    1871      564803 : BUNreplacemulti(BAT *b, const oid *positions, const void *values, BUN count, bool force)
    1872             : {
    1873      564803 :         BATcheck(b, GDK_FAIL);
    1874             : 
    1875      564803 :         if (b->ttype == TYPE_void && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED)
    1876             :                 return GDK_FAIL;
    1877             : 
    1878      564803 :         return BUNinplacemulti(b, positions, values, count, force, false);
    1879             : }
    1880             : 
    1881             : /* Replace multiple values starting from a given position with the given
    1882             :  * values.  The `count' values go to the consecutive oids position,
                     :  * position + 1, ...: the address of `position' is handed to
                     :  * BUNinplacemulti together with autoincr == true, so only that one
                     :  * oid is read.  A void tail column is first materialized with
                     :  * BATmaterialize; if that fails GDK_FAIL is returned.  Otherwise the
                     :  * result of BUNinplacemulti is returned. */
    1883             : gdk_return
    1884      479246 : BUNreplacemultiincr(BAT *b, oid position, const void *values, BUN count, bool force)
    1885             : {
    1886      479246 :         BATcheck(b, GDK_FAIL);
    1887             : 
    1888      479246 :         if (b->ttype == TYPE_void && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED)
    1889             :                 return GDK_FAIL;
    1890             : 
    1891      479246 :         return BUNreplacemulti(b, &position, values, count, force, true);
    1892             : }
    1893             : /* Replace the single value at oid `id' with `t' by calling
                     :  * BUNreplacemulti with a count of 1.  When b->ttype && b->tvheap
                     :  * holds, the address of `t' is passed, so that the callee sees an
                     :  * array holding one pointer; otherwise `t' itself is passed as the
                     :  * packed value.
                     :  * NOTE(review): b is dereferenced here before BUNreplacemulti gets to
                     :  * its BATcheck, so b must not be NULL -- confirm callers. */
    1894             : gdk_return
    1895      564803 : BUNreplace(BAT *b, oid id, const void *t, bool force)
    1896             : {
    1897      564803 :         return BUNreplacemulti(b, &id, b->ttype && b->tvheap ? (const void *) &t : t, 1, force);
    1898             : }
    1899             : 
    1900             : /* very much like BUNreplace, but this doesn't make any changes if the
    1901             :  * tail column is void: in that case GDK_SUCCEED is returned right away.
                     :  * The oid must lie in [b->hseqbase, b->hseqbase + BATcount(b)); this
                     :  * is asserted and then checked again explicitly (error plus
                     :  * GDK_FAIL), which presumably covers builds in which assert is
                     :  * compiled out.  Otherwise the single value is handed directly to
                     :  * BUNinplacemulti; as in BUNreplace, the address of `val' is passed
                     :  * when b->ttype && b->tvheap holds. */
    1902             : gdk_return
    1903        3543 : void_inplace(BAT *b, oid id, const void *val, bool force)
    1904             : {
    1905        3543 :         assert(id >= b->hseqbase && id < b->hseqbase + BATcount(b));
    1906        3543 :         if (id < b->hseqbase || id >= b->hseqbase + BATcount(b)) {
    1907             :                 GDKerror("id out of range\n");
    1908             :                 return GDK_FAIL;
    1909             :         }
    1910        3543 :         if (b->ttype == TYPE_void)
    1911             :                 return GDK_SUCCEED;
    1912        3543 :         return BUNinplacemulti(b, &id, b->ttype && b->tvheap ? (const void *) &val : (const void *) val, 1, force, false);
    1913             : }
    1914             : 
    1915             : /*
    1916             :  * @- BUN Lookup
    1917             :  * Location of a BUN using a value should use the available indexes to
    1918             :  * speed up access. If indexes are lacking then a hash index is
    1919             :  * constructed under the assumption that 1) multiple access to the BAT
    1920             :  * can be expected and 2) building the hash is only slightly more
    1921             :  * expensive than the full linear scan.  BUN_NONE is returned if no
    1922             :  * such element could be found.  In those cases where the type is
    1923             :  * known and a hash index is available, one should use the inline
    1924             :  * functions to speed-up processing.
                     :  *
                     :  * slowfnd does not use any index: it walks the BUNs of b with BATloop
                     :  * and returns the position of the first one visited whose tail value
                     :  * compares equal to v according to ATOMcompare(bi.type), or BUN_NONE
                     :  * when there is none.  The iterator obtained from bat_iterator is
                     :  * released with bat_iterator_end on both exits.
    1925             :  */
    1926             : static BUN
    1927           0 : slowfnd(BAT *b, const void *v)
    1928             : {
    1929           0 :         BATiter bi = bat_iterator(b);
    1930           0 :         BUN p, q;
    1931           0 :         int (*cmp)(const void *, const void *) = ATOMcompare(bi.type);
    1932             : 
    1933           0 :         BATloop(b, p, q) {
    1934           0 :                 if ((*cmp)(v, BUNtail(bi, p)) == 0) {
    1935           0 :                         bat_iterator_end(&bi);
    1936           0 :                         return p;
    1937             :                 }
    1938             :         }
    1939           0 :         bat_iterator_end(&bi);
    1940           0 :         return BUN_NONE;
    1941             : }
    1942             : /* Search a bit (msk) column for a bit equal to v.  The heap is read
                     :  * as (BATcount(b) + 31) / 32 words of 32 bits; for the first word that
                     :  * contains a wanted bit, p * 32 plus the result of candmask_lobit on
                     :  * that word (inverted when looking for a 0) is returned.  BUN_NONE is
                     :  * returned when no word qualifies.
                     :  * NOTE(review): the last word is tested in full, so when BATcount(b)
                     :  * is not a multiple of 32 the bits past the end take part in the
                     :  * search; looking for a 0 in a column of only 1s whose padding bits
                     :  * are 0 would then seem to yield a position >= BATcount(b) -- TODO
                     :  * confirm. */
    1943             : static BUN
    1944           0 : mskfnd(BAT *b, msk v)
    1945             : {
    1946           0 :         BUN p, q;
    1947             : 
    1948           0 :         if (v) {
    1949             :                 /* find a 1 value */
    1950           0 :                 for (p = 0, q = (BATcount(b) + 31) / 32; p < q; p++) {
    1951           0 :                         if (((uint32_t *) b->theap->base)[p] != 0) {
    1952             :                                 /* there's at least one 1 bit */
    1953           0 :                                 return p * 32 + candmask_lobit(((uint32_t *) b->theap->base)[p]);
    1954             :                         }
    1955             :                 }
    1956             :         } else {
    1957             :                 /* find a 0 value */
    1958           0 :                 for (p = 0, q = (BATcount(b) + 31) / 32; p < q; p++) {
    1959           0 :                         if (((uint32_t *) b->theap->base)[p] != ~0U) {
    1960             :                                 /* there's at least one 0 bit */
    1961           0 :                                 return p * 32 + candmask_lobit(~((uint32_t *) b->theap->base)[p]);
    1962             :                         }
    1963             :                 }
    1964             :         }
    1965             :         return BUN_NONE;
    1966             : }
    1967             : 
    1968             : BUN
    1969     1616083 : BUNfnd(BAT *b, const void *v)
    1970             : {
    1971     1616083 :         BUN r = BUN_NONE;
    1972     1616083 :         BATiter bi;
    1973             : 
    1974     1616083 :         BATcheck(b, BUN_NONE);
    1975     1616083 :         if (!v || BATcount(b) == 0)
    1976             :                 return r;
    1977     1204051 :         if (complex_cand(b)) {
    1978           0 :                 struct canditer ci;
    1979           0 :                 canditer_init(&ci, NULL, b);
    1980           0 :                 return canditer_search(&ci, * (const oid *) v, false);
    1981             :         }
    1982     1204051 :         if (BATtvoid(b))
    1983      675957 :                 return BUNfndVOID(b, v);
    1984      528094 :         if (ATOMstorage(b->ttype) == TYPE_msk) {
    1985           0 :                 return mskfnd(b, *(msk*)v);
    1986             :         }
    1987      528094 :         if (!BATcheckhash(b)) {
    1988       54078 :                 if (BATordered(b) || BATordered_rev(b))
    1989       53744 :                         return SORTfnd(b, v);
    1990             :         }
    1991      474333 :         if (BAThash(b) == GDK_SUCCEED) {
    1992      474345 :                 bi = bat_iterator(b); /* outside of hashlock */
    1993      474356 :                 MT_rwlock_rdlock(&b->thashlock);
    1994      474356 :                 if (b->thash == NULL) {
    1995           0 :                         MT_rwlock_rdunlock(&b->thashlock);
    1996           0 :                         bat_iterator_end(&bi);
    1997           0 :                         goto hashfnd_failed;
    1998             :                 }
    1999      948516 :                 switch (ATOMbasetype(bi.type)) {
    2000           5 :                 case TYPE_bte:
    2001           5 :                         HASHloop_bte(bi, b->thash, r, v)
    2002             :                                 break;
    2003             :                         break;
    2004           0 :                 case TYPE_sht:
    2005           0 :                         HASHloop_sht(bi, b->thash, r, v)
    2006             :                                 break;
    2007             :                         break;
    2008          32 :                 case TYPE_int:
    2009          32 :                         HASHloop_int(bi, b->thash, r, v)
    2010             :                                 break;
    2011             :                         break;
    2012           0 :                 case TYPE_flt:
    2013           0 :                         HASHloop_flt(bi, b->thash, r, v)
    2014             :                                 break;
    2015             :                         break;
    2016           0 :                 case TYPE_dbl:
    2017           0 :                         HASHloop_dbl(bi, b->thash, r, v)
    2018             :                                 break;
    2019             :                         break;
    2020         179 :                 case TYPE_lng:
    2021         179 :                         HASHloop_lng(bi, b->thash, r, v)
    2022             :                                 break;
    2023             :                         break;
    2024             : #ifdef HAVE_HGE
    2025           0 :                 case TYPE_hge:
    2026           0 :                         HASHloop_hge(bi, b->thash, r, v)
    2027             :                                 break;
    2028             :                         break;
    2029             : #endif
    2030           0 :                 case TYPE_uuid:
    2031           0 :                         HASHloop_uuid(bi, b->thash, r, v)
    2032             :                                 break;
    2033             :                         break;
    2034      474139 :                 case TYPE_str:
    2035      658222 :                         HASHloop_str(bi, b->thash, r, v)
    2036             :                                 break;
    2037             :                         break;
    2038           1 :                 default:
    2039           3 :                         HASHloop(bi, b->thash, r, v)
    2040             :                                 break;
    2041             :                         break;
    2042             :                 }
    2043      474356 :                 MT_rwlock_rdunlock(&b->thashlock);
    2044      474356 :                 bat_iterator_end(&bi);
    2045      474356 :                 return r;
    2046             :         }
    2047           0 :   hashfnd_failed:
    2048             :         /* can't build hash table, search the slow way */
    2049           0 :         GDKclrerr();
    2050           0 :         return slowfnd(b, v);
    2051             : }
    2052             : 
    2053             : /*
    2054             :  * @+ BAT Property Management
    2055             :  *
    2056             :  * The function BATcount returns the number of active elements in a
    2057             :  * BAT.  Counting is type independent.  It can be implemented quickly,
    2058             :  * because the system ensures a dense BUN list.
    2059             :  */
    2060             : void
    2061     2981545 : BATsetcapacity(BAT *b, BUN cnt)
    2062             : {
    2063     2981545 :         b->batCapacity = cnt;
    2064     2981545 :         assert(b->batCount <= cnt);
    2065     2981545 : }
    2066             : 
    2067             : /* Set the batCount value for the bat and also set some dependent
    2068             :  * properties.  This function should be called only when it is save from
    2069             :  * concurrent use (e.g. when theaplock is being held). */
    2070             : void
    2071    79880322 : BATsetcount(BAT *b, BUN cnt)
    2072             : {
    2073             :         /* head column is always VOID, and some head properties never change */
    2074    79880322 :         assert(!is_oid_nil(b->hseqbase));
    2075    79880322 :         assert(cnt <= BUN_MAX);
    2076             : 
    2077    79880322 :         b->batCount = cnt;
    2078    79880322 :         if (b->theap->parentid == b->batCacheid) {
    2079    76845450 :                 b->theap->dirty |= b->ttype != TYPE_void && cnt > 0;
    2080    76845450 :                 b->theap->free = tailsize(b, cnt);
    2081             :         }
    2082    79880322 :         if (b->ttype == TYPE_void)
    2083    12100845 :                 b->batCapacity = cnt;
    2084    79880322 :         if (cnt <= 1) {
    2085    17561094 :                 b->tsorted = b->trevsorted = ATOMlinear(b->ttype);
    2086    17561094 :                 b->tnosorted = b->tnorevsorted = 0;
    2087             :         }
    2088             :         /* if the BAT was made smaller, we need to zap some values */
    2089    79880322 :         if (b->tnosorted >= BATcount(b))
    2090    15788501 :                 b->tnosorted = 0;
    2091    79880322 :         if (b->tnorevsorted >= BATcount(b))
    2092    15580920 :                 b->tnorevsorted = 0;
    2093    79880322 :         if (b->tnokey[0] >= BATcount(b) || b->tnokey[1] >= BATcount(b)) {
    2094    15595842 :                 b->tnokey[0] = 0;
    2095    15595842 :                 b->tnokey[1] = 0;
    2096             :         }
    2097    79880322 :         if (b->ttype == TYPE_void) {
    2098    12125857 :                 b->tsorted = true;
    2099    12125857 :                 if (is_oid_nil(b->tseqbase)) {
    2100     6234749 :                         b->tkey = cnt <= 1;
    2101     6234749 :                         b->trevsorted = true;
    2102     6234749 :                         b->tnil = true;
    2103     6234749 :                         b->tnonil = false;
    2104             :                 } else {
    2105     5891108 :                         b->tkey = true;
    2106     5891108 :                         b->trevsorted = cnt <= 1;
    2107     5891108 :                         b->tnil = false;
    2108     5891108 :                         b->tnonil = true;
    2109             :                 }
    2110             :         }
    2111    79880322 :         assert(b->batCapacity >= cnt);
    2112    79880322 : }
    2113             : 
    2114             : /*
    2115             :  * The key and name properties can be changed at any time.  Keyed
    2116             :  * dimensions are automatically supported by an auxiliary hash-based
    2117             :  * access structure to speed up searching. Turning off the key
    2118             :  * integrity property does not cause the index to disappear. It can
    2119             :  * still be used to speed-up retrieval. The routine BATkey sets the
    2120             :  * key property of the association head.
    2121             :  */
    2122             : gdk_return
    2123       53768 : BATkey(BAT *b, bool flag)
    2124             : {
    2125       53768 :         BATcheck(b, GDK_FAIL);
    2126       53768 :         if (b->ttype == TYPE_void) {
    2127        1134 :                 if (BATtdense(b) && !flag) {
    2128           0 :                         GDKerror("dense column must be unique.\n");
    2129           0 :                         return GDK_FAIL;
    2130             :                 }
    2131        1134 :                 if (is_oid_nil(b->tseqbase) && flag && b->batCount > 1) {
    2132           0 :                         GDKerror("void column cannot be unique.\n");
    2133           0 :                         return GDK_FAIL;
    2134             :                 }
    2135             :         }
    2136       53768 :         b->tkey = flag;
    2137       53768 :         if (!flag) {
    2138       38815 :                 b->tseqbase = oid_nil;
    2139             :         } else
    2140       14953 :                 b->tnokey[0] = b->tnokey[1] = 0;
    2141       53768 :         gdk_return rc = GDK_SUCCEED;
    2142       53768 :         if (flag && VIEWtparent(b)) {
    2143             :                 /* if a view is key, then so is the parent if the two
    2144             :                  * are aligned */
    2145           2 :                 BAT *bp = BATdescriptor(VIEWtparent(b));
    2146           2 :                 if (bp != NULL) {
    2147           2 :                         MT_lock_set(&bp->theaplock);
    2148           4 :                         if (BATcount(b) == BATcount(bp) &&
    2149           2 :                             ATOMtype(BATttype(b)) == ATOMtype(BATttype(bp)) &&
    2150           2 :                             !BATtkey(bp) &&
    2151           0 :                             ((BATtvoid(b) && BATtvoid(bp) && b->tseqbase == bp->tseqbase) ||
    2152             :                              BATcount(b) == 0))
    2153           0 :                                 rc = BATkey(bp, true);
    2154           2 :                         MT_lock_unset(&bp->theaplock);
    2155           2 :                         BBPunfix(bp->batCacheid);
    2156             :                 }
    2157             :         }
    2158             :         return rc;
    2159             : }
    2160             : 
    2161             : void
    2162     2913580 : BAThseqbase(BAT *b, oid o)
    2163             : {
    2164     2913580 :         if (b != NULL) {
    2165     2913580 :                 assert(o <= GDK_oid_max);    /* i.e., not oid_nil */
    2166     2913580 :                 assert(o + BATcount(b) <= GDK_oid_max);
    2167     2913580 :                 b->hseqbase = o;
    2168             :         }
    2169     2913580 : }
    2170             : 
    2171             : void
    2172     7220405 : BATtseqbase(BAT *b, oid o)
    2173             : {
    2174     7220405 :         assert(o <= oid_nil);
    2175     7220405 :         if (b == NULL)
    2176             :                 return;
    2177     7220405 :         assert(is_oid_nil(o) || o + BATcount(b) <= GDK_oid_max);
    2178     7220405 :         if (ATOMtype(b->ttype) == TYPE_oid) {
    2179     6117856 :                 b->tseqbase = o;
    2180             : 
    2181             :                 /* adapt keyness */
    2182     6117856 :                 if (BATtvoid(b)) {
    2183     6091109 :                         b->tsorted = true;
    2184     6091109 :                         if (is_oid_nil(o)) {
    2185         112 :                                 b->tkey = b->batCount <= 1;
    2186         112 :                                 b->tnonil = b->batCount == 0;
    2187         112 :                                 b->tnil = b->batCount > 0;
    2188         112 :                                 b->trevsorted = true;
    2189         112 :                                 b->tnosorted = b->tnorevsorted = 0;
    2190         112 :                                 if (!b->tkey) {
    2191           0 :                                         b->tnokey[0] = 0;
    2192           0 :                                         b->tnokey[1] = 1;
    2193             :                                 } else {
    2194         112 :                                         b->tnokey[0] = b->tnokey[1] = 0;
    2195             :                                 }
    2196             :                         } else {
    2197     6090997 :                                 if (!b->tkey) {
    2198       19370 :                                         b->tkey = true;
    2199       19370 :                                         b->tnokey[0] = b->tnokey[1] = 0;
    2200             :                                 }
    2201     6090997 :                                 b->tnonil = true;
    2202     6090997 :                                 b->tnil = false;
    2203     6090997 :                                 b->trevsorted = b->batCount <= 1;
    2204     6090997 :                                 if (!b->trevsorted)
    2205       19676 :                                         b->tnorevsorted = 1;
    2206             :                         }
    2207             :                 }
    2208             :         } else {
    2209     1102549 :                 assert(o == oid_nil);
    2210     1102549 :                 b->tseqbase = oid_nil;
    2211             :         }
    2212             : }
    2213             : 
    2214             : /*
    2215             :  * @- Change the BAT access permissions (read, append, write)
    2216             :  * Regrettably, BAT access-permissions, persistent status and memory
    2217             :  * map modes, interact in ways that makes one's brain sizzle. This
    2218             :  * makes BATsetaccess and TMcommit (where a change in BAT persistence
    2219             :  * mode is made permanent) points in which the memory map status of
    2220             :  * bats needs to be carefully re-assessed and ensured.
    2221             :  *
    2222             :  * Another complication is the fact that during commit, concurrent
    2223             :  * users may access the heaps, such that the simple solution
    2224             :  * unmap;re-map is out of the question.
    2225             :  * Even worse, it is not possible to even rename an open mmap file in
    2226             :  * Windows. For this purpose, we dropped the old .priv scheme, which
    2227             :  * relied on file moves. Now, the file that is opened with mmap is
    2228             :  * always the X file, in case of newstorage=STORE_PRIV, we save in a
    2229             :  * new file X.new
    2230             :  *
    2231             :  * we must consider the following dimensions:
    2232             :  *
    2233             :  * persistence:
    2234             :  *     not simply the current persistence mode but whether the bat *was*
    2235             :  *     present at the last commit point (BBP status & BBPEXISTING).
    2236             :  *     The crucial issue is namely whether we must guarantee recovery
    2237             :  *     to a previous sane state.
    2238             :  *
    2239             :  * access:
    2240             :  *     whether the BAT is BAT_READ or BAT_WRITE. Note that BAT_APPEND
    2241             :  *     is usually the same as BAT_READ (as our concern is only data pages
    2242             :  *     that already existed at the last commit).
    2243             :  *
    2244             :  * storage:
    2245             :  *     the current way the heap file X is memory-mapped;
    2246             :  *     STORE_MMAP uses direct mapping (so dirty pages may be flushed
    2247             :  *     at any time to disk), STORE_PRIV uses copy-on-write.
    2248             :  *
    2249             :  * newstorage:
    2250             :  *     the current save-regime. STORE_MMAP calls msync() on the heap X,
    2251             :  *     whereas STORE_PRIV writes the *entire* heap in a file: X.new
    2252             :  *     If a BAT is loaded from disk, the field newstorage is used
    2253             :  *     to set storage as well (so before change-access and commit-
    2254             :  *     persistence mayhem, we always have newstorage=storage).
    2255             :  *
    2256             :  * change-access:
    2257             :  *     what happens if the bat-access mode is changed from
    2258             :  *     BAT_READ into BAT_WRITE (or vice versa).
    2259             :  *
    2260             :  * commit-persistence:
    2261             :  *     what happens during commit if the bat-persistence mode was
    2262             :  *     changed (from TRANSIENT into PERSISTENT, or vice versa).
    2263             :  *
    2264             :  * this is the scheme:
    2265             :  *
    2266             :  *  persistence access    newstorage storage    change-access commit-persistence
    2267             :  *  =========== ========= ========== ========== ============= ==================
    2268             :  * 0 transient  BAT_READ  STORE_MMAP STORE_MMAP =>2           =>4
    2269             :  * 1 transient  BAT_READ  STORE_PRIV STORE_PRIV =>3           =>5
    2270             :  * 2 transient  BAT_WRITE STORE_MMAP STORE_MMAP =>0           =>6+
    2271             :  * 3 transient  BAT_WRITE STORE_PRIV STORE_PRIV =>1           =>7
    2272             :  * 4 persistent BAT_READ  STORE_MMAP STORE_MMAP =>6+          =>0
    2273             :  * 5 persistent BAT_READ  STORE_PRIV STORE_PRIV =>7           =>1
    2274             :  * 6 persistent BAT_WRITE STORE_PRIV STORE_MMAP del X.new=>4+ del X.new;=>2+
    2275             :  * 7 persistent BAT_WRITE STORE_PRIV STORE_PRIV =>5           =>3
    2276             :  *
    2277             :  * exception states:
    2278             :  * a transient  BAT_READ  STORE_PRIV STORE_MMAP =>b           =>c
    2279             :  * b transient  BAT_WRITE STORE_PRIV STORE_MMAP =>a           =>6
    2280             :  * c persistent BAT_READ  STORE_PRIV STORE_MMAP =>6           =>a
    2281             :  *
    2282             :  * (+) indicates that we must ensure that the heap gets saved in its new mode
    2283             :  *
    2284             :  * Note that we now allow a heap with save-regime STORE_PRIV that was
    2285             :  * actually mapped STORE_MMAP. In effect, the potential corruption of
    2286             :  * the X file is compensated by writing out full X.new files that take
    2287             :  * precedence.  When transitioning out of this state towards one with
    2288             :  * both storage regime and OS as STORE_MMAP we need to move the X.new
    2289             :  * files into the backup directory. Then msync the X file and (on
    2290             :  * success) remove the X.new; see backup_new().
    2291             :  *
    2292             :  * Exception states are only reachable if the commit fails and those
    2293             :  * new persistent bats have already been processed (but never become
    2294             :  * part of a committed state). In that case a transition 2=>6 may end
    2295             :  * up 2=>b.  Exception states a and c are reachable from b.
    2296             :  *
    2297             :  * Errors in HEAPchangeaccess() can be handled atomically inside the
    2298             :  * routine.  The work on changing mmap modes HEAPcommitpersistence()
    2299             :  * is done during the BBPsync() for all bats that are newly persistent
    2300             :  * (BBPNEW). After the TMcommit(), it is done for those bats that are
    2301             :  * no longer persistent after the commit (BBPDELETED), only if it
    2302             :  * succeeds.  Such transient bats cannot be processed before the
    2303             :  * commit, because the commit may fail and then the more unsafe
    2304             :  * transient mmap modes would be present on a persistent bat.
    2305             :  *
    2306             :  * See dirty_bat() in BBPsync() -- gdk_bbp.c and epilogue() in
    2307             :  * gdk_tm.c.
    2308             :  *
    2309             :  * Including the exception states, we have 11 of the 16
    2310             :  * combinations. As for the 5 avoided states, all four
    2311             :  * (persistence,access) states with (STORE_MMAP,STORE_PRIV) are
    2312             :  * omitted (this would amount to an msync() save regime on a
    2313             :  * copy-on-write heap -- which does not work). The remaining avoided
    2314             :  * state is the patently unsafe
    2315             :  * (persistent,BAT_WRITE,STORE_MMAP,STORE_MMAP).
    2316             :  *
    2317             :  * Note that after a server restart exception states are gone, as on
    2318             :  * BAT loads the saved descriptor is inspected again (which will
    2319             :  * reproduce the state at the last succeeded commit).
    2320             :  *
    2321             :  * To avoid exception states, a TMsubcommit protocol would need to be
    2322             :  * used which is too heavy for BATsetaccess().
    2323             :  *
    2324             :  * Note that this code is not about making heaps mmap-ed in the first
    2325             :  * place.  It is just about determining which flavor of mmap should be
    2326             :  * used. The MAL user is oblivious of such details.
    2327             :  */
    2328             : 
    2329             : /* rather than deleting X.new, we comply with the commit protocol and
    2330             :  * move it to backup storage */
    2331             : static gdk_return
    2332           0 : backup_new(Heap *hp, bool lock)
    2333             : {
    2334           0 :         int batret, bakret, ret = -1;
    2335           0 :         char *batpath, *bakpath;
    2336           0 :         struct stat st;
    2337             : 
    2338           0 :         char *bak_filename = NULL;
    2339           0 :         if ((bak_filename = strrchr(hp->filename, DIR_SEP)) != NULL)
    2340           0 :                 bak_filename++;
    2341             :         else
    2342             :                 bak_filename = hp->filename;
    2343             :         /* check for an existing X.new in BATDIR, BAKDIR and SUBDIR */
    2344           0 :         batpath = GDKfilepath(hp->farmid, BATDIR, hp->filename, "new");
    2345           0 :         bakpath = GDKfilepath(hp->farmid, BAKDIR, bak_filename, "new");
    2346           0 :         if (batpath != NULL && bakpath != NULL) {
    2347             :                 /* file actions here interact with the global commits */
    2348           0 :                 if (lock)
    2349           0 :                         BBPtmlock();
    2350             : 
    2351           0 :                 batret = MT_stat(batpath, &st);
    2352           0 :                 bakret = MT_stat(bakpath, &st);
    2353             : 
    2354           0 :                 if (batret == 0 && bakret) {
    2355             :                         /* no backup yet, so move the existing X.new there out
    2356             :                          * of the way */
    2357           0 :                         if ((ret = MT_rename(batpath, bakpath)) < 0)
    2358           0 :                                 GDKsyserror("backup_new: rename %s to %s failed\n",
    2359             :                                             batpath, bakpath);
    2360           0 :                         TRC_DEBUG(IO_, "rename(%s,%s) = %d\n", batpath, bakpath, ret);
    2361           0 :                 } else if (batret == 0) {
    2362             :                         /* there is a backup already; just remove the X.new */
    2363           0 :                         if ((ret = MT_remove(batpath)) != 0)
    2364           0 :                                 GDKsyserror("backup_new: remove %s failed\n", batpath);
    2365           0 :                         TRC_DEBUG(IO_, "remove(%s) = %d\n", batpath, ret);
    2366             :                 } else {
    2367             :                         ret = 0;
    2368             :                 }
    2369           0 :                 if (lock)
    2370           0 :                         BBPtmunlock();
    2371             :         }
    2372           0 :         GDKfree(batpath);
    2373           0 :         GDKfree(bakpath);
    2374           0 :         return ret ? GDK_FAIL : GDK_SUCCEED;
    2375             : }
    2376             : 
    2377             : #define ACCESSMODE(wr,rd) ((wr)?BAT_WRITE:(rd)?BAT_READ:-1)
    2378             : 
    2379             : /* transition heap from readonly to writable */
    2380             : static storage_t
    2381     7215232 : HEAPchangeaccess(Heap *hp, int dstmode, bool existing)
    2382             : {
    2383     7215232 :         if (hp->base == NULL || hp->newstorage == STORE_MEM || !existing || dstmode == -1)
    2384     7215232 :                 return hp->newstorage;       /* 0<=>2,1<=>3,a<=>b */
    2385             : 
    2386           0 :         if (dstmode == BAT_WRITE) {
    2387           0 :                 if (hp->storage != STORE_PRIV)
    2388           0 :                         hp->dirty = true;    /* exception c does not make it dirty */
    2389           0 :                 return STORE_PRIV;      /* 4=>6,5=>7,c=>6 persistent BAT_WRITE needs STORE_PRIV */
    2390             :         }
    2391           0 :         if (hp->storage == STORE_MMAP) {     /* 6=>4 */
    2392           0 :                 hp->dirty = true;
    2393           0 :                 return backup_new(hp, true) != GDK_SUCCEED ? STORE_INVALID : STORE_MMAP;        /* only called for existing bats */
    2394             :         }
    2395             :         return hp->storage;  /* 7=>5 */
    2396             : }
    2397             : 
    2398             : /* heap changes persistence mode (at commit point) */
    2399             : static storage_t
    2400      154573 : HEAPcommitpersistence(Heap *hp, bool writable, bool existing)
    2401             : {
    2402      154573 :         if (existing) {         /* existing, ie will become transient */
    2403       37265 :                 if (hp->storage == STORE_MMAP && hp->newstorage == STORE_PRIV && writable) {      /* 6=>2 */
    2404           0 :                         hp->dirty = true;
    2405           0 :                         return backup_new(hp, false) != GDK_SUCCEED ? STORE_INVALID : STORE_MMAP;       /* only called for existing bats */
    2406             :                 }
    2407       37265 :                 return hp->newstorage;       /* 4=>0,5=>1,7=>3,c=>a no change */
    2408             :         }
    2409             :         /* !existing, ie will become persistent */
    2410      117308 :         if (hp->newstorage == STORE_MEM)
    2411             :                 return hp->newstorage;
    2412         772 :         if (hp->newstorage == STORE_MMAP && !writable)
    2413             :                 return STORE_MMAP;      /* 0=>4 STORE_MMAP */
    2414             : 
    2415           0 :         if (hp->newstorage == STORE_MMAP)
    2416           0 :                 hp->dirty = true;    /* 2=>6 */
    2417             :         return STORE_PRIV;      /* 1=>5,2=>6,3=>7,a=>c,b=>6 states */
    2418             : }
    2419             : 
    2420             : 
    2421             : #define ATOMappendpriv(t, h) (ATOMstorage(t) != TYPE_str /*|| GDK_ELIMDOUBLES(h) */)
    2422             : 
    2423             : /* change the heap modes at a commit */
    2424             : gdk_return
    2425      131338 : BATcheckmodes(BAT *b, bool existing)
    2426             : {
    2427      131338 :         storage_t m1 = STORE_MEM, m3 = STORE_MEM;
    2428      131338 :         bool dirty = false, wr;
    2429             : 
    2430      131338 :         BATcheck(b, GDK_FAIL);
    2431             : 
    2432      131338 :         wr = (b->batRestricted == BAT_WRITE);
    2433      131338 :         if (b->ttype) {
    2434      131338 :                 m1 = HEAPcommitpersistence(b->theap, wr, existing);
    2435      131338 :                 dirty |= (b->theap->newstorage != m1);
    2436             :         }
    2437             : 
    2438      131338 :         if (b->tvheap) {
    2439       23235 :                 bool ta = (b->batRestricted == BAT_APPEND) && ATOMappendpriv(b->ttype, b->tvheap);
    2440       23235 :                 m3 = HEAPcommitpersistence(b->tvheap, wr || ta, existing);
    2441       23235 :                 dirty |= (b->tvheap->newstorage != m3);
    2442             :         }
    2443      131338 :         if (m1 == STORE_INVALID || m3 == STORE_INVALID)
    2444             :                 return GDK_FAIL;
    2445             : 
    2446      131338 :         if (dirty) {
    2447           0 :                 b->theap->newstorage = m1;
    2448           0 :                 if (b->tvheap)
    2449           0 :                         b->tvheap->newstorage = m3;
    2450             :         }
    2451             :         return GDK_SUCCEED;
    2452             : }
    2453             : 
    2454             : BAT *
    2455     9019386 : BATsetaccess(BAT *b, restrict_t newmode)
    2456             : {
    2457     9019386 :         restrict_t bakmode;
    2458             : 
    2459     9019386 :         BATcheck(b, NULL);
    2460     9019386 :         if (newmode != BAT_READ &&
    2461       19015 :             (isVIEW(b) || (ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1)) {
    2462           0 :                 BAT *bn = COLcopy(b, b->ttype, true, b->batRole);
    2463           0 :                 BBPunfix(b->batCacheid);
    2464           0 :                 if (bn == NULL)
    2465             :                         return NULL;
    2466             :                 b = bn;
    2467             :         }
    2468     9019386 :         MT_lock_set(&b->theaplock);
    2469     9052593 :         bakmode = b->batRestricted;
    2470     9052593 :         if (bakmode != newmode) {
    2471     6335810 :                 bool existing = (BBP_status(b->batCacheid) & BBPEXISTING) != 0;
    2472     6335810 :                 bool wr = (newmode == BAT_WRITE);
    2473     6335810 :                 bool rd = (bakmode == BAT_WRITE);
    2474     6335810 :                 storage_t m1 = STORE_MEM, m3 = STORE_MEM;
    2475     6335810 :                 storage_t b1 = STORE_MEM, b3 = STORE_MEM;
    2476             : 
    2477     6335810 :                 if (b->theap->parentid == b->batCacheid) {
    2478     6320817 :                         b1 = b->theap->newstorage;
    2479     6331060 :                         m1 = HEAPchangeaccess(b->theap, ACCESSMODE(wr, rd), existing);
    2480             :                 }
    2481     6320778 :                 if (b->tvheap && b->tvheap->parentid == b->batCacheid) {
    2482      916973 :                         bool ta = (newmode == BAT_APPEND && ATOMappendpriv(b->ttype, b->tvheap));
    2483      916973 :                         b3 = b->tvheap->newstorage;
    2484     1835834 :                         m3 = HEAPchangeaccess(b->tvheap, ACCESSMODE(wr && ta, rd && ta), existing);
    2485             :                 }
    2486     6325049 :                 if (m1 == STORE_INVALID || m3 == STORE_INVALID) {
    2487     2403825 :                         MT_lock_unset(&b->theaplock);
    2488     2407872 :                         BBPunfix(b->batCacheid);
    2489     2407872 :                         return NULL;
    2490             :                 }
    2491             : 
    2492             :                 /* set new access mode and mmap modes */
    2493     3921224 :                 b->batRestricted = newmode;
    2494     3921224 :                 if (b->theap->parentid == b->batCacheid)
    2495     3918568 :                         b->theap->newstorage = m1;
    2496     3921224 :                 if (b->tvheap && b->tvheap->parentid == b->batCacheid)
    2497      903388 :                         b->tvheap->newstorage = m3;
    2498             : 
    2499     3921224 :                 MT_lock_unset(&b->theaplock);
    2500     3931705 :                 if (existing && !isVIEW(b) && BBPsave(b) != GDK_SUCCEED) {
    2501             :                         /* roll back all changes */
    2502           0 :                         MT_lock_set(&b->theaplock);
    2503           0 :                         b->batRestricted = bakmode;
    2504           0 :                         b->theap->newstorage = b1;
    2505           0 :                         if (b->tvheap)
    2506           0 :                                 b->tvheap->newstorage = b3;
    2507           0 :                         MT_lock_unset(&b->theaplock);
    2508           0 :                         BBPunfix(b->batCacheid);
    2509           0 :                         return NULL;
    2510             :                 }
    2511             :         } else {
    2512     2716783 :                 MT_lock_unset(&b->theaplock);
    2513             :         }
    2514             :         return b;
    2515             : }
    2516             : 
    2517             : restrict_t
    2518           0 : BATgetaccess(BAT *b)
    2519             : {
    2520           0 :         BATcheck(b, BAT_WRITE);
    2521           0 :         MT_lock_set(&b->theaplock);
    2522           0 :         restrict_t restricted = b->batRestricted;
    2523           0 :         MT_lock_unset(&b->theaplock);
    2524           0 :         return restricted;
    2525             : }
    2526             : 
    2527             : /*
    2528             :  * @- change BAT persistency (persistent,session,transient)
    2529             :  * In the past, we prevented BATs with certain types from being saved at all:
    2530             :  * - BATs of BATs, as having recursive bats creates cascading
    2531             :  *   complexities in commits/aborts.
    2532             :  * - any atom with refcounts, as the BBP has no overview of such
    2533             :  *   user-defined refcounts.
    2534             :  * - pointer types, as the values they point to are bound to be transient.
    2535             :  *
    2536             :  * However, nowadays we do allow such saves, as the BBP swapping
    2537             :  * mechanism was altered to be able to save transient bats temporarily
    2538             :  * to disk in order to make room.  Thus, we must be able to save any
    2539             :  * transient BAT to disk.
    2540             :  *
    2541             :  * What we don't allow is to make such bats persistent.
    2542             :  *
    2543             :  * Although the persistent state does influence the allowed mmap
    2544             :  * modes, this only goes for the *real* committed persistent
    2545             :  * state. Making the bat persistent with BATmode does not matter for
    2546             :  * the heap modes until the commit point is reached. So we do not need
    2547             :  * to do anything with heap modes yet at this point.
    2548             :  */
    2549             : gdk_return
    2550      428049 : BATmode(BAT *b, bool transient)
    2551             : {
    2552      428049 :         BATcheck(b, GDK_FAIL);
    2553             : 
    2554             :         /* can only make a bat PERSISTENT if its role is already
    2555             :          * PERSISTENT */
    2556      428049 :         assert(transient || b->batRole == PERSISTENT);
    2557             :         /* cannot make a view PERSISTENT */
    2558      244526 :         assert(transient || !isVIEW(b));
    2559             : 
    2560      428049 :         if (b->batRole == TRANSIENT && !transient) {
    2561           0 :                 GDKerror("cannot change mode of BAT in TRANSIENT farm.\n");
    2562           0 :                 return GDK_FAIL;
    2563             :         }
    2564             : 
    2565      428049 :         BATiter bi = bat_iterator(b);
    2566      428049 :         bool mustrelease = false;
    2567      428049 :         bool mustretain = false;
    2568      428049 :         bat bid = b->batCacheid;
    2569             : 
    2570      428049 :         if (transient != bi.transient) {
    2571      428049 :                 if (!transient) {
    2572      244526 :                         if (ATOMisdescendant(b->ttype, TYPE_ptr)) {
    2573           0 :                                 GDKerror("%s type implies that %s[%s] "
    2574             :                                          "cannot be made persistent.\n",
    2575             :                                          ATOMname(b->ttype), BATgetId(b),
    2576             :                                          ATOMname(b->ttype));
    2577           0 :                                 bat_iterator_end(&bi);
    2578           0 :                                 return GDK_FAIL;
    2579             :                         }
    2580             :                 }
    2581             : 
    2582             :                 /* we need to delay the calls to BBPretain and
    2583             :                  * BBPrelease until after we have released our reference
    2584             :                  * to the heaps (i.e. until after bat_iterator_end),
    2585             :                  * because in either case, BBPfree can be called (either
    2586             :                  * directly here or in BBPtrim) which waits for the heap
    2587             :                  * reference to come down.  BBPretain calls incref which
    2588             :                  * waits until the trim that is waiting for us is done,
    2589             :                  * so that causes deadlock, and BBPrelease can call
    2590             :                  * BBPfree which causes deadlock with a single thread */
    2591      183523 :                 if (!transient) {
    2592             :                         /* persistent BATs get a logical reference */
    2593             :                         mustretain = true;
    2594      183523 :                 } else if (!bi.transient) {
    2595             :                         /* transient BATs lose their logical reference */
    2596      183523 :                         mustrelease = true;
    2597             :                 }
    2598      428049 :                 MT_lock_set(&GDKswapLock(bid));
    2599      428049 :                 if (!transient) {
    2600      244526 :                         if (BBP_status(bid) & BBPDELETED) {
    2601           0 :                                 BBP_status_on(bid, BBPEXISTING);
    2602           0 :                                 BBP_status_off(bid, BBPDELETED);
    2603             :                         } else
    2604      244526 :                                 BBP_status_on(bid, BBPNEW);
    2605      183523 :                 } else if (!bi.transient) {
    2606      183523 :                         if (!(BBP_status(bid) & BBPNEW))
    2607       37175 :                                 BBP_status_on(bid, BBPDELETED);
    2608      183523 :                         BBP_status_off(bid, BBPPERSISTENT);
    2609             :                 }
    2610             :                 /* session bats or persistent bats that did not
    2611             :                  * witness a commit yet may have been saved */
    2612      428049 :                 MT_lock_set(&b->theaplock);
    2613      428049 :                 if (b->batCopiedtodisk) {
    2614       27643 :                         if (!transient) {
    2615         546 :                                 BBP_status_off(bid, BBPTMP);
    2616             :                         } else {
    2617             :                                 /* TMcommit must remove it to
    2618             :                                  * guarantee free space */
    2619       27097 :                                 BBP_status_on(bid, BBPTMP);
    2620             :                         }
    2621             :                 }
    2622      428049 :                 b->batTransient = transient;
    2623      428049 :                 MT_lock_unset(&b->theaplock);
    2624      428049 :                 MT_lock_unset(&GDKswapLock(bid));
    2625             :         }
    2626      428049 :         bat_iterator_end(&bi);
    2627             :         /* retain/release after bat_iterator_end because of refs to heaps */
    2628      428049 :         if (mustretain)
    2629      244526 :                 BBPretain(bid);
    2630      183523 :         else if (mustrelease)
    2631      183523 :                 BBPrelease(bid);
    2632             :         return GDK_SUCCEED;
    2633             : }
    2634             : 
    2635             : /* BATassertProps checks whether properties are set correctly.  Under
    2636             :  * no circumstances will it change any properties.  Note that the
    2637             :  * "nil" property is not actually used anywhere, but it is checked. */
    2638             : 
    2639             : #ifdef NDEBUG
    2640             : /* assertions are disabled, turn failing tests into a message */
    2641             : #undef assert
    2642             : #define assert(test)    ((void) ((test) || (TRC_CRITICAL_ENDIF(CHECK_, "Assertion `%s' failed\n", #test), 0)))
    2643             : #endif
    2644             : 
    2645             : static void
    2646   173116809 : assert_ascii(const char *s)
    2647             : {
    2648   346233618 :         if (!strNil(s)) {
    2649  3457321198 :                 while (*s) {
    2650  3330993364 :                         assert((*s & 0x80) == 0);
    2651  3330993364 :                         s++;
    2652             :                 }
    2653             :         }
    2654   173116809 : }
    2655             : 
    2656             : /* Assert that properties are set correctly.
    2657             :  *
    2658             :  * A BAT can have a bunch of properties set.  Mostly, the property
    2659             :  * bits are set if we *know* the property holds, and not set if we
    2660             :  * don't know whether the property holds (or if we know it doesn't
    2661             :  * hold).  All properties are per column.
    2662             :  *
    2663             :  * The properties currently maintained are:
    2664             :  *
    2665             :  * seqbase      Only valid for TYPE_oid and TYPE_void columns: each
    2666             :  *              value in the column is exactly one more than the
    2667             :  *              previous value, starting at position 0 with the value
    2668             :  *              stored in this property.
    2669             :  *              This implies sorted, key, nonil (which therefore need
    2670             :  *              to be set).
    2671             :  * nil          There is at least one NIL value in the column.
    2672             :  * nonil        There are no NIL values in the column.
    2673             :  * key          All values in the column are distinct.
    2674             :  * sorted       The column is sorted (ascending).  If also revsorted,
    2675             :  *              then all values are equal.
    2676             :  * revsorted    The column is reversely sorted (descending).  If
    2677             :  *              also sorted, then all values are equal.
    2678             :  * nosorted     BUN position which proves not sorted (given position
    2679             :  *              and one before are not ordered correctly).
    2680             :  * norevsorted  BUN position which proves not revsorted (given position
    2681             :  *              and one before are not ordered correctly).
    2682             :  * nokey        Pair of BUN positions that prove not all values are
    2683             :  *              distinct (i.e. values at given locations are equal).
    2684             :  * ascii        Only valid for TYPE_str columns: all strings in the column
    2685             :  *              are ASCII, i.e. the UTF-8 encoding for all characters is a
    2686             :  *              single byte.
    2687             :  *
    2688             :  * Note that the functions BATtseqbase and BATkey also set more
    2689             :  * properties than you might suspect.  When setting properties on a
    2690             :  * newly created and filled BAT, you may want to first make sure the
    2691             :  * batCount is set correctly (e.g. by calling BATsetcount), then use
    2692             :  * BATtseqbase and BATkey, and finally set the other properties.
    2693             :  *
    2694             :  * For a view, we cannot check all properties, since it is possible with
    2695             :  * the way the SQL layer works, that a parent BAT gets changed, changing
    2696             :  * the properties, while there is a view.  The view is supposed to look
    2697             :  * at only the non-changing part of the BAT (through candidate lists),
    2698             :  * but this means that the properties of the view might not be correct.
    2699             :  * For this reason, for views, we skip all property checking that looks
    2700             :  * at the BAT content.
    2701             :  */
    2702             : 
    2703             : void
    2704    20182068 : BATassertProps(BAT *b)
    2705             : {
    2706    20182068 :         unsigned bbpstatus;
    2707    20182068 :         BUN p, q;
    2708    20182068 :         int (*cmpf)(const void *, const void *);
    2709    20182068 :         int cmp;
    2710    20182068 :         const void *prev = NULL, *valp, *nilp;
    2711    20182068 :         char filename[sizeof(b->theap->filename)];
    2712    20182068 :         bool isview1, isview2;
    2713             : 
    2714             :         /* do the complete check within a lock */
    2715    20182068 :         MT_lock_set(&b->theaplock);
    2716             : 
    2717             :         /* general BAT sanity */
    2718    20216624 :         assert(b != NULL);
    2719    20216624 :         assert(b->batCacheid > 0);
    2720    20216624 :         assert(b->batCacheid < getBBPsize());
    2721    20129503 :         assert(b == BBP_desc(b->batCacheid));
    2722    20129503 :         assert(b->batCount >= b->batInserted);
    2723             : 
    2724             :         /* headless */
    2725    20129503 :         assert(b->hseqbase <= GDK_oid_max); /* non-nil seqbase */
    2726    20129503 :         assert(b->hseqbase + BATcount(b) <= GDK_oid_max);
    2727             : 
    2728    20129503 :         isview1 = b->theap->parentid != b->batCacheid;
    2729    20129503 :         isview2 = b->tvheap && b->tvheap->parentid != b->batCacheid;
    2730             : 
    2731    20129503 :         bbpstatus = BBP_status(b->batCacheid);
    2732             :         /* only at most one of BBPDELETED, BBPEXISTING, BBPNEW may be set */
    2733    20129503 :         assert(((bbpstatus & BBPDELETED) != 0) +
    2734             :                ((bbpstatus & BBPEXISTING) != 0) +
    2735             :                ((bbpstatus & BBPNEW) != 0) <= 1);
    2736             : 
    2737    20129503 :         assert(b->ttype >= TYPE_void);
    2738    20129503 :         assert(b->ttype < GDKatomcnt);
    2739    20129503 :         assert(isview1 ||
    2740             :                b->ttype == TYPE_void ||
    2741             :                BBPfarms[b->theap->farmid].roles & (1 << b->batRole));
    2742    20129503 :         assert(isview2 ||
    2743             :                b->tvheap == NULL ||
    2744             :                (BBPfarms[b->tvheap->farmid].roles & (1 << b->batRole)));
    2745             : 
    2746    20129503 :         cmpf = ATOMcompare(b->ttype);
    2747    20129503 :         nilp = ATOMnilptr(b->ttype);
    2748             : 
    2749    20129503 :         assert(isview1 || b->theap->free >= tailsize(b, BATcount(b)));
    2750    20129503 :         if (b->ttype != TYPE_void) {
    2751    15133197 :                 assert(b->batCount <= b->batCapacity);
    2752    15133197 :                 assert(isview1 || b->theap->size >= b->theap->free);
    2753    15133197 :                 if (ATOMstorage(b->ttype) == TYPE_msk) {
    2754             :                         /* 32 values per 4-byte word (that's not the
    2755             :                          * same as 8 values per byte...) */
    2756        3913 :                         assert(isview1 || b->theap->size >= 4 * ((b->batCapacity + 31) / 32));
    2757             :                 } else
    2758    15129284 :                         assert(isview1 || b->theap->size >> b->tshift >= b->batCapacity);
    2759             :         }
    2760    20129503 :         if (!isview1) {
    2761    15333570 :                 strconcat_len(filename, sizeof(filename),
    2762    15333570 :                               BBP_physical(b->theap->parentid),
    2763     2689238 :                               b->ttype == TYPE_str ? b->twidth == 1 ? ".tail1" : b->twidth == 2 ? ".tail2" :
    2764             : #if SIZEOF_VAR_T == 8
    2765             :                               b->twidth == 4 ? ".tail4" :
    2766             : #endif
    2767             :                               ".tail" : ".tail",
    2768             :                               NULL);
    2769    15396206 :                 assert(strcmp(b->theap->filename, filename) == 0);
    2770             :         }
    2771    20192139 :         if (!isview2 && b->tvheap) {
    2772     2532314 :                 strconcat_len(filename, sizeof(filename),
    2773     2532314 :                               BBP_physical(b->tvheap->parentid),
    2774             :                               ".theap",
    2775             :                               NULL);
    2776     2532067 :                 assert(strcmp(b->tvheap->filename, filename) == 0);
    2777             :         }
    2778             : 
    2779             :         /* void, str and blob imply varsized */
    2780    20191892 :         if (ATOMstorage(b->ttype) == TYPE_str ||
    2781             :             ATOMstorage(b->ttype) == TYPE_blob)
    2782     3481962 :                 assert(b->tvheap != NULL);
    2783             :         /* other "known" types are not varsized */
    2784    20191892 :         if (ATOMstorage(b->ttype) > TYPE_void &&
    2785             :             ATOMstorage(b->ttype) < TYPE_str)
    2786    11680325 :                 assert(b->tvheap == NULL);
    2787             :         /* shift and width have a particular relationship */
    2788    20191892 :         if (ATOMstorage(b->ttype) == TYPE_str)
    2789     3463265 :                 assert(b->twidth >= 1 && b->twidth <= ATOMsize(b->ttype));
    2790             :         else
    2791    16728627 :                 assert(b->twidth == ATOMsize(b->ttype));
    2792    20191892 :         assert(b->tseqbase <= oid_nil);
    2793             :         /* only oid/void columns can be dense */
    2794    20191892 :         assert(is_oid_nil(b->tseqbase) || b->ttype == TYPE_oid || b->ttype == TYPE_void);
    2795             :         /* a column cannot both have and not have NILs */
    2796    20191892 :         assert(!b->tnil || !b->tnonil);
    2797             :         /* only string columns can be ASCII */
    2798    20191892 :         assert(!b->tascii || ATOMstorage(b->ttype) == TYPE_str);
    2799    20191892 :         if (b->ttype == TYPE_void) {
    2800     5132557 :                 assert(b->tshift == 0);
    2801     5132557 :                 assert(b->twidth == 0);
    2802     5132557 :                 assert(b->tsorted);
    2803     5132557 :                 if (is_oid_nil(b->tseqbase)) {
    2804         266 :                         assert(b->tvheap == NULL);
    2805         266 :                         assert(BATcount(b) == 0 || !b->tnonil);
    2806         266 :                         assert(BATcount(b) <= 1 || !b->tkey);
    2807         266 :                         assert(b->trevsorted);
    2808             :                 } else {
    2809     5132291 :                         if (b->tvheap != NULL) {
    2810             :                                 /* candidate list with exceptions */
    2811       37383 :                                 assert(b->batRole == TRANSIENT || b->batRole == SYSTRANS);
    2812       37383 :                                 assert(b->tvheap->free <= b->tvheap->size);
    2813       37383 :                                 assert(b->tvheap->free >= sizeof(ccand_t));
    2814       37383 :                                 assert((negoid_cand(b) && ccand_free(b) % SIZEOF_OID == 0) || mask_cand(b));
    2815       37383 :                                 if (negoid_cand(b) && ccand_free(b) > 0) {
    2816         208 :                                         const oid *oids = (const oid *) ccand_first(b);
    2817         208 :                                         q = ccand_free(b) / SIZEOF_OID;
    2818         208 :                                         assert(oids != NULL);
    2819         208 :                                         assert(b->tseqbase + BATcount(b) + q <= GDK_oid_max);
    2820             :                                         /* exceptions within range */
    2821         208 :                                         assert(oids[0] >= b->tseqbase);
    2822         208 :                                         assert(oids[q - 1] < b->tseqbase + BATcount(b) + q);
    2823             :                                         /* exceptions sorted */
    2824      188740 :                                         for (p = 1; p < q; p++)
    2825      188532 :                                                 assert(oids[p - 1] < oids[p]);
    2826             :                                 }
    2827             :                         }
    2828     5132291 :                         assert(b->tseqbase + b->batCount <= GDK_oid_max);
    2829     5132291 :                         assert(BATcount(b) == 0 || !b->tnil);
    2830     5132291 :                         assert(BATcount(b) <= 1 || !b->trevsorted);
    2831     5132291 :                         assert(b->tkey);
    2832     5132291 :                         assert(b->tnonil);
    2833             :                 }
    2834     5132557 :                 MT_lock_unset(&b->theaplock);
    2835    12925005 :                 return;
    2836             :         }
    2837             : 
    2838    15059335 :         BATiter bi  = bat_iterator_nolock(b);
    2839             : 
    2840    15059335 :         if (BATtdense(b)) {
    2841      249035 :                 assert(b->tseqbase + b->batCount <= GDK_oid_max);
    2842      249035 :                 assert(b->ttype == TYPE_oid);
    2843      249035 :                 assert(b->tsorted);
    2844      249035 :                 assert(b->tkey);
    2845      249035 :                 assert(b->tnonil);
    2846      249035 :                 if ((q = b->batCount) != 0) {
    2847      181761 :                         const oid *o = (const oid *) Tloc(b, 0);
    2848      181761 :                         assert(*o == b->tseqbase);
    2849    32397121 :                         for (p = 1; p < q; p++)
    2850    32215360 :                                 assert(o[p - 1] + 1 == o[p]);
    2851             :                 }
    2852      249035 :                 MT_lock_unset(&b->theaplock);
    2853      249209 :                 return;
    2854             :         }
    2855    14810300 :         assert(1 << b->tshift == b->twidth);
    2856             :         /* only linear atoms can be sorted */
    2857    14810300 :         assert(!b->tsorted || ATOMlinear(b->ttype));
    2858    14810300 :         assert(!b->trevsorted || ATOMlinear(b->ttype));
    2859    14810300 :         if (ATOMlinear(b->ttype)) {
    2860    14784364 :                 assert(b->tnosorted == 0 ||
    2861             :                        (b->tnosorted > 0 &&
    2862             :                         b->tnosorted < b->batCount));
    2863    14784364 :                 assert(!b->tsorted || b->tnosorted == 0);
    2864    14784364 :                 if (!isview1 &&
    2865    14784364 :                     !isview2 &&
    2866     1786746 :                     !b->tsorted &&
    2867      124596 :                     b->tnosorted > 0 &&
    2868      124596 :                     b->tnosorted < b->batCount)
    2869      124868 :                         assert(cmpf(BUNtail(bi, b->tnosorted - 1),
    2870             :                                     BUNtail(bi, b->tnosorted)) > 0);
    2871    14783578 :                 assert(b->tnorevsorted == 0 ||
    2872             :                        (b->tnorevsorted > 0 &&
    2873             :                         b->tnorevsorted < b->batCount));
    2874    14783578 :                 assert(!b->trevsorted || b->tnorevsorted == 0);
    2875    14783578 :                 if (!isview1 &&
    2876     9720772 :                     !isview2 &&
    2877     2434231 :                     !b->trevsorted &&
    2878      560472 :                     b->tnorevsorted > 0 &&
    2879      560472 :                     b->tnorevsorted < b->batCount)
    2880      559839 :                         assert(cmpf(BUNtail(bi, b->tnorevsorted - 1),
    2881             :                                     BUNtail(bi, b->tnorevsorted)) < 0);
    2882             :         }
    2883             :         /* if tkey property set, both tnokey values must be 0 */
    2884    14808030 :         assert(!b->tkey || (b->tnokey[0] == 0 && b->tnokey[1] == 0));
    2885    14808030 :         if (!isview1 &&
    2886    14808030 :             !isview2 &&
    2887     2218439 :             !b->tkey &&
    2888     2218439 :             (b->tnokey[0] != 0 || b->tnokey[1] != 0)) {
    2889             :                 /* if tkey not set and tnokey indicates a proof of
    2890             :                  * non-key-ness, make sure the tnokey values are in
    2891             :                  * range and indeed provide a proof */
    2892      555080 :                 assert(b->tnokey[0] != b->tnokey[1]);
    2893      555080 :                 assert(b->tnokey[0] < b->batCount);
    2894      555080 :                 assert(b->tnokey[1] < b->batCount);
    2895      555080 :                 assert(cmpf(BUNtail(bi, b->tnokey[0]),
    2896             :                             BUNtail(bi, b->tnokey[1])) == 0);
    2897             :         }
    2898             :         /* var heaps must have sane sizes */
    2899    14806621 :         assert(b->tvheap == NULL || b->tvheap->free <= b->tvheap->size);
    2900             : 
    2901    14806621 :         if (!b->tkey && !b->tsorted && !b->trevsorted &&
    2902     5068487 :             !b->tnonil && !b->tnil) {
    2903             :                 /* nothing more to prove */
    2904     2387041 :                 MT_lock_unset(&b->theaplock);
    2905     2384997 :                 return;
    2906             :         }
    2907             : 
    2908             :         /* only do a scan if the bat is not a view */
    2909    12419580 :         if (!isview1 && !isview2) {
    2910     9277469 :                 const ValRecord *prop;
    2911     9277469 :                 const void *maxval = NULL;
    2912     9277469 :                 const void *minval = NULL;
    2913     9277469 :                 const void *maxbound = NULL;
    2914     9277469 :                 const void *minbound = NULL;
    2915     9277469 :                 const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
    2916     9264692 :                 bool seenmax = false, seenmin = false;
    2917     9264692 :                 bool seennil = false;
    2918             : 
    2919     9264692 :                 if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL)
    2920          12 :                         maxbound = VALptr(prop);
    2921     9308707 :                 if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL)
    2922        2221 :                         minbound = VALptr(prop);
    2923     9313761 :                 if (b->tmaxpos != BUN_NONE) {
    2924     1099423 :                         assert(b->tmaxpos < BATcount(b));
    2925     1099423 :                         maxval = BUNtail(bi, b->tmaxpos);
    2926     1093395 :                         assert(cmpf(maxval, nilp) != 0);
    2927             :                 }
    2928     9306715 :                 if (b->tminpos != BUN_NONE) {
    2929     1040783 :                         assert(b->tminpos < BATcount(b));
    2930     1040783 :                         minval = BUNtail(bi, b->tminpos);
    2931     1043196 :                         assert(cmpf(minval, nilp) != 0);
    2932             :                 }
    2933     9309124 :                 if (ATOMstorage(b->ttype) == TYPE_msk) {
    2934             :                         /* for now, don't do extra checks for bit mask */
    2935             :                         ;
    2936     9305271 :                 } else if (b->tsorted || b->trevsorted || !b->tkey) {
    2937             :                         /* if sorted (either way), or we don't have to
    2938             :                          * prove uniqueness, we can do a simple
    2939             :                          * scan */
    2940             :                         /* only call compare function if we have to */
    2941     9286894 :                         bool cmpprv = b->tsorted | b->trevsorted | b->tkey;
    2942             : 
    2943  2129065719 :                         BATloop(b, p, q) {
    2944  2119983869 :                                 valp = BUNtail(bi, p);
    2945  2119893693 :                                 bool isnil = cmpf(valp, nilp) == 0;
    2946  2061382019 :                                 assert(!isnil || !notnull);
    2947  2061382019 :                                 assert(!b->tnonil || !isnil);
    2948  2061382019 :                                 assert(b->ttype != TYPE_flt || !isinf(*(flt*)valp));
    2949  2061382019 :                                 assert(b->ttype != TYPE_dbl || !isinf(*(dbl*)valp));
    2950  2061382019 :                                 if (b->tascii)
    2951   170690554 :                                         assert_ascii(valp);
    2952  2061498804 :                                 if (minbound && !isnil) {
    2953          30 :                                         cmp = cmpf(minbound, valp);
    2954          30 :                                         assert(cmp <= 0);
    2955             :                                 }
    2956  2061498804 :                                 if (maxbound && !isnil) {
    2957          30 :                                         cmp = cmpf(maxbound, valp);
    2958          30 :                                         assert(cmp > 0);
    2959             :                                 }
    2960  2061498804 :                                 if (maxval && !isnil) {
    2961    74814187 :                                         cmp = cmpf(maxval, valp);
    2962    74818315 :                                         assert(cmp >= 0);
    2963    74818315 :                                         seenmax |= cmp == 0;
    2964             :                                 }
    2965  2061502932 :                                 if (minval && !isnil) {
    2966    48417126 :                                         cmp = cmpf(minval, valp);
    2967    48414026 :                                         assert(cmp <= 0);
    2968    48414026 :                                         seenmin |= cmp == 0;
    2969             :                                 }
    2970  2061499832 :                                 if (prev && cmpprv) {
    2971  1028570565 :                                         cmp = cmpf(prev, valp);
    2972  1087018114 :                                         assert(!b->tsorted || cmp <= 0);
    2973  1087018114 :                                         assert(!b->trevsorted || cmp >= 0);
    2974  1087018114 :                                         assert(!b->tkey || cmp != 0);
    2975             :                                 }
    2976  2119947381 :                                 seennil |= isnil;
    2977  2119947381 :                                 if (seennil && !cmpprv &&
    2978     4623354 :                                     maxval == NULL && minval == NULL &&
    2979      168515 :                                     minbound == NULL && maxbound == NULL) {
    2980             :                                         /* we've done all the checking
    2981             :                                          * we can do */
    2982             :                                         break;
    2983             :                                 }
    2984  2119778825 :                                 prev = valp;
    2985             :                         }
    2986             :                 } else {        /* b->tkey && !b->tsorted && !b->trevsorted */
    2987             :                         /* we need to check for uniqueness the hard
    2988             :                          * way (i.e. using a hash table) */
    2989       18377 :                         const char *nme = BBP_physical(b->batCacheid);
    2990       18377 :                         Hash *hs = NULL;
    2991       18377 :                         BUN mask;
    2992             : 
    2993       18377 :                         if ((hs = GDKzalloc(sizeof(Hash))) == NULL) {
    2994           0 :                                 TRC_WARNING(BAT_, "Cannot allocate hash table\n");
    2995           0 :                                 goto abort_check;
    2996             :                         }
    2997       18383 :                         if (snprintf(hs->heaplink.filename, sizeof(hs->heaplink.filename), "%s.thshprpl%x", nme, (unsigned) MT_getpid()) >= (int) sizeof(hs->heaplink.filename) ||
    2998       18376 :                             snprintf(hs->heapbckt.filename, sizeof(hs->heapbckt.filename), "%s.thshprpb%x", nme, (unsigned) MT_getpid()) >= (int) sizeof(hs->heapbckt.filename)) {
    2999             :                                 /* cannot happen, see comment in gdk.h
    3000             :                                  * about sizes near definition of
    3001             :                                  * BBPINIT */
    3002           0 :                                 GDKfree(hs);
    3003           0 :                                 TRC_CRITICAL(BAT_, "Heap filename is too large\n");
    3004           0 :                                 goto abort_check;
    3005             :                         }
    3006       18380 :                         if (ATOMsize(b->ttype) == 1)
    3007             :                                 mask = (BUN) 1 << 8;
    3008       18374 :                         else if (ATOMsize(b->ttype) == 2)
    3009             :                                 mask = (BUN) 1 << 16;
    3010             :                         else
    3011       18374 :                                 mask = HASHmask(b->batCount);
    3012       18380 :                         hs->heapbckt.parentid = b->batCacheid;
    3013       18380 :                         hs->heaplink.parentid = b->batCacheid;
    3014       18380 :                         if ((hs->heaplink.farmid = BBPselectfarm(
    3015       18379 :                                      TRANSIENT, b->ttype, hashheap)) < 0 ||
    3016       18379 :                             (hs->heapbckt.farmid = BBPselectfarm(
    3017       36752 :                                     TRANSIENT, b->ttype, hashheap)) < 0 ||
    3018       18379 :                             HASHnew(hs, b->ttype, BATcount(b),
    3019             :                                     mask, BUN_NONE, false) != GDK_SUCCEED) {
    3020           0 :                                 GDKfree(hs);
    3021           0 :                                 TRC_WARNING(BAT_, "Cannot allocate hash table\n");
    3022           0 :                                 goto abort_check;
    3023             :                         }
    3024    35146814 :                         BATloop(b, p, q) {
    3025    35128434 :                                 BUN hb;
    3026    35128434 :                                 BUN prb;
    3027    35128434 :                                 valp = BUNtail(bi, p);
    3028    35128434 :                                 bool isnil = cmpf(valp, nilp) == 0;
    3029    35120409 :                                 assert(!isnil || !notnull);
    3030    35120409 :                                 assert(b->ttype != TYPE_flt || !isinf(*(flt*)valp));
    3031    35120409 :                                 assert(b->ttype != TYPE_dbl || !isinf(*(dbl*)valp));
    3032    35120409 :                                 if (b->tascii)
    3033       10156 :                                         assert_ascii(valp);
    3034    35120409 :                                 if (minbound && !isnil) {
    3035           0 :                                         cmp = cmpf(minbound, valp);
    3036           0 :                                         assert(cmp <= 0);
    3037             :                                 }
    3038    35120409 :                                 if (maxbound && !isnil) {
    3039           0 :                                         cmp = cmpf(maxbound, valp);
    3040           0 :                                         assert(cmp > 0);
    3041             :                                 }
    3042    35120409 :                                 if (maxval && !isnil) {
    3043        5699 :                                         cmp = cmpf(maxval, valp);
    3044        5699 :                                         assert(cmp >= 0);
    3045        5699 :                                         seenmax |= cmp == 0;
    3046             :                                 }
    3047    35120409 :                                 if (minval && !isnil) {
    3048        5699 :                                         cmp = cmpf(minval, valp);
    3049        5699 :                                         assert(cmp <= 0);
    3050        5699 :                                         seenmin |= cmp == 0;
    3051             :                                 }
    3052    35120409 :                                 prb = HASHprobe(hs, valp);
    3053    35122878 :                                 for (hb = HASHget(hs, prb);
    3054    35727987 :                                      hb != BUN_NONE;
    3055      605109 :                                      hb = HASHgetlink(hs, hb))
    3056      607235 :                                         if (cmpf(valp, BUNtail(bi, hb)) == 0)
    3057           0 :                                                 assert(!b->tkey);
    3058    35120752 :                                 HASHputlink(hs, p, HASHget(hs, prb));
    3059    35123455 :                                 HASHput(hs, prb, p);
    3060    35128436 :                                 assert(!b->tnonil || !isnil);
    3061    35128436 :                                 seennil |= isnil;
    3062             :                         }
    3063       18380 :                         HEAPfree(&hs->heaplink, true);
    3064       18381 :                         HEAPfree(&hs->heapbckt, true);
    3065       18384 :                         GDKfree(hs);
    3066             :                 }
    3067     9272644 :           abort_check:
    3068     9272644 :                 GDKclrerr();
    3069     9239518 :                 assert(maxval == NULL || seenmax);
    3070     9239518 :                 assert(minval == NULL || seenmin);
    3071     9239518 :                 assert(!b->tnil || seennil);
    3072             :         }
    3073    12381629 :         MT_lock_unset(&b->theaplock);
    3074             : }

Generated by: LCOV version 1.14