LCOV - code coverage report
Current view: top level - gdk - gdk_bat.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 1239 1562 79.3 %
Date: 2025-03-25 21:27:32 Functions: 36 40 90.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024, 2025 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : /*
      14             :  * @a M. L. Kersten, P. Boncz, N. Nes
      15             :  * @* BAT Module
      16             :  * In this Chapter we describe the BAT implementation in more detail.
      17             :  * The routines mentioned are primarily meant to simplify the library
      18             :  * implementation.
      19             :  *
      20             :  * @+ BAT Construction
      21             :  * BATs are implemented in several blocks of memory, prepared for disk
      22             :  * storage and easy shipment over a network.
      23             :  *
      24             :  * The BAT starts with a descriptor, which indicates the required BAT
      25             :  * library version and the BAT administration details.  In particular,
      26             :  * it describes the binary relationship maintained and the location of
      27             :  * fields required for storage.
      28             :  *
      29             :  * The general layout of the BAT in this implementation is as follows.
      30             :  * Each BAT comes with a heap for the loc-size buns and, optionally,
      31             :  * with heaps to manage the variable-sized data items of both
      32             :  * dimensions.  The buns are assumed to be stored as loc-size objects.
      33             :  * This is essentially an array of structs to store the associations.
      34             :  * The size is determined at BAT creation time using an upper bound on
      35             :  * the number of elements to be accommodated.  In case of overflow,
      36             :  * its storage space is extended automatically.
      37             :  *
      38             :  * The capacity of a BAT places an upper limit on the number of BUNs
      39             :  * to be stored initially. The actual space set aside may be quite
      40             :  * large.  Moreover, the size is aligned to int boundaries to speedup
      41             :  * access and avoid some machine limitations.
      42             :  *
      43             :  * Initialization of the variable parts rely on type specific routines
      44             :  * called atomHeap.
      45             :  */
      46             : #include "monetdb_config.h"
      47             : #include "gdk.h"
      48             : #include "gdk_private.h"
      49             : #include "mutils.h"
      50             : 
      51             : #ifdef ALIGN
      52             : #undef ALIGN
      53             : #endif
      54             : #define ALIGN(n,b)      ((b)?(b)*(1+(((n)-1)/(b))):n)
      55             : 
      56             : #define ATOMneedheap(tpe) (BATatoms[tpe].atomHeap != NULL)
      57             : 
      58             : BAT *
      59    24844190 : BATcreatedesc(oid hseq, int tt, bool heapnames, role_t role, uint16_t width)
      60             : {
      61    24844190 :         bat bid;
      62    24844190 :         BAT *bn;
      63    24844190 :         Heap *h = NULL, *vh = NULL;
      64             : 
      65             :         /*
      66             :          * Alloc space for the BAT and its dependent records.
      67             :          */
      68    24844190 :         assert(tt >= 0);
      69             : 
      70    24844190 :         if (heapnames) {
      71    12442483 :                 if ((h = GDKmalloc(sizeof(Heap))) == NULL) {
      72             :                         return NULL;
      73             :                 }
      74    24965808 :                 *h = (Heap) {
      75    12488194 :                         .farmid = BBPselectfarm(role, tt, offheap),
      76             :                         .dirty = true,
      77             :                         .refs = ATOMIC_VAR_INIT(1),
      78             :                 };
      79             : 
      80    12477614 :                 if (ATOMneedheap(tt)) {
      81     1072316 :                         if ((vh = GDKmalloc(sizeof(Heap))) == NULL) {
      82           0 :                                 GDKfree(h);
      83           0 :                                 return NULL;
      84             :                         }
      85     1081187 :                         *vh = (Heap) {
      86     1080695 :                                 .farmid = BBPselectfarm(role, tt, varheap),
      87             :                                 .dirty = true,
      88             :                                 .refs = ATOMIC_VAR_INIT(1),
      89             :                         };
      90             :                 }
      91             :         }
      92             : 
      93    24888192 :         bid = BBPallocbat(tt);
      94    24969345 :         if (bid == 0) {
      95           0 :                 GDKfree(h);
      96           0 :                 GDKfree(vh);
      97           0 :                 return NULL;
      98             :         }
      99    24969345 :         bn = BBP_desc(bid);
     100             : 
     101             :         /*
     102             :          * Fill in basic column info
     103             :          */
     104    49873534 :         *bn = (BAT) {
     105             :                 .batCacheid = bid,
     106             :                 .hseqbase = hseq,
     107             : 
     108             :                 .ttype = tt,
     109             :                 .tkey = true,
     110             :                 .tnonil = true,
     111             :                 .tnil = false,
     112    24904189 :                 .tsorted = ATOMlinear(tt),
     113             :                 .trevsorted = ATOMlinear(tt),
     114    24904189 :                 .tascii = tt == TYPE_str,
     115             :                 .tseqbase = oid_nil,
     116             :                 .tminpos = BUN_NONE,
     117             :                 .tmaxpos = BUN_NONE,
     118             :                 .tunique_est = 0.0,
     119             : 
     120             :                 .batRole = role,
     121             :                 .batTransient = true,
     122             :                 .batRestricted = BAT_WRITE,
     123             :                 .theap = h,
     124             :                 .tvheap = vh,
     125    24969345 :                 .creator_tid = MT_getpid(),
     126             :         };
     127             : 
     128    24904189 :         if (bn->theap) {
     129    12463003 :                 bn->theap->parentid = bn->batCacheid;
     130    12463003 :                 const char *nme = BBP_physical(bn->batCacheid);
     131    12463003 :                 settailname(bn->theap, nme, tt, width);
     132             : 
     133    12485785 :                 if (bn->tvheap) {
     134     1074506 :                         bn->tvheap->parentid = bn->batCacheid;
     135     1074506 :                         strconcat_len(bn->tvheap->filename,
     136             :                                       sizeof(bn->tvheap->filename),
     137             :                                       nme, ".theap", NULL);
     138             :                 }
     139             :         }
     140    24932966 :         char name[MT_NAME_LEN];
     141    24932966 :         snprintf(name, sizeof(name), "heaplock%d", bn->batCacheid); /* fits */
     142    24932966 :         MT_lock_init(&bn->theaplock, name);
     143    24874433 :         snprintf(name, sizeof(name), "BATlock%d", bn->batCacheid); /* fits */
     144    24874433 :         MT_lock_init(&bn->batIdxLock, name);
     145    24920314 :         snprintf(name, sizeof(name), "hashlock%d", bn->batCacheid); /* fits */
     146    24920314 :         MT_rwlock_init(&bn->thashlock, name);
     147    24831079 :         return bn;
     148             : }
     149             : 
     150             : uint8_t
     151    12467163 : ATOMelmshift(int sz)
     152             : {
     153    12467163 :         uint8_t sh;
     154    12467163 :         int i = sz >> 1;
     155             : 
     156    22943000 :         for (sh = 0; i != 0; sh++) {
     157    10475837 :                 i >>= 1;
     158             :         }
     159    12467163 :         return sh;
     160             : }
     161             : 
     162             : 
     163             : void
     164    12441553 : BATsetdims(BAT *b, uint16_t width)
     165             : {
     166    12441553 :         b->twidth = b->ttype == TYPE_str ? width > 0 ? width : 1 : ATOMsize(b->ttype);
     167    12441553 :         b->tshift = ATOMelmshift(b->twidth);
     168    12441553 :         assert_shift_width(b->tshift, b->twidth);
     169    12441553 : }
     170             : 
     171             : const char *
     172        1280 : BATtailname(const BAT *b)
     173             : {
     174        1280 :         if (b->ttype == TYPE_str) {
     175         340 :                 switch (b->twidth) {
     176         250 :                 case 1:
     177         250 :                         return "tail1";
     178          75 :                 case 2:
     179          75 :                         return "tail2";
     180          15 :                 case 4:
     181             : #if SIZEOF_VAR_T == 8
     182          15 :                         return "tail4";
     183             :                 case 8:
     184             : #endif
     185             :                         break;
     186             :                 default:
     187           0 :                         MT_UNREACHABLE();
     188             :                 }
     189             :         }
     190             :         return "tail";
     191             : }
     192             : 
     193             : void
     194    12533606 : settailname(Heap *restrict tail, const char *restrict physnme, int tt, int width)
     195             : {
     196    12533606 :         if (tt == TYPE_str) {
     197     1103961 :                 switch (width) {
     198     1042849 :                 case 0:
     199             :                 case 1:
     200     1042849 :                         strconcat_len(tail->filename,
     201             :                                       sizeof(tail->filename), physnme,
     202             :                                       ".tail1", NULL);
     203     1042849 :                         return;
     204       55923 :                 case 2:
     205       55923 :                         strconcat_len(tail->filename,
     206             :                                       sizeof(tail->filename), physnme,
     207             :                                       ".tail2", NULL);
     208       55923 :                         return;
     209        5189 :                 case 4:
     210             : #if SIZEOF_VAR_T == 8
     211        5189 :                         strconcat_len(tail->filename,
     212             :                                       sizeof(tail->filename), physnme,
     213             :                                       ".tail4", NULL);
     214        5189 :                         return;
     215             :                 case 8:
     216             : #endif
     217             :                         break;
     218             :                 default:
     219           0 :                         MT_UNREACHABLE();
     220             :                 }
     221             :         }
     222    11429645 :         strconcat_len(tail->filename, sizeof(tail->filename), physnme,
     223             :                       ".tail", NULL);
     224             : }
     225             : 
     226             : /*
     227             :  * @- BAT allocation
     228             :  * Allocate BUN heap and variable-size atomheaps (see e.g. strHeap).
     229             :  * We now initialize new BATs with their heapname such that the
     230             :  * modified HEAPalloc/HEAPextend primitives can possibly use memory
     231             :  * mapped files as temporary heap storage.
     232             :  *
     233             :  * In case of huge bats, we want HEAPalloc to write a file to disk,
     234             :  * and memory map it. To make this possible, we must provide it with
     235             :  * filenames.
     236             :  */
     237             : BAT *
     238    12451039 : COLnew2(oid hseq, int tt, BUN cap, role_t role, uint16_t width)
     239             : {
     240    12451039 :         BAT *bn;
     241             : 
     242    12451039 :         assert(cap <= BUN_MAX);
     243    12451039 :         assert(hseq <= oid_nil);
     244    12451039 :         ERRORcheck((tt < 0) || (tt > GDKatomcnt), "tt error\n", NULL);
     245             : 
     246             :         /* round up to multiple of BATTINY */
     247    12451039 :         if (cap < BUN_MAX - BATTINY)
     248    12436125 :                 cap = (cap + BATTINY - 1) & ~(BATTINY - 1);
     249    12451039 :         if (ATOMstorage(tt) == TYPE_msk) {
     250      178737 :                 if (cap < 8*BATTINY)
     251             :                         cap = 8*BATTINY;
     252             :                 else
     253       59596 :                         cap = (cap + 31) & ~(BUN)31;
     254    12272302 :         } else if (cap < BATTINY)
     255             :                 cap = BATTINY;
     256             :         /* limit the size */
     257     3651579 :         if (cap > BUN_MAX)
     258             :                 cap = BUN_MAX;
     259             : 
     260    12451039 :         bn = BATcreatedesc(hseq, tt, true, role, width);
     261    12426144 :         if (bn == NULL)
     262             :                 return NULL;
     263             : 
     264    12426144 :         BATsetdims(bn, width);
     265    12439000 :         bn->batCapacity = cap;
     266             : 
     267    12439000 :         if (ATOMstorage(tt) == TYPE_msk)
     268      178720 :                 cap /= 8;       /* 8 values per byte */
     269             : 
     270             :         /* alloc the main heaps */
     271    12439000 :         if (tt && HEAPalloc(bn->theap, cap, bn->twidth) != GDK_SUCCEED) {
     272           0 :                 goto bailout;
     273             :         }
     274             : 
     275    12478153 :         if (bn->tvheap && width == 0 && ATOMheap(tt, bn->tvheap, cap) != GDK_SUCCEED) {
     276           0 :                 HEAPfree(bn->theap, true);
     277           0 :                 goto bailout;
     278             :         }
     279    12470548 :         if (BBPcacheit(bn, true) != GDK_SUCCEED) {
     280             :                 /* cannot happen, function always returns success */
     281           0 :                 goto bailout;
     282             :         }
     283    12489297 :         TRC_DEBUG(ALGO, "-> " ALGOBATFMT "\n", ALGOBATPAR(bn));
     284             :         return bn;
     285           0 :   bailout:
     286           0 :         BBPclear(bn->batCacheid);
     287           0 :         return NULL;
     288             : }
     289             : 
     290             : BAT *
     291    12146491 : COLnew(oid hseq, int tt, BUN cap, role_t role)
     292             : {
     293    12146491 :         return COLnew2(hseq, tt, cap, role, 0);
     294             : }
     295             : 
     296             : BAT *
     297     5747081 : BATdense(oid hseq, oid tseq, BUN cnt)
     298             : {
     299     5747081 :         BAT *bn;
     300             : 
     301     5747081 :         bn = COLnew(hseq, TYPE_void, 0, TRANSIENT);
     302     5755954 :         if (bn != NULL) {
     303     5755954 :                 BATtseqbase(bn, tseq);
     304     5753988 :                 BATsetcount(bn, cnt);
     305     5749891 :                 TRC_DEBUG(ALGO, OIDFMT "," OIDFMT "," BUNFMT
     306             :                           "-> " ALGOBATFMT "\n", hseq, tseq, cnt,
     307             :                           ALGOBATPAR(bn));
     308             :         }
     309     5749891 :         return bn;
     310             : }
     311             : 
     312             : /*
     313             :  * If the BAT runs out of storage for BUNS it will reallocate space.
     314             :  * For memory mapped BATs we simple extend the administration after
     315             :  * having an assurance that the BAT still can be safely stored away.
     316             :  */
     317             : BUN
     318       25272 : BATgrows(BAT *b)
     319             : {
     320       25272 :         BUN oldcap, newcap;
     321             : 
     322       25272 :         BATcheck(b, 0);
     323             : 
     324       25272 :         newcap = oldcap = BATcapacity(b);
     325       25272 :         if (newcap < BATTINY)
     326             :                 newcap = 2 * BATTINY;
     327       25360 :         else if (newcap < 10 * BATTINY)
     328       24157 :                 newcap = 4 * newcap;
     329        1203 :         else if (newcap < 50 * BATTINY)
     330         708 :                 newcap = 2 * newcap;
     331         495 :         else if ((double) newcap * BATMARGIN <= (double) BUN_MAX)
     332         493 :                 newcap = (BUN) ((double) newcap * BATMARGIN);
     333             :         else
     334             :                 newcap = BUN_MAX;
     335       25360 :         if (newcap == oldcap) {
     336           0 :                 if (newcap <= BUN_MAX - 10)
     337           0 :                         newcap += 10;
     338             :                 else
     339             :                         newcap = BUN_MAX;
     340             :         }
     341       25272 :         if (ATOMstorage(b->ttype) == TYPE_msk) /* round up to multiple of 32 */
     342           5 :                 newcap = (newcap + 31) & ~(BUN)31;
     343             :         return newcap;
     344             : }
     345             : 
     346             : /*
     347             :  * The routine should ensure that the BAT keeps its location in the
     348             :  * BAT buffer.
     349             :  *
     350             :  * Overflow in the other heaps are dealt with in the atom routines.
     351             :  * Here we merely copy their references into the new administration
     352             :  * space.
     353             :  */
     354             : gdk_return
     355      187508 : BATextend(BAT *b, BUN newcap)
     356             : {
     357      187508 :         size_t theap_size;
     358      187508 :         gdk_return rc = GDK_SUCCEED;
     359             : 
     360      187508 :         assert(newcap <= BUN_MAX);
     361      187508 :         BATcheck(b, GDK_FAIL);
     362             :         /*
     363             :          * The main issue is to properly predict the new BAT size.
     364             :          * storage overflow. The assumption taken is that capacity
     365             :          * overflow is rare. It is changed only when the position of
     366             :          * the next available BUN surpasses the free area marker.  Be
     367             :          * aware that the newcap should be greater than the old value,
     368             :          * otherwise you may easily corrupt the administration of
     369             :          * malloc.
     370             :          */
     371      187508 :         MT_lock_set(&b->theaplock);
     372      188053 :         if (newcap <= BATcapacity(b)) {
     373      151918 :                 MT_lock_unset(&b->theaplock);
     374      151931 :                 return GDK_SUCCEED;
     375             :         }
     376             : 
     377       36135 :         if (ATOMstorage(b->ttype) == TYPE_msk) {
     378        1200 :                 newcap = (newcap + 31) & ~(BUN)31; /* round up to multiple of 32 */
     379        1200 :                 theap_size = (size_t) (newcap / 8); /* in bytes */
     380             :         } else {
     381       34935 :                 theap_size = (size_t) newcap << b->tshift;
     382             :         }
     383             : 
     384       36135 :         if (b->theap->base) {
     385       36135 :                 TRC_DEBUG(HEAP, "HEAPgrow in BATextend %s %zu %zu\n",
     386             :                           b->theap->filename, b->theap->size, theap_size);
     387       36135 :                 rc = HEAPgrow(&b->theap, theap_size, b->batRestricted == BAT_READ);
     388       36200 :                 if (rc == GDK_SUCCEED)
     389       36201 :                         b->batCapacity = newcap;
     390             :         } else {
     391           0 :                 b->batCapacity = newcap;
     392             :         }
     393       36200 :         MT_lock_unset(&b->theaplock);
     394             : 
     395       36191 :         return rc;
     396             : }
     397             : 
     398             : 
     399             : 
     400             : /*
     401             :  * @+ BAT destruction
     402             :  * BATclear quickly removes all elements from a BAT. It must respect
     403             :  * the transaction rules; so stable elements must be moved to the
     404             :  * "deleted" section of the BAT (they cannot be fully deleted
     405             :  * yet). For the elements that really disappear, we must free
     406             :  * heapspace. As an optimization, in the case of no stable elements, we quickly empty
     407             :  * the heaps by copying a standard small empty image over them.
     408             :  */
     409             : gdk_return
     410         434 : BATclear(BAT *b, bool force)
     411             : {
     412         434 :         BUN p, q;
     413             : 
     414         434 :         BATcheck(b, GDK_FAIL);
     415             : 
     416         434 :         if (!force && b->batInserted > 0) {
     417           0 :                 GDKerror("cannot clear committed BAT\n");
     418           0 :                 return GDK_FAIL;
     419             :         }
     420             : 
     421         434 :         TRC_DEBUG(ALGO, ALGOBATFMT "\n", ALGOBATPAR(b));
     422             : 
     423             :         /* kill all search accelerators */
     424         434 :         HASHdestroy(b);
     425         434 :         OIDXdestroy(b);
     426         434 :         STRMPdestroy(b);
     427         434 :         RTREEdestroy(b);
     428         434 :         PROPdestroy(b);
     429             : 
     430         434 :         bat tvp = 0;
     431             : 
     432             :         /* we must dispose of all inserted atoms */
     433         434 :         MT_lock_set(&b->theaplock);
     434         434 :         if (force && BATatoms[b->ttype].atomDel == NULL) {
     435         427 :                 assert(b->tvheap == NULL || b->tvheap->parentid == b->batCacheid);
     436             :                 /* no stable elements: we do a quick heap clean */
     437             :                 /* need to clean heap which keeps data even though the
     438             :                    BUNs got removed. This means reinitialize when
     439             :                    free > 0
     440             :                 */
     441         427 :                 if (b->tvheap && b->tvheap->free > 0) {
     442          21 :                         Heap *th = GDKmalloc(sizeof(Heap));
     443             : 
     444          21 :                         if (th == NULL) {
     445           0 :                                 MT_lock_unset(&b->theaplock);
     446           0 :                                 return GDK_FAIL;
     447             :                         }
     448          21 :                         *th = (Heap) {
     449          21 :                                 .farmid = b->tvheap->farmid,
     450          21 :                                 .parentid = b->tvheap->parentid,
     451             :                                 .dirty = true,
     452          21 :                                 .hasfile = b->tvheap->hasfile,
     453             :                                 .refs = ATOMIC_VAR_INIT(1),
     454             :                         };
     455          21 :                         strcpy_len(th->filename, b->tvheap->filename, sizeof(th->filename));
     456          21 :                         if (ATOMheap(b->ttype, th, 0) != GDK_SUCCEED) {
     457           0 :                                 MT_lock_unset(&b->theaplock);
     458           0 :                                 return GDK_FAIL;
     459             :                         }
     460          21 :                         tvp = b->tvheap->parentid;
     461          21 :                         HEAPdecref(b->tvheap, false);
     462          21 :                         b->tvheap = th;
     463             :                 }
     464             :         } else {
     465             :                 /* do heap-delete of all inserted atoms */
     466           7 :                 void (*tatmdel)(Heap*,var_t*) = BATatoms[b->ttype].atomDel;
     467             : 
     468             :                 /* TYPE_str has no del method, so we shouldn't get here */
     469           7 :                 assert(tatmdel == NULL || b->twidth == sizeof(var_t));
     470           0 :                 if (tatmdel) {
     471           0 :                         BATiter bi = bat_iterator_nolock(b);
     472             : 
     473           0 :                         for (p = b->batInserted, q = BATcount(b); p < q; p++)
     474           0 :                                 (*tatmdel)(b->tvheap, (var_t*) BUNtloc(bi,p));
     475           0 :                         b->tvheap->dirty = true;
     476             :                 }
     477             :         }
     478             : 
     479         434 :         b->batInserted = 0;
     480         434 :         b->batCount = 0;
     481         434 :         if (b->ttype == TYPE_void)
     482           0 :                 b->batCapacity = 0;
     483         434 :         b->theap->free = 0;
     484         434 :         BAThseqbase(b, 0);
     485         434 :         BATtseqbase(b, ATOMtype(b->ttype) == TYPE_oid ? 0 : oid_nil);
     486         434 :         b->theap->dirty = true;
     487         434 :         b->tnonil = true;
     488         434 :         b->tnil = false;
     489         434 :         b->tsorted = b->trevsorted = ATOMlinear(b->ttype);
     490         434 :         b->tnosorted = b->tnorevsorted = 0;
     491         434 :         b->tkey = true;
     492         434 :         b->tnokey[0] = b->tnokey[1] = 0;
     493         434 :         b->tminpos = b->tmaxpos = BUN_NONE;
     494         434 :         b->tunique_est = 0;
     495         434 :         MT_lock_unset(&b->theaplock);
     496         434 :         if (tvp != 0 && tvp != b->batCacheid)
     497           0 :                 BBPrelease(tvp);
     498             :         return GDK_SUCCEED;
     499             : }
     500             : 
     501             : /* free a cached BAT; leave the bat descriptor cached */
     502             : void
     503      582418 : BATfree(BAT *b)
     504             : {
     505      582418 :         if (b == NULL)
     506             :                 return;
     507             : 
     508             :         /* deallocate all memory for a bat */
     509      582418 :         MT_rwlock_rdlock(&b->thashlock);
     510      582418 :         BUN nunique = BUN_NONE;
     511      582418 :         if (b->thash && b->thash != (Hash *) 1) {
     512        2020 :                 nunique = b->thash->nunique;
     513             :         }
     514      582418 :         MT_rwlock_rdunlock(&b->thashlock);
     515      582418 :         HASHfree(b);
     516      582418 :         OIDXfree(b);
     517      582418 :         STRMPfree(b);
     518      582418 :         RTREEfree(b);
     519      582418 :         MT_lock_set(&b->theaplock);
     520      582417 :         if (nunique != BUN_NONE) {
     521        2020 :                 b->tunique_est = (double) nunique;
     522             :         }
     523             :         /* wait until there are no other references to the heap; a
     524             :          * reference is possible in e.g. BBPsync that uses a
     525             :          * bat_iterator directly on the BBP_desc, i.e. without fix */
     526      582418 :         while (b->theap && (ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1) {
     527           0 :                 MT_lock_unset(&b->theaplock);
     528           0 :                 MT_sleep_ms(1);
     529      582417 :                 MT_lock_set(&b->theaplock);
     530             :         }
     531      582418 :         if (b->theap) {
     532      582418 :                 assert((ATOMIC_GET(&b->theap->refs) & HEAPREFS) == 1);
     533      582418 :                 assert(b->theap->parentid == b->batCacheid);
     534      582418 :                 HEAPfree(b->theap, false);
     535             :         }
     536             :         /* wait until there are no other references to the heap; a
     537             :          * reference is possible in e.g. BBPsync that uses a
     538             :          * bat_iterator directly on the BBP_desc, i.e. without fix */
     539       28498 :         while (b->tvheap && (ATOMIC_GET(&b->tvheap->refs) & HEAPREFS) > 1) {
     540           0 :                 MT_lock_unset(&b->theaplock);
     541           0 :                 MT_sleep_ms(1);
     542      582418 :                 MT_lock_set(&b->theaplock);
     543             :         }
     544      582418 :         if (b->tvheap) {
     545       28498 :                 assert((ATOMIC_GET(&b->tvheap->refs) & HEAPREFS) == 1);
     546       28498 :                 assert(b->tvheap->parentid == b->batCacheid);
     547       28498 :                 HEAPfree(b->tvheap, false);
     548             :         }
     549      582418 :         MT_lock_unset(&b->theaplock);
     550             : }
     551             : 
     552             : /* free a cached BAT descriptor */
     553             : void
     554    24971439 : BATdestroy(BAT *b)
     555             : {
     556    24971439 :         if (b->tvheap) {
     557       27647 :                 GDKfree(b->tvheap);
     558             :         }
     559    24971439 :         PROPdestroy_nolock(b);
     560    24939304 :         MT_lock_destroy(&b->theaplock);
     561    24938292 :         MT_lock_destroy(&b->batIdxLock);
     562    24956796 :         MT_rwlock_destroy(&b->thashlock);
     563    24969119 :         if (b->theap) {
     564      579475 :                 GDKfree(b->theap);
     565             :         }
     566    24966496 :         if (b->oldtail) {
     567           0 :                 ATOMIC_AND(&b->oldtail->refs, ~DELAYEDREMOVE);
     568             :                 /* the bat has not been committed, so we cannot remove
     569             :                  * the old tail file */
     570           0 :                 HEAPdecref(b->oldtail, false);
     571           0 :                 b->oldtail = NULL;
     572             :         }
     573    24966496 :         *b = (BAT) {
     574             :                 .batCacheid = 0,
     575             :         };
     576    24966496 : }
     577             : 
     578             : /*
     579             :  * @+ BAT copying
     580             :  *
     581             :  * BAT copying is an often used operation. So it deserves attention.
     582             :  * When making a copy of a BAT, the following aspects are of
     583             :  * importance:
     584             :  *
     585             :  * - the requested head and tail types. The purpose of the copy may be
     586             :  *   to slightly change these types (e.g. void <-> oid). We may also
     587             :  *   remap between types as long as they share the same
     588             :  *   ATOMstorage(type), i.e. the types have the same physical
     589             :  *   implementation. We may even want to allow 'dirty' trick such as
     590             :  *   viewing a flt-column suddenly as int.
     591             :  *
     592             :  *   To allow such changes, the desired column-types is a
     593             :  *   parameter of COLcopy.
     594             :  *
     595             :  * - access mode. If we want a read-only copy of a read-only BAT, a
     596             :  *   VIEW may do (in this case, the user may be after just an
     597             :  *   independent BAT header and id). This is indicated by the
     598             :  *   parameter (writable = FALSE).
     599             :  *
     600             :  *   In other cases, we really want an independent physical copy
     601             :  *   (writable = TRUE).  Changing the mode to BAT_WRITE will be a
     602             :  *   zero-cost operation if the BAT was copied with (writable = TRUE).
     603             :  *
     604             :  * In GDK, the result is a BAT that is BAT_WRITE iff (writable ==
     605             :  * TRUE).
     606             :  *
     607             :  * In these cases the copy becomes a logical view on the original,
     608             :  * which ensures that the original cannot be modified or destroyed
     609             :  * (which could affect the shared heaps).
     610             :  */
     611             : static bool
     612         624 : wrongtype(int t1, int t2)
     613             : {
     614             :         /* check if types are compatible. be extremely forgiving */
     615         624 :         if (t1 != TYPE_void) {
     616         623 :                 t1 = ATOMtype(ATOMstorage(t1));
     617         623 :                 t2 = ATOMtype(ATOMstorage(t2));
     618         623 :                 if (t1 != t2) {
     619         564 :                         if (ATOMvarsized(t1) ||
     620         564 :                             ATOMvarsized(t2) ||
     621         564 :                             t1 == TYPE_msk || t2 == TYPE_msk ||
     622         564 :                             ATOMsize(t1) != ATOMsize(t2))
     623           0 :                                 return true;
     624             :                 }
     625             :         }
     626             :         return false;
     627             : }
     628             : 
     629             : /*
     630             :  * There are four main implementation cases:
     631             :  * (1) we are allowed to return a view (zero effort),
     632             :  * (2) the result is void,void (zero effort),
     633             :  * (3) we can copy the heaps (memcopy, or even VM page sharing)
     634             :  * (4) we must insert BUN-by-BUN into the result (fallback)
     635             :  * The latter case is still optimized for the case that the result
     636             :  * is bat[void,T] for a simple fixed-size type T. In that case we
     637             :  * do inline array[T] inserts.
     638             :  */
     639             : BAT *
     640       53324 : COLcopy(BAT *b, int tt, bool writable, role_t role)
     641             : {
     642       53324 :         bool slowcopy = false;
     643       53324 :         BAT *bn = NULL;
     644       53324 :         BATiter bi;
     645       53324 :         char strhash[GDK_STRHASHSIZE];
     646             : 
     647       53324 :         BATcheck(b, NULL);
     648             : 
     649             :         /* maybe a bit ugly to change the requested bat type?? */
     650       53324 :         if (b->ttype == TYPE_void && !writable)
     651       53324 :                 tt = TYPE_void;
     652             : 
     653       53324 :         if (tt != b->ttype && wrongtype(tt, b->ttype)) {
     654           0 :                 GDKerror("wrong tail-type requested\n");
     655           0 :                 return NULL;
     656             :         }
     657             : 
     658             :         /* in case of a string bat, we save the string heap hash table
     659             :          * while we have the lock so that we can restore it in the copy;
     660             :          * this is because during our operation, a parallel thread could
     661             :          * be adding strings to the vheap which would modify the hash
     662             :          * table and that would result in buckets containing values
     663             :          * beyond the original vheap that we're copying */
     664       53324 :         MT_lock_set(&b->theaplock);
     665       53375 :         BAT *pb = NULL, *pvb = NULL;
     666       53375 :         if (b->theap->parentid != b->batCacheid) {
     667       11619 :                 pb = BBP_desc(b->theap->parentid);
     668       11619 :                 MT_lock_set(&pb->theaplock);
     669             :         }
     670       53372 :         if (b->tvheap &&
     671       16698 :             b->tvheap->parentid != b->batCacheid &&
     672       11653 :             b->tvheap->parentid != b->theap->parentid) {
     673       10977 :                 pvb = BBP_desc(b->tvheap->parentid);
     674       10977 :                 MT_lock_set(&pvb->theaplock);
     675             :         }
     676       53368 :         bi = bat_iterator_nolock(b);
     677       53368 :         if (ATOMstorage(b->ttype) == TYPE_str && b->tvheap->free >= GDK_STRHASHSIZE)
     678       12490 :                 memcpy(strhash, b->tvheap->base, GDK_STRHASHSIZE);
     679             : 
     680       53368 :         bat_iterator_incref(&bi);
     681       53375 :         if (pvb)
     682       10977 :                 MT_lock_unset(&pvb->theaplock);
     683       53361 :         if (pb)
     684       11616 :                 MT_lock_unset(&pb->theaplock);
     685       53366 :         MT_lock_unset(&b->theaplock);
     686             : 
     687             :         /* first try case (1); create a view, possibly with different
     688             :          * atom-types */
     689       53372 :         if (!writable &&
     690       53372 :             role == TRANSIENT &&
     691       23201 :             bi.restricted == BAT_READ &&
     692       18653 :             ATOMstorage(b->ttype) != TYPE_msk && /* no view on TYPE_msk */
     693       18653 :             (bi.h == NULL ||
     694       18653 :              bi.h->parentid == b->batCacheid ||
     695        3606 :              BBP_desc(bi.h->parentid)->batRestricted == BAT_READ)) {
     696       18653 :                 bn = VIEWcreate(b->hseqbase, b, 0, BUN_MAX);
     697       18655 :                 if (bn == NULL) {
     698           0 :                         goto bunins_failed;
     699             :                 }
     700       18655 :                 if (tt != bn->ttype) {
     701          56 :                         bn->ttype = tt;
     702          56 :                         if (bn->tvheap && !ATOMvarsized(tt)) {
     703           0 :                                 if (bn->tvheap->parentid != bn->batCacheid)
     704           0 :                                         BBPrelease(bn->tvheap->parentid);
     705           0 :                                 HEAPdecref(bn->tvheap, false);
     706           0 :                                 bn->tvheap = NULL;
     707             :                         }
     708          56 :                         bn->tseqbase = ATOMtype(tt) == TYPE_oid ? bi.tseq : oid_nil;
     709             :                 }
     710       18655 :                 bat_iterator_end(&bi);
     711       18655 :                 return bn;
     712             :         } else {
     713             :                 /* check whether we need case (4); BUN-by-BUN copy (by
     714             :                  * setting slowcopy to true) */
     715       34719 :                 if (ATOMsize(tt) != ATOMsize(bi.type)) {
     716             :                         /* oops, void materialization */
     717             :                         slowcopy = true;
     718       34153 :                 } else if (bi.h && bi.h->parentid != b->batCacheid &&
     719        8007 :                            BATcapacity(BBP_desc(bi.h->parentid)) > bi.count + bi.count) {
     720             :                         /* reduced slice view: do not copy too much
     721             :                          * garbage */
     722             :                         slowcopy = true;
     723       26357 :                 } else if (bi.vh && bi.vh->parentid != b->batCacheid &&
     724       10414 :                            BATcount(BBP_desc(bi.vh->parentid)) > bi.count + bi.count) {
     725             :                         /* reduced vheap view: do not copy too much
     726             :                          * garbage; this really is a heuristic since the
     727             :                          * vheap could be used completely, even if the
     728             :                          * offset heap is only (less than) half the size
     729             :                          * of the parent's offset heap */
     730       10642 :                         slowcopy = true;
     731             :                 }
     732             : 
     733       34719 :                 bn = COLnew2(b->hseqbase, tt, bi.count, role, bi.width);
     734       34679 :                 if (bn == NULL) {
     735           0 :                         goto bunins_failed;
     736             :                 }
     737       34679 :                 if (bn->tvheap != NULL && bn->tvheap->base == NULL) {
     738             :                         /* this combination can happen since the last
     739             :                          * argument of COLnew2 not being zero triggers a
     740             :                          * skip in the allocation of the tvheap */
     741       12667 :                         if (ATOMheap(bn->ttype, bn->tvheap, bn->batCapacity) != GDK_SUCCEED) {
     742           0 :                                 goto bunins_failed;
     743             :                         }
     744             :                 }
     745             : 
     746       34691 :                 if (tt == TYPE_void) {
     747             :                         /* case (2): a void,void result => nothing to
     748             :                          * copy! */
     749        1161 :                         bn->theap->free = 0;
     750       33530 :                 } else if (!slowcopy) {
     751             :                         /* case (3): just copy the heaps */
     752       22906 :                         if (bn->tvheap && HEAPextend(bn->tvheap, bi.vhfree, true) != GDK_SUCCEED) {
     753           0 :                                 goto bunins_failed;
     754             :                         }
     755       22904 :                         memcpy(bn->theap->base, bi.base, bi.hfree);
     756       22904 :                         bn->theap->free = bi.hfree;
     757       22904 :                         bn->theap->dirty = true;
     758       22904 :                         if (bn->tvheap) {
     759        9696 :                                 memcpy(bn->tvheap->base, bi.vh->base, bi.vhfree);
     760        9696 :                                 bn->tvheap->free = bi.vhfree;
     761        9696 :                                 bn->tvheap->dirty = true;
     762        9696 :                                 bn->tascii = bi.ascii;
     763        9696 :                                 if (ATOMstorage(b->ttype) == TYPE_str && bi.vhfree >= GDK_STRHASHSIZE)
     764        8925 :                                         memcpy(bn->tvheap->base, strhash, GDK_STRHASHSIZE);
     765             :                         }
     766             : 
     767             :                         /* make sure we use the correct capacity */
     768       22904 :                         if (ATOMstorage(bn->ttype) == TYPE_msk)
     769          32 :                                 bn->batCapacity = (BUN) (bn->theap->size * 8);
     770       22872 :                         else if (bn->ttype)
     771       22872 :                                 bn->batCapacity = (BUN) (bn->theap->size >> bn->tshift);
     772             :                         else
     773           0 :                                 bn->batCapacity = 0;
     774       21261 :                 } else if (tt != TYPE_void || ATOMextern(tt)) {
     775             :                         /* case (4): one-by-one BUN insert (really slow) */
     776       10624 :                         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
     777             : 
     778     8798748 :                         TIMEOUT_LOOP_IDX_DECL(p, bi.count, qry_ctx) {
     779     8777025 :                                 const void *t = BUNtail(bi, p);
     780             : 
     781     8768468 :                                 if (bunfastapp_nocheck(bn, t) != GDK_SUCCEED) {
     782           0 :                                         goto bunins_failed;
     783             :                                 }
     784             :                         }
     785       10640 :                         TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed, qry_ctx));
     786       10637 :                         bn->theap->dirty |= bi.count > 0;
     787             :                 } else if (tt != TYPE_void && bi.type == TYPE_void) {
     788             :                         /* case (4): optimized for unary void
     789             :                          * materialization */
     790             :                         oid cur = bi.tseq, *dst = (oid *) Tloc(bn, 0);
     791             :                         const oid inc = !is_oid_nil(cur);
     792             : 
     793             :                         for (BUN p = 0; p < bi.count; p++) {
     794             :                                 dst[p] = cur;
     795             :                                 cur += inc;
     796             :                         }
     797             :                         bn->theap->free = bi.count * sizeof(oid);
     798             :                         bn->theap->dirty |= bi.count > 0;
     799             :                 } else if (ATOMstorage(bi.type) == TYPE_msk) {
     800             :                         /* convert number of bits to number of bytes,
     801             :                          * and round the latter up to a multiple of
     802             :                          * 4 (copy in units of 4 bytes) */
     803             :                         bn->theap->free = ((bi.count + 31) / 32) * 4;
     804             :                         memcpy(Tloc(bn, 0), bi.base, bn->theap->free);
     805             :                         bn->theap->dirty |= bi.count > 0;
     806             :                 } else {
     807             :                         /* case (4): optimized for simple array copy */
     808             :                         bn->theap->free = bi.count << bn->tshift;
     809             :                         memcpy(Tloc(bn, 0), bi.base, bn->theap->free);
     810             :                         bn->theap->dirty |= bi.count > 0;
     811             :                 }
     812             :                 /* copy all properties (size+other) from the source bat */
     813       34702 :                 BATsetcount(bn, bi.count);
     814             :         }
     815             :         /* set properties (note that types may have changed in the copy) */
     816       68823 :         if (ATOMtype(tt) == ATOMtype(bi.type)) {
     817       34693 :                 if (ATOMtype(tt) == TYPE_oid) {
     818        9198 :                         BATtseqbase(bn, bi.tseq);
     819             :                 } else {
     820       25495 :                         BATtseqbase(bn, oid_nil);
     821             :                 }
     822       34650 :                 BATkey(bn, bi.key);
     823       34640 :                 bn->tsorted = bi.sorted;
     824       34640 :                 bn->trevsorted = bi.revsorted;
     825       34640 :                 bn->tnorevsorted = bi.norevsorted;
     826       34640 :                 if (bi.nokey[0] != bi.nokey[1]) {
     827        2771 :                         bn->tnokey[0] = bi.nokey[0];
     828        2771 :                         bn->tnokey[1] = bi.nokey[1];
     829             :                 } else {
     830       31869 :                         bn->tnokey[0] = bn->tnokey[1] = 0;
     831             :                 }
     832       34640 :                 bn->tnosorted = bi.nosorted;
     833       34640 :                 bn->tnonil = bi.nonil;
     834       34640 :                 bn->tnil = bi.nil;
     835       34640 :                 bn->tminpos = bi.minpos;
     836       34640 :                 bn->tmaxpos = bi.maxpos;
     837       34640 :                 bn->tunique_est = bi.unique_est;
     838           3 :         } else if (ATOMstorage(tt) == ATOMstorage(b->ttype) &&
     839           3 :                    ATOMcompare(tt) == ATOMcompare(b->ttype)) {
     840           3 :                 BUN h = bi.count;
     841           3 :                 bn->tsorted = bi.sorted;
     842           3 :                 bn->trevsorted = bi.revsorted;
     843           3 :                 BATkey(bn, bi.key);
     844           3 :                 bn->tnonil = bi.nonil;
     845           3 :                 bn->tnil = bi.nil;
     846           3 :                 if (bi.nosorted > 0 && bi.nosorted < h)
     847           1 :                         bn->tnosorted = bi.nosorted;
     848             :                 else
     849           2 :                         bn->tnosorted = 0;
     850           3 :                 if (bi.norevsorted > 0 && bi.norevsorted < h)
     851           3 :                         bn->tnorevsorted = bi.norevsorted;
     852             :                 else
     853           0 :                         bn->tnorevsorted = 0;
     854           3 :                 if (bi.nokey[0] < h &&
     855           3 :                     bi.nokey[1] < h &&
     856             :                     bi.nokey[0] != bi.nokey[1]) {
     857           0 :                         bn->tnokey[0] = bi.nokey[0];
     858           0 :                         bn->tnokey[1] = bi.nokey[1];
     859             :                 } else {
     860           3 :                         bn->tnokey[0] = bn->tnokey[1] = 0;
     861             :                 }
     862           3 :                 bn->tminpos = bi.minpos;
     863           3 :                 bn->tmaxpos = bi.maxpos;
     864           3 :                 bn->tunique_est = bi.unique_est;
     865             :         } else {
     866           0 :                 bn->tsorted = bn->trevsorted = false; /* set based on count later */
     867           0 :                 bn->tnonil = bn->tnil = false;
     868           0 :                 bn->tkey = false;
     869           0 :                 bn->tnosorted = bn->tnorevsorted = 0;
     870           0 :                 bn->tnokey[0] = bn->tnokey[1] = 0;
     871             :         }
     872       34643 :         if (BATcount(bn) <= 1) {
     873        6118 :                 bn->tsorted = ATOMlinear(b->ttype);
     874        6118 :                 bn->trevsorted = ATOMlinear(b->ttype);
     875        6118 :                 bn->tkey = true;
     876             :         }
     877       34643 :         bat_iterator_end(&bi);
     878       34707 :         if (!writable)
     879        4545 :                 bn->batRestricted = BAT_READ;
     880       34707 :         TRC_DEBUG(ALGO, ALGOBATFMT " -> " ALGOBATFMT "\n",
     881             :                   ALGOBATPAR(b), ALGOBATPAR(bn));
     882             :         return bn;
     883           0 :       bunins_failed:
     884           0 :         bat_iterator_end(&bi);
     885           0 :         BBPreclaim(bn);
     886             :         return NULL;
     887             : }
     888             : 
     889             : /* Append an array of values of length count to the bat.  For
     890             :  * fixed-sized values, `values' is an array of values, for
     891             :  * variable-sized values, `values' is an array of pointers to values.
     892             :  * If values equals NULL, count times nil will be appended. */
     893             : gdk_return
     894    56893800 : BUNappendmulti(BAT *b, const void *values, BUN count, bool force)
     895             : {
     896    56893800 :         BUN p;
     897    56893800 :         BUN nunique = 0;
     898             : 
     899    56893800 :         BATcheck(b, GDK_FAIL);
     900             : 
     901    56893800 :         assert(!VIEWtparent(b));
     902             : 
     903    56893800 :         if (count == 0)
     904             :                 return GDK_SUCCEED;
     905             : 
     906    56889606 :         TRC_DEBUG(ALGO, ALGOBATFMT " appending " BUNFMT " values%s\n", ALGOBATPAR(b), count, values ? "" : " (all nil)");
     907             : 
     908    56886980 :         p = BATcount(b);                /* insert at end */
     909    56886980 :         if (p == BUN_MAX || BATcount(b) + count >= BUN_MAX) {
     910           0 :                 GDKerror("bat too large\n");
     911           0 :                 return GDK_FAIL;
     912             :         }
     913             : 
     914    56886980 :         ALIGNapp(b, force, GDK_FAIL);
     915             :         /* load hash so that we can maintain it */
     916    56935806 :         (void) BATcheckhash(b);
     917             : 
     918    56922312 :         if (b->ttype == TYPE_void && BATtdense(b)) {
     919           0 :                 const oid *ovals = values;
     920           0 :                 bool dense = b->batCount == 0 || (ovals != NULL && b->tseqbase + 1 == ovals[0]);
     921           0 :                 if (ovals) {
     922           0 :                         for (BUN i = 1; dense && i < count; i++) {
     923           0 :                                 dense = ovals[i - 1] + 1 == ovals[i];
     924             :                         }
     925             :                 }
     926           0 :                 if (dense) {
     927           0 :                         MT_lock_set(&b->theaplock);
     928           0 :                         assert(BATtdense(b)); /* no change (coverity) */
     929           0 :                         if (b->batCount == 0)
     930           0 :                                 b->tseqbase = ovals ? ovals[0] : oid_nil;
     931           0 :                         BATsetcount(b, BATcount(b) + count);
     932           0 :                         MT_lock_unset(&b->theaplock);
     933           0 :                         return GDK_SUCCEED;
     934             :                 } else {
     935             :                         /* we need to materialize b; allocate enough capacity */
     936           0 :                         if (BATmaterialize(b, BATcount(b) + count) != GDK_SUCCEED)
     937             :                                 return GDK_FAIL;
     938             :                 }
     939             :         }
     940             : 
     941    56922312 :         if (unshare_varsized_heap(b) != GDK_SUCCEED) {
     942             :                 return GDK_FAIL;
     943             :         }
     944             : 
     945    56904298 :         if (BATcount(b) + count > BATcapacity(b)) {
     946             :                 /* if needed space exceeds a normal growth extend just
     947             :                  * with what's needed */
     948       14263 :                 BUN ncap = BATcount(b) + count;
     949       14263 :                 BUN grows = BATgrows(b);
     950             : 
     951       14257 :                 if (ncap > grows)
     952             :                         grows = ncap;
     953       14257 :                 gdk_return rc = BATextend(b, grows);
     954       14275 :                 if (rc != GDK_SUCCEED)
     955             :                         return rc;
     956             :         }
     957             : 
     958    56904310 :         const void *t = b->ttype == TYPE_msk ? &(msk){false} : ATOMnilptr(b->ttype);
     959    56904310 :         MT_lock_set(&b->theaplock);
     960    56929131 :         BATiter bi = bat_iterator_nolock(b);
     961    56929131 :         const ValRecord *prop;
     962    56929131 :         ValRecord minprop, maxprop;
     963    56929131 :         const void *minbound = NULL, *maxbound = NULL;
     964    56929142 :         if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL &&
     965          11 :             VALcopy(&minprop, prop) != NULL)
     966          11 :                 minbound = VALptr(&minprop);
     967    56854230 :         if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL &&
     968          10 :             VALcopy(&maxprop, prop) != NULL)
     969          10 :                 maxbound = VALptr(&maxprop);
     970    56888551 :         const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
     971    56927972 :         bool setnil = false;
     972    56927972 :         MT_lock_unset(&b->theaplock);
     973    56962086 :         MT_rwlock_wrlock(&b->thashlock);
     974    56960876 :         if (values && b->ttype) {
     975    56954333 :                 int (*atomcmp) (const void *, const void *) = ATOMcompare(b->ttype);
     976    56954333 :                 const void *atomnil = ATOMnilptr(b->ttype);
     977    56954333 :                 const void *minvalp = NULL, *maxvalp = NULL;
     978    56954333 :                 if (b->tvheap) {
     979    22261748 :                         if (bi.minpos != BUN_NONE)
     980    19342998 :                                 minvalp = BUNtvar(bi, bi.minpos);
     981    22261748 :                         if (bi.maxpos != BUN_NONE)
     982    19336108 :                                 maxvalp = BUNtvar(bi, bi.maxpos);
     983    22261748 :                         const void *vbase = b->tvheap->base;
     984    44535995 :                         for (BUN i = 0; i < count; i++) {
     985    22287456 :                                 t = ((void **) values)[i];
     986    22287456 :                                 bool isnil = atomcmp(t, atomnil) == 0;
     987    22272892 :                                 gdk_return rc;
     988    22272892 :                                 if (notnull && isnil) {
     989           0 :                                         assert(0);
     990             :                                         GDKerror("NULL value not within bounds\n");
     991             :                                         rc = GDK_FAIL;
     992    22272892 :                                 } else if (minbound &&
     993    22272892 :                                            !isnil &&
     994           0 :                                            atomcmp(t, minbound) < 0) {
     995           0 :                                         assert(0);
     996             :                                         GDKerror("value not within bounds\n");
     997             :                                         rc = GDK_FAIL;
     998    22272892 :                                 } else if (maxbound &&
     999           0 :                                            !isnil &&
    1000           0 :                                            atomcmp(t, maxbound) >= 0) {
    1001           0 :                                         assert(0);
    1002             :                                         GDKerror("value not within bounds\n");
    1003             :                                         rc = GDK_FAIL;
    1004             :                                 } else {
    1005    22272892 :                                         rc = tfastins_nocheckVAR(b, p, t);
    1006             :                                 }
    1007    22274111 :                                 if (rc != GDK_SUCCEED) {
    1008           0 :                                         MT_rwlock_wrunlock(&b->thashlock);
    1009           0 :                                         if (minbound)
    1010           0 :                                                 VALclear(&minprop);
    1011           0 :                                         if (maxbound)
    1012           0 :                                                 VALclear(&maxprop);
    1013           0 :                                         return rc;
    1014             :                                 }
    1015    22274111 :                                 if (vbase != b->tvheap->base) {
    1016             :                                         /* tvheap changed location, so
    1017             :                                          * pointers may need to be
    1018             :                                          * updated (not if they were
    1019             :                                          * initialized from t below, but
    1020             :                                          * we don't know) */
    1021        3505 :                                         BUN minpos = bi.minpos;
    1022        3505 :                                         BUN maxpos = bi.maxpos;
    1023        3505 :                                         MT_lock_set(&b->theaplock);
    1024        3505 :                                         bi = bat_iterator_nolock(b);
    1025        3505 :                                         MT_lock_unset(&b->theaplock);
    1026        3505 :                                         bi.minpos = minpos;
    1027        3505 :                                         bi.maxpos = maxpos;
    1028        3505 :                                         vbase = b->tvheap->base;
    1029        3505 :                                         if (bi.minpos != BUN_NONE)
    1030        2736 :                                                 minvalp = BUNtvar(bi, bi.minpos);
    1031        3505 :                                         if (bi.maxpos != BUN_NONE)
    1032        2735 :                                                 maxvalp = BUNtvar(bi, bi.maxpos);
    1033             :                                 }
    1034    22274111 :                                 if (!isnil) {
    1035    20571363 :                                         if (p == 0) {
    1036      212414 :                                                 bi.minpos = bi.maxpos = 0;
    1037      212414 :                                                 minvalp = maxvalp = t;
    1038             :                                         } else {
    1039    39649451 :                                                 if (bi.minpos != BUN_NONE &&
    1040    19290924 :                                                     atomcmp(minvalp, t) > 0) {
    1041       83354 :                                                         bi.minpos = p;
    1042       83354 :                                                         minvalp = t;
    1043             :                                                 }
    1044    39649965 :                                                 if (bi.maxpos != BUN_NONE &&
    1045    19290880 :                                                     atomcmp(maxvalp, t) < 0) {
    1046     2248357 :                                                         bi.maxpos = p;
    1047     2248357 :                                                         maxvalp = t;
    1048             :                                                 }
    1049             :                                         }
    1050             :                                 } else {
    1051             :                                         setnil = true;
    1052             :                                 }
    1053    22274247 :                                 p++;
    1054             :                         }
    1055    22248539 :                         if (minbound)
    1056           0 :                                 VALclear(&minprop);
    1057    22250196 :                         if (maxbound)
    1058           0 :                                 VALclear(&maxprop);
    1059    22242989 :                         if (b->thash) {
    1060        6359 :                                 p -= count;
    1061       12718 :                                 for (BUN i = 0; i < count; i++) {
    1062        6359 :                                         t = ((void **) values)[i];
    1063        6359 :                                         HASHappend_locked(b, p, t);
    1064        6359 :                                         p++;
    1065             :                                 }
    1066        6359 :                                 nunique = b->thash ? b->thash->nunique : 0;
    1067             :                         }
    1068    34692585 :                 } else if (ATOMstorage(b->ttype) == TYPE_msk) {
    1069        8050 :                         bi.minpos = bi.maxpos = BUN_NONE;
    1070        8050 :                         minvalp = maxvalp = NULL;
    1071        8050 :                         assert(!b->tnil);
    1072       16100 :                         for (BUN i = 0; i < count; i++) {
    1073        8050 :                                 t = (void *) ((char *) values + (i << b->tshift));
    1074        8050 :                                 mskSetVal(b, p, *(msk *) t);
    1075        8050 :                                 p++;
    1076             :                         }
    1077             :                 } else {
    1078    34684535 :                         if (bi.minpos != BUN_NONE)
    1079    33055909 :                                 minvalp = BUNtloc(bi, bi.minpos);
    1080    34684535 :                         if (bi.maxpos != BUN_NONE)
    1081    33106694 :                                 maxvalp = BUNtloc(bi, bi.maxpos);
    1082    69395008 :                         for (BUN i = 0; i < count; i++) {
    1083    34722488 :                                 t = (void *) ((char *) values + (i << b->tshift));
    1084    34722488 :                                 gdk_return rc = tfastins_nocheckFIX(b, p, t);
    1085    34723062 :                                 if (rc != GDK_SUCCEED) {
    1086           0 :                                         MT_rwlock_wrunlock(&b->thashlock);
    1087           0 :                                         return rc;
    1088             :                                 }
    1089    34723062 :                                 if (b->thash) {
    1090      489439 :                                         HASHappend_locked(b, p, t);
    1091             :                                 }
    1092    34723062 :                                 if (atomcmp(t, atomnil) != 0) {
    1093    33651214 :                                         if (p == 0) {
    1094      386046 :                                                 bi.minpos = bi.maxpos = 0;
    1095      386046 :                                                 minvalp = maxvalp = t;
    1096             :                                         } else {
    1097    66351401 :                                                 if (bi.minpos != BUN_NONE &&
    1098    33084271 :                                                     atomcmp(minvalp, t) > 0) {
    1099       41701 :                                                         bi.minpos = p;
    1100       41701 :                                                         minvalp = t;
    1101             :                                                 }
    1102    66401736 :                                                 if (bi.maxpos != BUN_NONE &&
    1103    33136954 :                                                     atomcmp(maxvalp, t) < 0) {
    1104     8124236 :                                                         bi.maxpos = p;
    1105     8124236 :                                                         maxvalp = t;
    1106             :                                                 }
    1107             :                                         }
    1108             :                                 } else {
    1109             :                                         setnil = true;
    1110             :                                 }
    1111    34710473 :                                 p++;
    1112             :                         }
    1113    34672520 :                         nunique = b->thash ? b->thash->nunique : 0;
    1114             :                 }
    1115             :         } else {
    1116             :                 /* inserting nils, unless it's msk */
    1117       14158 :                 for (BUN i = 0; i < count; i++) {
    1118        7615 :                         gdk_return rc = tfastins_nocheck(b, p, t);
    1119        7615 :                         if (rc != GDK_SUCCEED) {
    1120           0 :                                 MT_rwlock_wrunlock(&b->thashlock);
    1121           0 :                                 return rc;
    1122             :                         }
    1123        7615 :                         if (b->thash) {
    1124           0 :                                 HASHappend_locked(b, p, t);
    1125             :                         }
    1126        7615 :                         p++;
    1127             :                 }
    1128        6543 :                 nunique = b->thash ? b->thash->nunique : 0;
    1129        6543 :                 setnil |= b->ttype != TYPE_msk;
    1130             :         }
    1131    56930102 :         MT_lock_set(&b->theaplock);
    1132    56915039 :         if (setnil) {
    1133     2779768 :                 b->tnil = true;
    1134     2779768 :                 b->tnonil = false;
    1135             :         }
    1136    56915039 :         b->tminpos = bi.minpos;
    1137    56915039 :         b->tmaxpos = bi.maxpos;
    1138    56915039 :         if (count > BATcount(b) / gdk_unique_estimate_keep_fraction)
    1139    18553258 :                 b->tunique_est = 0;
    1140             : 
    1141    56915039 :         if (b->ttype == TYPE_oid) {
    1142             :                 /* spend extra effort on oid (possible candidate list) */
    1143      760516 :                 if (values == NULL || is_oid_nil(((oid *) values)[0])) {
    1144         160 :                         b->tsorted = false;
    1145         160 :                         b->trevsorted = false;
    1146         160 :                         b->tkey = false;
    1147         160 :                         b->tseqbase = oid_nil;
    1148             :                 } else {
    1149      760356 :                         if (b->batCount == 0) {
    1150        8348 :                                 b->tsorted = true;
    1151        8348 :                                 b->trevsorted = true;
    1152        8348 :                                 b->tkey = true;
    1153        8348 :                                 b->tseqbase = count == 1 ? ((oid *) values)[0] : oid_nil;
    1154             :                         } else {
    1155      752008 :                                 if (!is_oid_nil(b->tseqbase) &&
    1156      421604 :                                     (count > 1 ||
    1157      421604 :                                      b->tseqbase + b->batCount != ((oid *) values)[0]))
    1158        3925 :                                         b->tseqbase = oid_nil;
    1159      752008 :                                 if (b->tsorted && !is_oid_nil(((oid *) b->theap->base)[b->batCount - 1]) && ((oid *) b->theap->base)[b->batCount - 1] > ((oid *) values)[0]) {
    1160          63 :                                         b->tsorted = false;
    1161          63 :                                         if (b->tnosorted == 0)
    1162          63 :                                                 b->tnosorted = b->batCount;
    1163             :                                 }
    1164      752008 :                                 if (b->trevsorted && !is_oid_nil(((oid *) values)[0]) && ((oid *) b->theap->base)[b->batCount - 1] < ((oid *) values)[0]) {
    1165        7519 :                                         b->trevsorted = false;
    1166        7519 :                                         if (b->tnorevsorted == 0)
    1167        7519 :                                                 b->tnorevsorted = b->batCount;
    1168             :                                 }
    1169      752008 :                                 if (b->tkey) {
    1170      745419 :                                         if (((oid *) b->theap->base)[b->batCount - 1] == ((oid *) values)[0]) {
    1171          17 :                                                 b->tkey = false;
    1172          17 :                                                 if (b->tnokey[1] == 0) {
    1173          17 :                                                         b->tnokey[0] = b->batCount - 1;
    1174          17 :                                                         b->tnokey[1] = b->batCount;
    1175             :                                                 }
    1176      745402 :                                         } else if (!b->tsorted && !b->trevsorted)
    1177          54 :                                                 b->tkey = false;
    1178             :                                 }
    1179             :                         }
    1180      760356 :                         for (BUN i = 1; i < count; i++) {
    1181           0 :                                 if (is_oid_nil(((oid *) values)[i])) {
    1182           0 :                                         b->tsorted = false;
    1183           0 :                                         b->trevsorted = false;
    1184           0 :                                         b->tkey = false;
    1185           0 :                                         b->tseqbase = oid_nil;
    1186           0 :                                         break;
    1187             :                                 }
    1188           0 :                                 if (((oid *) values)[i - 1] == ((oid *) values)[i]) {
    1189           0 :                                         b->tkey = false;
    1190           0 :                                         if (b->tnokey[1] == 0) {
    1191           0 :                                                 b->tnokey[0] = b->batCount + i - 1;
    1192           0 :                                                 b->tnokey[1] = b->batCount + i;
    1193             :                                         }
    1194           0 :                                 } else if (((oid *) values)[i - 1] > ((oid *) values)[i]) {
    1195           0 :                                         b->tsorted = false;
    1196           0 :                                         if (b->tnosorted == 0)
    1197           0 :                                                 b->tnosorted = b->batCount + i;
    1198           0 :                                         if (!b->trevsorted)
    1199           0 :                                                 b->tkey = false;
    1200             :                                 } else {
    1201           0 :                                         if (((oid *) values)[i - 1] + 1 != ((oid *) values)[i])
    1202           0 :                                                 b->tseqbase = oid_nil;
    1203           0 :                                         b->trevsorted = false;
    1204           0 :                                         if (b->tnorevsorted == 0)
    1205           0 :                                                 b->tnorevsorted = b->batCount + i;
    1206           0 :                                         if (!b->tsorted)
    1207           0 :                                                 b->tkey = false;
    1208             :                                 }
    1209             :                         }
    1210             :                 }
    1211    56154523 :         } else if (!ATOMlinear(b->ttype)) {
    1212        8050 :                 b->tsorted = b->trevsorted = b->tkey = false;
    1213    56146473 :         } else if (b->batCount == 0) {
    1214      604388 :                 if (values == NULL) {
    1215           0 :                         b->tsorted = b->trevsorted = true;
    1216           0 :                         b->tkey = count == 1;
    1217           0 :                         b->tunique_est = 1;
    1218             :                 } else {
    1219      604388 :                         int c;
    1220      604388 :                         switch (count) {
    1221      604021 :                         case 1:
    1222      604021 :                                 b->tsorted = b->trevsorted = b->tkey = true;
    1223      604021 :                                 b->tunique_est = 1;
    1224      604021 :                                 break;
    1225         141 :                         case 2:
    1226         141 :                                 if (b->tvheap)
    1227          41 :                                         c = ATOMcmp(b->ttype,
    1228             :                                                     ((void **) values)[0],
    1229             :                                                     ((void **) values)[1]);
    1230             :                                 else
    1231         100 :                                         c = ATOMcmp(b->ttype,
    1232             :                                                     values,
    1233             :                                                     (char *) values + b->twidth);
    1234         141 :                                 b->tsorted = c <= 0;
    1235         141 :                                 b->tnosorted = !b->tsorted;
    1236         141 :                                 b->trevsorted = c >= 0;
    1237         141 :                                 b->tnorevsorted = !b->trevsorted;
    1238         141 :                                 b->tkey = c != 0;
    1239         141 :                                 b->tnokey[0] = 0;
    1240         141 :                                 b->tnokey[1] = !b->tkey;
    1241         141 :                                 b->tunique_est = (double) (1 + b->tkey);
    1242         141 :                                 break;
    1243         226 :                         default:
    1244         226 :                                 b->tsorted = b->trevsorted = b->tkey = false;
    1245         226 :                                 break;
    1246             :                         }
    1247             :                 }
    1248    55542085 :         } else if (b->batCount == 1 && count == 1) {
    1249      435476 :                 bi = bat_iterator_nolock(b);
    1250      435476 :                 t = b->ttype == TYPE_msk ? &(msk){false} : ATOMnilptr(b->ttype);
    1251      435476 :                 if (values != NULL) {
    1252      435739 :                         if (b->tvheap)
    1253      164104 :                                 t = ((void **) values)[0];
    1254             :                         else
    1255             :                                 t = values;
    1256             :                 }
    1257      435476 :                 int c = ATOMcmp(b->ttype, BUNtail(bi, 0), t);
    1258      432092 :                 b->tsorted = c <= 0;
    1259      432092 :                 b->tnosorted = !b->tsorted;
    1260      432092 :                 b->trevsorted = c >= 0;
    1261      432092 :                 b->tnorevsorted = !b->trevsorted;
    1262      432092 :                 b->tkey = c != 0;
    1263      432092 :                 b->tnokey[0] = 0;
    1264      432092 :                 b->tnokey[1] = !b->tkey;
    1265      432092 :                 b->tunique_est = (double) (1 + b->tkey);
    1266             :         } else {
    1267    55106609 :                 b->tsorted = b->trevsorted = b->tkey = false;
    1268             :         }
    1269    56911655 :         BATsetcount(b, p);
    1270    56916871 :         if (nunique != 0)
    1271      495798 :                 b->tunique_est = (double) nunique;
    1272    56916871 :         MT_lock_unset(&b->theaplock);
    1273    56962650 :         MT_rwlock_wrunlock(&b->thashlock);
    1274             : 
    1275    56954611 :         OIDXdestroy(b);
    1276    56955165 :         STRMPdestroy(b);        /* TODO: use STRMPappendBitstring */
    1277    56949169 :         RTREEdestroy(b);
    1278    56949169 :         return GDK_SUCCEED;
    1279             : }
    1280             : 
    1281             : /* Append a single value to the bat. */
    1282             : gdk_return
    1283    39939208 : BUNappend(BAT *b, const void *t, bool force)
    1284             : {
    1285    39939208 :         return BUNappendmulti(b, b->ttype && b->tvheap ? (const void *) &t : (const void *) t, 1, force);
    1286             : }
    1287             : 
    1288             : gdk_return
    1289           4 : BUNdelete(BAT *b, oid o)
    1290             : {
    1291           4 :         BATiter bi = bat_iterator(b);
    1292             : 
    1293           4 :         if (bi.count == 0) {
    1294           0 :                 bat_iterator_end(&bi);
    1295           0 :                 GDKerror("cannot delete from empty bat\n");
    1296           0 :                 return GDK_FAIL;
    1297             :         }
    1298           4 :         if (is_oid_nil(b->hseqbase)) {
    1299           0 :                 bat_iterator_end(&bi);
    1300           0 :                 GDKerror("cannot delete from bat with VOID hseqbase\n");
    1301           0 :                 return GDK_FAIL;
    1302             :         }
    1303             : 
    1304           4 :         BUN p = o - b->hseqbase;
    1305             : 
    1306           4 :         if (bi.count - 1 != p) {
    1307           2 :                 bat_iterator_end(&bi);
    1308           2 :                 GDKerror("cannot delete anything other than last value\n");
    1309           2 :                 return GDK_FAIL;
    1310             :         }
    1311           2 :         if (b->batInserted >= bi.count) {
    1312           0 :                 bat_iterator_end(&bi);
    1313           0 :                 GDKerror("cannot delete committed value\n");
    1314           0 :                 return GDK_FAIL;
    1315             :         }
    1316             : 
    1317           2 :         TRC_DEBUG(ALGO, ALGOBATFMT " deleting oid " OIDFMT "\n", ALGOBATPAR(b), o);
    1318             :         /* load hash so that we can maintain it */
    1319           2 :         (void) BATcheckhash(b);
    1320             : 
    1321           2 :         BUN nunique = HASHdelete(&bi, p, BUNtail(bi, p));
    1322           2 :         ATOMdel(b->ttype, b->tvheap, (var_t *) BUNtloc(bi, p));
    1323           2 :         bat_iterator_end(&bi);
    1324             : 
    1325           2 :         MT_lock_set(&b->theaplock);
    1326           2 :         if (b->tmaxpos == p)
    1327           0 :                 b->tmaxpos = BUN_NONE;
    1328           2 :         if (b->tminpos == p)
    1329           0 :                 b->tminpos = BUN_NONE;
    1330           2 :         if (b->tnosorted >= p)
    1331           0 :                 b->tnosorted = 0;
    1332           2 :         if (b->tnorevsorted >= p)
    1333           0 :                 b->tnorevsorted = 0;
    1334           2 :         b->batCount--;
    1335           2 :         if (nunique != 0)
    1336           0 :                 b->tunique_est = (double) nunique;
    1337           2 :         else if (BATcount(b) < gdk_unique_estimate_keep_fraction)
    1338           2 :                 b->tunique_est = 0;
    1339           2 :         if (b->batCount <= 1) {
    1340             :                 /* some trivial properties */
    1341           0 :                 b->tkey = true;
    1342           0 :                 b->tsorted = b->trevsorted = true;
    1343           0 :                 b->tnosorted = b->tnorevsorted = 0;
    1344           0 :                 if (b->batCount == 0) {
    1345           0 :                         b->tnil = false;
    1346           0 :                         b->tnonil = true;
    1347             :                 }
    1348             :         }
    1349           2 :         MT_lock_unset(&b->theaplock);
    1350           2 :         OIDXdestroy(b);
    1351           2 :         STRMPdestroy(b);
    1352           2 :         RTREEdestroy(b);
    1353           2 :         PROPdestroy(b);
    1354           2 :         return GDK_SUCCEED;
    1355             : }
    1356             : 
    1357             : /* @-  BUN replace
    1358             :  * The last operation in this context is BUN replace. It assumes that
    1359             :  * the header denotes a key. The old value association is destroyed
    1360             :  * (if it exists in the first place) and the new value takes its
    1361             :  * place.
    1362             :  *
    1363             :  * In order to make updates on void columns workable; replaces on them
    1364             :  * are always done in-place. Performing them without bun-movements
    1365             :  * greatly simplifies the problem. The 'downside' is that when
    1366             :  * transaction management has to be performed, replaced values should
    1367             :  * be saved explicitly.
    1368             :  */
    1369             : static gdk_return
    1370     1189499 : BUNinplacemulti(BAT *b, const oid *positions, const void *values, BUN count, bool force, bool autoincr)
    1371             : {
    1372     1189499 :         BUN prv, nxt;
    1373     1189499 :         const void *val;
    1374     1189499 :         int (*atomcmp) (const void *, const void *) = ATOMcompare(b->ttype);
    1375     1189499 :         const void *atomnil = ATOMnilptr(b->ttype);
    1376             : 
    1377     1189499 :         MT_lock_set(&b->theaplock);
    1378     1202748 :         BUN last = BATcount(b) - 1;
    1379     1202748 :         BATiter bi = bat_iterator_nolock(b);
    1380             :         /* zap alignment info */
    1381     1202748 :         if (!force && (b->batRestricted != BAT_WRITE ||
    1382      581736 :                        ((ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1))) {
    1383           0 :                 MT_lock_unset(&b->theaplock);
    1384           0 :                 GDKerror("access denied to %s, aborting.\n",
    1385             :                          BATgetId(b));
    1386           0 :                 assert(0);
    1387             :                 return GDK_FAIL;
    1388             :         }
    1389     1202748 :         TRC_DEBUG(ALGO, ALGOBATFMT " replacing " BUNFMT " values\n", ALGOBATPAR(b), count);
    1390     1193925 :         if (b->ttype == TYPE_void) {
    1391           0 :                 PROPdestroy(b);
    1392           0 :                 b->tminpos = BUN_NONE;
    1393           0 :                 b->tmaxpos = BUN_NONE;
    1394           0 :                 b->tunique_est = 0.0;
    1395     1193925 :         } else if (count > BATcount(b) / gdk_unique_estimate_keep_fraction) {
    1396      547301 :                 b->tunique_est = 0;
    1397             :         }
    1398     1193925 :         const ValRecord *prop;
    1399     1193925 :         ValRecord minprop, maxprop;
    1400     1193925 :         const void *minbound = NULL, *maxbound = NULL;
    1401     1193925 :         if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL &&
    1402           0 :             VALcopy(&minprop, prop) != NULL)
    1403           0 :                 minbound = VALptr(&minprop);
    1404     1192793 :         if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL &&
    1405           0 :             VALcopy(&maxprop, prop) != NULL)
    1406           0 :                 maxbound = VALptr(&maxprop);
    1407     1197153 :         const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
    1408     1197574 :         MT_lock_unset(&b->theaplock);
    1409             :         /* load hash so that we can maintain it */
    1410     1205691 :         (void) BATcheckhash(b);
    1411     1201597 :         MT_rwlock_wrlock(&b->thashlock);
    1412     2405736 :         for (BUN i = 0; i < count; i++) {
    1413     1202292 :                 BUN p = autoincr ? positions[0] - b->hseqbase + i : positions[i] - b->hseqbase;
    1414     1205463 :                 const void *t = b->ttype && b->tvheap ?
    1415     1359260 :                         ((const void **) values)[i] :
    1416     1045324 :                         (const void *) ((const char *) values + (i << b->tshift));
    1417     1202292 :                 bool isnil = atomnil && atomcmp(t, atomnil) == 0;
    1418     1203371 :                 if (notnull && isnil) {
    1419           0 :                         assert(0);
    1420             :                         GDKerror("NULL value not within bounds\n");
    1421             :                         MT_rwlock_wrunlock(&b->thashlock);
    1422             :                         goto bailout;
    1423     1203371 :                 } else if (!isnil &&
    1424         782 :                            ((minbound &&
    1425     1153823 :                              atomcmp(t, minbound) < 0) ||
    1426           0 :                             (maxbound &&
    1427           0 :                              atomcmp(t, maxbound) >= 0))) {
    1428           0 :                         assert(0);
    1429             :                         GDKerror("value not within bounds\n");
    1430             :                         MT_rwlock_wrunlock(&b->thashlock);
    1431             :                         goto bailout;
    1432             :                 }
    1433             : 
    1434             :                 /* retrieve old value, but if this comes from the
    1435             :                  * logger, we need to deal with offsets that point
    1436             :                  * outside of the valid vheap */
    1437     1204153 :                 if (b->ttype == TYPE_void) {
    1438           0 :                         val = BUNtpos(bi, p);
    1439     1204153 :                 } else if (bi.type == TYPE_msk) {
    1440         198 :                         val = BUNtmsk(bi, p);
    1441     1203955 :                 } else if (b->tvheap) {
    1442      156821 :                         size_t off = BUNtvaroff(bi, p);
    1443      156821 :                         if (off < bi.vhfree)
    1444      157126 :                                 val = bi.vh->base + off;
    1445             :                         else
    1446             :                                 val = NULL; /* bad offset */
    1447             :                 } else {
    1448     1047134 :                         val = BUNtloc(bi, p);
    1449             :                 }
    1450             : 
    1451     1204458 :                 if (val) {
    1452     1204458 :                         if (atomcmp(val, t) == 0)
    1453      332692 :                                 continue; /* nothing to do */
    1454      871887 :                         if (!isnil &&
    1455      164224 :                             b->tnil &&
    1456      164224 :                             atomcmp(val, atomnil) == 0) {
    1457             :                                 /* if old value is nil and new value
    1458             :                                  * isn't, we're not sure anymore about
    1459             :                                  * the nil property, so we must clear
    1460             :                                  * it */
    1461      164045 :                                 MT_lock_set(&b->theaplock);
    1462      164042 :                                 b->tnil = false;
    1463      164042 :                                 MT_lock_unset(&b->theaplock);
    1464             :                         }
    1465      871888 :                         if (b->ttype != TYPE_void) {
    1466      871307 :                                 if (bi.maxpos != BUN_NONE) {
    1467      549357 :                                         if (!isnil && atomcmp(BUNtail(bi, bi.maxpos), t) < 0) {
    1468             :                                                 /* new value is larger
    1469             :                                                  * than previous
    1470             :                                                  * largest */
    1471       58252 :                                                 bi.maxpos = p;
    1472      491104 :                                         } else if (bi.maxpos == p && atomcmp(BUNtail(bi, bi.maxpos), t) != 0) {
    1473             :                                                 /* old value is equal to
    1474             :                                                  * largest and new value
    1475             :                                                  * is smaller or nil (see
    1476             :                                                  * above), so we don't
    1477             :                                                  * know anymore which is
    1478             :                                                  * the largest */
    1479         514 :                                                 bi.maxpos = BUN_NONE;
    1480             :                                         }
    1481             :                                 }
    1482      871306 :                                 if (bi.minpos != BUN_NONE) {
    1483      375825 :                                         if (!isnil && atomcmp(BUNtail(bi, bi.minpos), t) > 0) {
    1484             :                                                 /* new value is smaller
    1485             :                                                  * than previous
    1486             :                                                  * smallest */
    1487         437 :                                                 bi.minpos = p;
    1488      375389 :                                         } else if (bi.minpos == p && atomcmp(BUNtail(bi, bi.minpos), t) != 0) {
    1489             :                                                 /* old value is equal to
    1490             :                                                  * smallest and new value
    1491             :                                                  * is larger or nil (see
    1492             :                                                  * above), so we don't
    1493             :                                                  * know anymore which is
    1494             :                                                  * the largest */
    1495         637 :                                                 bi.minpos = BUN_NONE;
    1496             :                                         }
    1497             :                                 }
    1498             :                         }
    1499      871888 :                         HASHdelete_locked(&bi, p, val);     /* first delete old value from hash */
    1500             :                 } else {
    1501             :                         /* out of range old value, so the properties and
    1502             :                          * hash cannot be trusted */
    1503           0 :                         PROPdestroy(b);
    1504           0 :                         Hash *hs = b->thash;
    1505           0 :                         if (hs) {
    1506           0 :                                 b->thash = NULL;
    1507           0 :                                 doHASHdestroy(b, hs);
    1508             :                         }
    1509           0 :                         MT_lock_set(&b->theaplock);
    1510           0 :                         bi.minpos = BUN_NONE;
    1511           0 :                         bi.maxpos = BUN_NONE;
    1512           0 :                         b->tunique_est = 0.0;
    1513           0 :                         MT_lock_unset(&b->theaplock);
    1514             :                 }
    1515      869499 :                 OIDXdestroy(b);
    1516      872308 :                 STRMPdestroy(b);
    1517      872825 :                 RTREEdestroy(b);
    1518             : 
    1519      872843 :                 if (b->tvheap && b->ttype) {
    1520       76041 :                         var_t _d;
    1521       76041 :                         ptr _ptr;
    1522       76041 :                         _ptr = BUNtloc(bi, p);
    1523       76041 :                         switch (b->twidth) {
    1524        8907 :                         case 1:
    1525        8907 :                                 _d = (var_t) * (uint8_t *) _ptr + GDK_VAROFFSET;
    1526        8907 :                                 break;
    1527       60801 :                         case 2:
    1528       60801 :                                 _d = (var_t) * (uint16_t *) _ptr + GDK_VAROFFSET;
    1529       60801 :                                 break;
    1530        6333 :                         case 4:
    1531        6333 :                                 _d = (var_t) * (uint32_t *) _ptr;
    1532        6333 :                                 break;
    1533             : #if SIZEOF_VAR_T == 8
    1534           0 :                         case 8:
    1535           0 :                                 _d = (var_t) * (uint64_t *) _ptr;
    1536           0 :                                 break;
    1537             : #endif
    1538             :                         default:
    1539           0 :                                 MT_UNREACHABLE();
    1540             :                         }
    1541       76041 :                         MT_lock_set(&b->theaplock);
    1542       76137 :                         if (ATOMreplaceVAR(b, &_d, t) != GDK_SUCCEED) {
    1543           0 :                                 MT_lock_unset(&b->theaplock);
    1544           0 :                                 MT_rwlock_wrunlock(&b->thashlock);
    1545           0 :                                 goto bailout;
    1546             :                         }
    1547       76151 :                         MT_lock_unset(&b->theaplock);
    1548       76236 :                         if (b->twidth < SIZEOF_VAR_T &&
    1549       76306 :                             (b->twidth <= 2 ? _d - GDK_VAROFFSET : _d) >= ((size_t) 1 << (8 << b->tshift))) {
    1550             :                                 /* doesn't fit in current heap, upgrade it */
    1551          82 :                                 if (GDKupgradevarheap(b, _d, 0, bi.count) != GDK_SUCCEED) {
    1552           0 :                                         MT_rwlock_wrunlock(&b->thashlock);
    1553           0 :                                         goto bailout;
    1554             :                                 }
    1555             :                         }
    1556             :                         /* reinitialize iterator after possible heap upgrade */
    1557             :                         {
    1558             :                                 /* save and restore minpos/maxpos */
    1559       76236 :                                 BUN minpos = bi.minpos;
    1560       76236 :                                 BUN maxpos = bi.maxpos;
    1561       76236 :                                 bi = bat_iterator_nolock(b);
    1562       76236 :                                 bi.minpos = minpos;
    1563       76236 :                                 bi.maxpos = maxpos;
    1564             :                         }
    1565       76236 :                         _ptr = BUNtloc(bi, p);
    1566       76236 :                         switch (b->twidth) {
    1567        8834 :                         case 1:
    1568        8834 :                                 * (uint8_t *) _ptr = (uint8_t) (_d - GDK_VAROFFSET);
    1569        8834 :                                 break;
    1570       61060 :                         case 2:
    1571       61060 :                                 * (uint16_t *) _ptr = (uint16_t) (_d - GDK_VAROFFSET);
    1572       61060 :                                 break;
    1573        6342 :                         case 4:
    1574        6342 :                                 * (uint32_t *) _ptr = (uint32_t) _d;
    1575        6342 :                                 break;
    1576             : #if SIZEOF_VAR_T == 8
    1577           0 :                         case 8:
    1578           0 :                                 * (uint64_t *) _ptr = (uint64_t) _d;
    1579           0 :                                 break;
    1580             : #endif
    1581             :                         default:
    1582       76236 :                                 MT_UNREACHABLE();
    1583             :                         }
    1584      796802 :                 } else if (ATOMstorage(b->ttype) == TYPE_msk) {
    1585         182 :                         mskSetVal(b, p, * (msk *) t);
    1586             :                 } else {
    1587      796620 :                         assert(BATatoms[b->ttype].atomPut == NULL);
    1588      796620 :                         switch (ATOMsize(b->ttype)) {
    1589             :                         case 0:      /* void */
    1590             :                                 break;
    1591       15224 :                         case 1:
    1592       15224 :                                 ((bte *) b->theap->base)[p] = * (bte *) t;
    1593       15224 :                                 break;
    1594        7615 :                         case 2:
    1595        7615 :                                 ((sht *) b->theap->base)[p] = * (sht *) t;
    1596        7615 :                                 break;
    1597      191385 :                         case 4:
    1598      191385 :                                 ((int *) b->theap->base)[p] = * (int *) t;
    1599      191385 :                                 break;
    1600      582396 :                         case 8:
    1601      582396 :                                 ((lng *) b->theap->base)[p] = * (lng *) t;
    1602      582396 :                                 break;
    1603           0 :                         case 16:
    1604             : #ifdef HAVE_HGE
    1605           0 :                                 ((hge *) b->theap->base)[p] = * (hge *) t;
    1606             : #else
    1607             :                                 ((uuid *) b->theap->base)[p] = * (uuid *) t;
    1608             : #endif
    1609           0 :                                 break;
    1610           0 :                         default:
    1611           0 :                                 memcpy(BUNtloc(bi, p), t, ATOMsize(b->ttype));
    1612           0 :                                 break;
    1613             :                         }
    1614             :                 }
    1615             : 
    1616      873038 :                 HASHinsert_locked(&bi, p, t);       /* insert new value into hash */
    1617             : 
    1618      870174 :                 prv = p > 0 ? p - 1 : BUN_NONE;
    1619      870174 :                 nxt = p < last ? p + 1 : BUN_NONE;
    1620             : 
    1621      870174 :                 MT_lock_set(&b->theaplock);
    1622      872011 :                 if (b->tsorted) {
    1623        3149 :                         if (prv != BUN_NONE &&
    1624        1260 :                             atomcmp(t, BUNtail(bi, prv)) < 0) {
    1625          56 :                                 b->tsorted = false;
    1626          56 :                                 b->tnosorted = p;
    1627        2622 :                         } else if (nxt != BUN_NONE &&
    1628         789 :                                    atomcmp(t, BUNtail(bi, nxt)) > 0) {
    1629         708 :                                 b->tsorted = false;
    1630         708 :                                 b->tnosorted = nxt;
    1631        1125 :                         } else if (b->ttype != TYPE_void && BATtdense(b)) {
    1632           0 :                                 if (prv != BUN_NONE &&
    1633           0 :                                     1 + * (oid *) BUNtloc(bi, prv) != * (oid *) t) {
    1634           0 :                                         b->tseqbase = oid_nil;
    1635           0 :                                 } else if (nxt != BUN_NONE &&
    1636           0 :                                            * (oid *) BUNtloc(bi, nxt) != 1 + * (oid *) t) {
    1637           0 :                                         b->tseqbase = oid_nil;
    1638           0 :                                 } else if (prv == BUN_NONE &&
    1639           0 :                                            nxt == BUN_NONE) {
    1640           0 :                                         b->tseqbase = * (oid *) t;
    1641             :                                 }
    1642             :                         }
    1643      870122 :                 } else if (b->tnosorted >= p)
    1644        3446 :                         b->tnosorted = 0;
    1645      872011 :                 if (b->trevsorted) {
    1646        1552 :                         if (prv != BUN_NONE &&
    1647         492 :                             atomcmp(t, BUNtail(bi, prv)) > 0) {
    1648          63 :                                 b->trevsorted = false;
    1649          63 :                                 b->tnorevsorted = p;
    1650        1182 :                         } else if (nxt != BUN_NONE &&
    1651         185 :                                    atomcmp(t, BUNtail(bi, nxt)) < 0) {
    1652          49 :                                 b->trevsorted = false;
    1653          49 :                                 b->tnorevsorted = nxt;
    1654             :                         }
    1655      870951 :                 } else if (b->tnorevsorted >= p)
    1656        1736 :                         b->tnorevsorted = 0;
    1657      872011 :                 if (((b->ttype != TYPE_void) & b->tkey) && b->batCount > 1) {
    1658         988 :                         BATkey(b, false);
    1659      871023 :                 } else if (!b->tkey && (b->tnokey[0] == p || b->tnokey[1] == p))
    1660         909 :                         b->tnokey[0] = b->tnokey[1] = 0;
    1661      872011 :                 if (b->tnonil && ATOMstorage(b->ttype) != TYPE_msk)
    1662      638983 :                         b->tnonil = t && atomcmp(t, atomnil) != 0;
    1663     1204669 :                 MT_lock_unset(&b->theaplock);
    1664             :         }
    1665     1203816 :         BUN nunique = b->thash ? b->thash->nunique : 0;
    1666     1203816 :         MT_rwlock_wrunlock(&b->thashlock);
    1667     1205339 :         MT_lock_set(&b->theaplock);
    1668     1200443 :         if (nunique != 0)
    1669       27201 :                 b->tunique_est = (double) nunique;
    1670     1200443 :         b->tminpos = bi.minpos;
    1671     1200443 :         b->tmaxpos = bi.maxpos;
    1672     1200443 :         b->theap->dirty = true;
    1673     1200443 :         if (b->tvheap)
    1674      155900 :                 b->tvheap->dirty = true;
    1675     1200443 :         MT_lock_unset(&b->theaplock);
    1676             : 
    1677     1201854 :         return GDK_SUCCEED;
    1678             : 
    1679           0 :   bailout:
    1680           0 :         if (minbound)
    1681           0 :                 VALclear(&minprop);
    1682           0 :         if (maxbound)
    1683           0 :                 VALclear(&maxprop);
    1684             :         return GDK_FAIL;
    1685             : }
    1686             : 
    1687             : /* Replace multiple values given by their positions with the given values. */
    1688             : gdk_return
    1689      585417 : BUNreplacemulti(BAT *b, const oid *positions, const void *values, BUN count, bool force)
    1690             : {
    1691      585417 :         BATcheck(b, GDK_FAIL);
    1692             : 
    1693      585417 :         if (b->ttype == TYPE_void && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED)
    1694             :                 return GDK_FAIL;
    1695             : 
    1696      585417 :         return BUNinplacemulti(b, positions, values, count, force, false);
    1697             : }
    1698             : 
    1699             : /* Replace multiple values starting from a given position with the given
    1700             :  * values. */
    1701             : gdk_return
    1702      606109 : BUNreplacemultiincr(BAT *b, oid position, const void *values, BUN count, bool force)
    1703             : {
    1704      606109 :         BATcheck(b, GDK_FAIL);
    1705             : 
    1706      606109 :         if (b->ttype == TYPE_void && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED)
    1707             :                 return GDK_FAIL;
    1708             : 
    1709      606109 :         return BUNinplacemulti(b, &position, values, count, force, true);
    1710             : }
    1711             : 
    1712             : gdk_return
    1713      585417 : BUNreplace(BAT *b, oid id, const void *t, bool force)
    1714             : {
    1715      585417 :         return BUNreplacemulti(b, &id, b->ttype && b->tvheap ? (const void *) &t : t, 1, force);
    1716             : }
    1717             : 
    1718             : /* very much like BUNreplace, but this doesn't make any changes if the
    1719             :  * tail column is void */
    1720             : gdk_return
    1721        4515 : void_inplace(BAT *b, oid id, const void *val, bool force)
    1722             : {
    1723        4515 :         assert(id >= b->hseqbase && id < b->hseqbase + BATcount(b));
    1724        4515 :         if (id < b->hseqbase || id >= b->hseqbase + BATcount(b)) {
    1725             :                 GDKerror("id out of range\n");
    1726             :                 return GDK_FAIL;
    1727             :         }
    1728        4515 :         if (b->ttype == TYPE_void)
    1729             :                 return GDK_SUCCEED;
    1730        4515 :         return BUNinplacemulti(b, &id, b->ttype && b->tvheap ? (const void *) &val : (const void *) val, 1, force, false);
    1731             : }
    1732             : 
    1733             : /*
    1734             :  * @- BUN Lookup
    1735             :  * Location of a BUN using a value should use the available indexes to
    1736             :  * speed up access. If indexes are lacking then a hash index is
    1737             :  * constructed under the assumption that 1) multiple access to the BAT
    1738             :  * can be expected and 2) building the hash is only slightly more
    1739             :  * expensive than the full linear scan.  BUN_NONE is returned if no
    1740             :  * such element could be found.  In those cases where the type is
    1741             :  * known and a hash index is available, one should use the inline
    1742             :  * functions to speed-up processing.
    1743             :  */
    1744             : static BUN
    1745           0 : slowfnd(BAT *b, const void *v)
    1746             : {
    1747           0 :         BATiter bi = bat_iterator(b);
    1748           0 :         BUN p, q;
    1749           0 :         int (*cmp)(const void *, const void *) = ATOMcompare(bi.type);
    1750             : 
    1751           0 :         BATloop(b, p, q) {
    1752           0 :                 if ((*cmp)(v, BUNtail(bi, p)) == 0) {
    1753           0 :                         bat_iterator_end(&bi);
    1754           0 :                         return p;
    1755             :                 }
    1756             :         }
    1757           0 :         bat_iterator_end(&bi);
    1758           0 :         return BUN_NONE;
    1759             : }
    1760             : 
    1761             : static BUN
    1762           0 : mskfnd(BAT *b, msk v)
    1763             : {
    1764           0 :         BUN p, q;
    1765             : 
    1766           0 :         if (v) {
    1767             :                 /* find a 1 value */
    1768           0 :                 for (p = 0, q = (BATcount(b) + 31) / 32; p < q; p++) {
    1769           0 :                         if (((uint32_t *) b->theap->base)[p] != 0) {
    1770             :                                 /* there's at least one 1 bit */
    1771           0 :                                 return p * 32 + candmask_lobit(((uint32_t *) b->theap->base)[p]);
    1772             :                         }
    1773             :                 }
    1774             :         } else {
    1775             :                 /* find a 0 value */
    1776           0 :                 for (p = 0, q = (BATcount(b) + 31) / 32; p < q; p++) {
    1777           0 :                         if (((uint32_t *) b->theap->base)[p] != ~0U) {
    1778             :                                 /* there's at least one 0 bit */
    1779           0 :                                 return p * 32 + candmask_lobit(~((uint32_t *) b->theap->base)[p]);
    1780             :                         }
    1781             :                 }
    1782             :         }
    1783             :         return BUN_NONE;
    1784             : }
    1785             : 
    1786             : BUN
    1787     1812953 : BUNfnd(BAT *b, const void *v)
    1788             : {
    1789     1812953 :         BUN r = BUN_NONE;
    1790     1812953 :         BATiter bi;
    1791             : 
    1792     1812953 :         BATcheck(b, BUN_NONE);
    1793     1812953 :         if (!v || BATcount(b) == 0)
    1794             :                 return r;
    1795     1386105 :         if (complex_cand(b)) {
    1796           0 :                 struct canditer ci;
    1797           0 :                 canditer_init(&ci, NULL, b);
    1798           0 :                 return canditer_search(&ci, * (const oid *) v, false);
    1799             :         }
    1800     1386105 :         if (BATtvoid(b))
    1801      758888 :                 return BUNfndVOID(b, v);
    1802      627217 :         if (ATOMstorage(b->ttype) == TYPE_msk) {
    1803           0 :                 return mskfnd(b, *(msk*)v);
    1804             :         }
    1805      627217 :         if (!BATcheckhash(b)) {
    1806      140347 :                 if (BATordered(b) || BATordered_rev(b))
    1807      139979 :                         return SORTfnd(b, v);
    1808             :         }
    1809      487223 :         if (BAThash(b) == GDK_SUCCEED) {
    1810      487228 :                 bi = bat_iterator(b); /* outside of hashlock */
    1811      487240 :                 MT_rwlock_rdlock(&b->thashlock);
    1812      487241 :                 if (b->thash == NULL) {
    1813           0 :                         MT_rwlock_rdunlock(&b->thashlock);
    1814           0 :                         bat_iterator_end(&bi);
    1815           0 :                         goto hashfnd_failed;
    1816             :                 }
    1817      974317 :                 switch (ATOMbasetype(bi.type)) {
    1818           5 :                 case TYPE_bte:
    1819           5 :                         HASHloop_bte(bi, b->thash, r, v)
    1820             :                                 break;
    1821             :                         break;
    1822           0 :                 case TYPE_sht:
    1823           0 :                         HASHloop_sht(bi, b->thash, r, v)
    1824             :                                 break;
    1825             :                         break;
    1826          32 :                 case TYPE_int:
    1827          32 :                         HASHloop_int(bi, b->thash, r, v)
    1828             :                                 break;
    1829             :                         break;
    1830           0 :                 case TYPE_flt:
    1831           0 :                         HASHloop_flt(bi, b->thash, r, v)
    1832             :                                 break;
    1833             :                         break;
    1834           0 :                 case TYPE_dbl:
    1835           0 :                         HASHloop_dbl(bi, b->thash, r, v)
    1836             :                                 break;
    1837             :                         break;
    1838         148 :                 case TYPE_lng:
    1839         148 :                         HASHloop_lng(bi, b->thash, r, v)
    1840             :                                 break;
    1841             :                         break;
    1842             : #ifdef HAVE_HGE
    1843           0 :                 case TYPE_hge:
    1844           0 :                         HASHloop_hge(bi, b->thash, r, v)
    1845             :                                 break;
    1846             :                         break;
    1847             : #endif
    1848           0 :                 case TYPE_uuid:
    1849           0 :                         HASHloop_uuid(bi, b->thash, r, v)
    1850             :                                 break;
    1851             :                         break;
    1852      487055 :                 case TYPE_str:
    1853      674220 :                         HASHloop_str(bi, b->thash, r, v)
    1854             :                                 break;
    1855             :                         break;
    1856           1 :                 default:
    1857           3 :                         HASHloop(bi, b->thash, r, v)
    1858             :                                 break;
    1859             :                         break;
    1860             :                 }
    1861      487241 :                 MT_rwlock_rdunlock(&b->thashlock);
    1862      487241 :                 bat_iterator_end(&bi);
    1863      487241 :                 return r;
    1864             :         }
    1865           0 :   hashfnd_failed:
    1866             :         /* can't build hash table, search the slow way */
    1867           0 :         GDKclrerr();
    1868           0 :         return slowfnd(b, v);
    1869             : }
    1870             : 
    1871             : /*
    1872             :  * @+ BAT Property Management
    1873             :  *
    1874             :  * The function BATcount returns the number of active elements in a
    1875             :  * BAT.  Counting is type independent.  It can be implemented quickly,
    1876             :  * because the system ensures a dense BUN list.
    1877             :  */
    1878             : void
    1879     3060967 : BATsetcapacity(BAT *b, BUN cnt)
    1880             : {
    1881     3060967 :         b->batCapacity = cnt;
    1882     3060967 :         assert(b->batCount <= cnt);
    1883     3060967 : }
    1884             : 
    1885             : /* Set the batCount value for the bat and also set some dependent
    1886             :  * properties.  This function should be called only when it is save from
    1887             :  * concurrent use (e.g. when theaplock is being held). */
    1888             : void
    1889    70576609 : BATsetcount(BAT *b, BUN cnt)
    1890             : {
    1891             :         /* head column is always VOID, and some head properties never change */
    1892    70576609 :         assert(!is_oid_nil(b->hseqbase));
    1893    70576609 :         assert(cnt <= BUN_MAX);
    1894             : 
    1895    70576609 :         b->batCount = cnt;
    1896    70576609 :         if (b->theap->parentid == b->batCacheid) {
    1897    67498698 :                 b->theap->dirty |= b->ttype != TYPE_void && cnt > 0;
    1898    67498698 :                 b->theap->free = tailsize(b, cnt);
    1899             :         }
    1900    70576609 :         if (b->ttype == TYPE_void)
    1901     6186409 :                 b->batCapacity = cnt;
    1902    70576609 :         if (cnt <= 1) {
    1903     5719928 :                 b->tsorted = b->trevsorted = ATOMlinear(b->ttype);
    1904     5719928 :                 b->tnosorted = b->tnorevsorted = 0;
    1905             :         }
    1906             :         /* if the BAT was made smaller, we need to zap some values */
    1907    70576609 :         if (b->tnosorted >= BATcount(b))
    1908     3869807 :                 b->tnosorted = 0;
    1909    70576609 :         if (b->tnorevsorted >= BATcount(b))
    1910     3652947 :                 b->tnorevsorted = 0;
    1911    70576609 :         if (b->tnokey[0] >= BATcount(b) || b->tnokey[1] >= BATcount(b)) {
    1912     3672083 :                 b->tnokey[0] = 0;
    1913     3672083 :                 b->tnokey[1] = 0;
    1914             :         }
    1915    70576609 :         if (b->ttype == TYPE_void) {
    1916     6187698 :                 b->tsorted = true;
    1917     6187698 :                 if (is_oid_nil(b->tseqbase)) {
    1918      136495 :                         b->tkey = cnt <= 1;
    1919      136495 :                         b->trevsorted = true;
    1920      136495 :                         b->tnil = true;
    1921      136495 :                         b->tnonil = false;
    1922             :                 } else {
    1923     6051203 :                         b->tkey = true;
    1924     6051203 :                         b->trevsorted = cnt <= 1;
    1925     6051203 :                         b->tnil = false;
    1926     6051203 :                         b->tnonil = true;
    1927             :                 }
    1928             :         }
    1929    70576609 :         assert(b->batCapacity >= cnt);
    1930    70576609 : }
    1931             : 
    1932             : /*
    1933             :  * The key and name properties can be changed at any time.  Keyed
    1934             :  * dimensions are automatically supported by an auxiliary hash-based
    1935             :  * access structure to speed up searching. Turning off the key
    1936             :  * integrity property does not cause the index to disappear. It can
    1937             :  * still be used to speed-up retrieval. The routine BATkey sets the
    1938             :  * key property of the association head.
    1939             :  */
    1940             : gdk_return
    1941       56652 : BATkey(BAT *b, bool flag)
    1942             : {
    1943       56652 :         BATcheck(b, GDK_FAIL);
    1944       56652 :         if (b->ttype == TYPE_void) {
    1945        1167 :                 if (BATtdense(b) && !flag) {
    1946           0 :                         GDKerror("dense column must be unique.\n");
    1947           0 :                         return GDK_FAIL;
    1948             :                 }
    1949        1167 :                 if (is_oid_nil(b->tseqbase) && flag && b->batCount > 1) {
    1950           0 :                         GDKerror("void column cannot be unique.\n");
    1951           0 :                         return GDK_FAIL;
    1952             :                 }
    1953             :         }
    1954       56652 :         b->tkey = flag;
    1955       56652 :         if (!flag) {
    1956       40477 :                 b->tseqbase = oid_nil;
    1957             :         } else
    1958       16175 :                 b->tnokey[0] = b->tnokey[1] = 0;
    1959       56652 :         gdk_return rc = GDK_SUCCEED;
    1960       56652 :         if (flag && VIEWtparent(b)) {
    1961             :                 /* if a view is key, then so is the parent if the two
    1962             :                  * are aligned */
    1963           6 :                 BAT *bp = BATdescriptor(VIEWtparent(b));
    1964           6 :                 if (bp != NULL) {
    1965           6 :                         MT_lock_set(&bp->theaplock);
    1966          12 :                         if (BATcount(b) == BATcount(bp) &&
    1967           6 :                             ATOMtype(BATttype(b)) == ATOMtype(BATttype(bp)) &&
    1968           6 :                             !BATtkey(bp) &&
    1969           0 :                             ((BATtvoid(b) && BATtvoid(bp) && b->tseqbase == bp->tseqbase) ||
    1970             :                              BATcount(b) == 0))
    1971           0 :                                 rc = BATkey(bp, true);
    1972           6 :                         MT_lock_unset(&bp->theaplock);
    1973           6 :                         BBPunfix(bp->batCacheid);
    1974             :                 }
    1975             :         }
    1976             :         return rc;
    1977             : }
    1978             : 
    1979             : void
    1980     2946061 : BAThseqbase(BAT *b, oid o)
    1981             : {
    1982     2946061 :         if (b != NULL) {
    1983     2946061 :                 assert(o <= GDK_oid_max);    /* i.e., not oid_nil */
    1984     2946061 :                 assert(o + BATcount(b) <= GDK_oid_max);
    1985     2946061 :                 b->hseqbase = o;
    1986             :         }
    1987     2946061 : }
    1988             : 
    1989             : void
    1990     7418877 : BATtseqbase(BAT *b, oid o)
    1991             : {
    1992     7418877 :         assert(o <= oid_nil);
    1993     7418877 :         if (b == NULL)
    1994             :                 return;
    1995     7418877 :         assert(is_oid_nil(o) || o + BATcount(b) <= GDK_oid_max);
    1996     7418877 :         if (ATOMtype(b->ttype) == TYPE_oid) {
    1997     6292385 :                 b->tseqbase = o;
    1998             : 
    1999             :                 /* adapt keyness */
    2000     6292385 :                 if (BATtvoid(b)) {
    2001     6263713 :                         b->tsorted = true;
    2002     6263713 :                         if (is_oid_nil(o)) {
    2003         113 :                                 b->tkey = b->batCount <= 1;
    2004         113 :                                 b->tnonil = b->batCount == 0;
    2005         113 :                                 b->tnil = b->batCount > 0;
    2006         113 :                                 b->trevsorted = true;
    2007         113 :                                 b->tnosorted = b->tnorevsorted = 0;
    2008         113 :                                 if (!b->tkey) {
    2009           0 :                                         b->tnokey[0] = 0;
    2010           0 :                                         b->tnokey[1] = 1;
    2011             :                                 } else {
    2012         113 :                                         b->tnokey[0] = b->tnokey[1] = 0;
    2013             :                                 }
    2014             :                         } else {
    2015     6263600 :                                 if (!b->tkey) {
    2016       19762 :                                         b->tkey = true;
    2017       19762 :                                         b->tnokey[0] = b->tnokey[1] = 0;
    2018             :                                 }
    2019     6263600 :                                 b->tnonil = true;
    2020     6263600 :                                 b->tnil = false;
    2021     6263600 :                                 b->trevsorted = b->batCount <= 1;
    2022     6263600 :                                 if (!b->trevsorted)
    2023       20269 :                                         b->tnorevsorted = 1;
    2024             :                         }
    2025             :                 }
    2026             :         } else {
    2027     1126492 :                 assert(o == oid_nil);
    2028     1126492 :                 b->tseqbase = oid_nil;
    2029             :         }
    2030             : }
    2031             : 
    2032             : /*
    2033             :  * @- Change the BAT access permissions (read, append, write)
    2034             :  * Regrettably, BAT access-permissions, persistent status and memory
    2035             :  * map modes, interact in ways that makes one's brain sizzle. This
    2036             :  * makes BATsetaccess and TMcommit (where a change in BAT persistence
    2037             :  * mode is made permanent) points in which the memory map status of
    2038             :  * bats needs to be carefully re-assessed and ensured.
    2039             :  *
    2040             :  * Another complication is the fact that during commit, concurrent
    2041             :  * users may access the heaps, such that the simple solution
    2042             :  * unmap;re-map is out of the question.
    2043             :  * Even worse, it is not possible to even rename an open mmap file in
    2044             :  * Windows. For this purpose, we dropped the old .priv scheme, which
    2045             :  * relied on file moves. Now, the file that is opened with mmap is
    2046             :  * always the X file, in case of newstorage=STORE_PRIV, we save in a
    2047             :  * new file X.new
    2048             :  *
    2049             :  * we must consider the following dimensions:
    2050             :  *
    2051             :  * persistence:
    2052             :  *     not simply the current persistence mode but whether the bat *was*
    2053             :  *     present at the last commit point (BBP status & BBPEXISTING).
    2054             :  *     The crucial issue is namely whether we must guarantee recovery
    2055             :  *     to a previous sane state.
    2056             :  *
    2057             :  * access:
    2058             :  *     whether the BAT is BAT_READ or BAT_WRITE. Note that BAT_APPEND
    2059             :  *     is usually the same as BAT_READ (as our concern are only data pages
    2060             :  *     that already existed at the last commit).
    2061             :  *
    2062             :  * storage:
    2063             :  *     the current way the heap file X is memory-mapped;
    2064             :  *     STORE_MMAP uses direct mapping (so dirty pages may be flushed
    2065             :  *     at any time to disk), STORE_PRIV uses copy-on-write.
    2066             :  *
    2067             :  * newstorage:
    2068             :  *     the current save-regime. STORE_MMAP calls msync() on the heap X,
    2069             :  *     whereas STORE_PRIV writes the *entire* heap in a file: X.new
    2070             :  *     If a BAT is loaded from disk, the field newstorage is used
    2071             :  *     to set storage as well (so before change-access and commit-
    2072             :  *     persistence mayhem, we always have newstorage=storage).
    2073             :  *
    2074             :  * change-access:
    2075             :  *     what happens if the bat-access mode is changed from
    2076             :  *     BAT_READ into BAT_WRITE (or vice versa).
    2077             :  *
    2078             :  * commit-persistence:
    2079             :  *     what happens during commit if the bat-persistence mode was
    2080             :  *     changed (from TRANSIENT into PERSISTENT, or vice versa).
    2081             :  *
    2082             :  * this is the scheme:
    2083             :  *
    2084             :  *  persistence access    newstorage storage    change-access commit-persistence
    2085             :  *  =========== ========= ========== ========== ============= ==================
    2086             :  * 0 transient  BAT_READ  STORE_MMAP STORE_MMAP =>2           =>4
    2087             :  * 1 transient  BAT_READ  STORE_PRIV STORE_PRIV =>3           =>5
    2088             :  * 2 transient  BAT_WRITE STORE_MMAP STORE_MMAP =>0           =>6+
    2089             :  * 3 transient  BAT_WRITE STORE_PRIV STORE_PRIV =>1           =>7
    2090             :  * 4 persistent BAT_READ  STORE_MMAP STORE_MMAP =>6+          =>0
    2091             :  * 5 persistent BAT_READ  STORE_PRIV STORE_PRIV =>7           =>1
    2092             :  * 6 persistent BAT_WRITE STORE_PRIV STORE_MMAP del X.new=>4+ del X.new;=>2+
    2093             :  * 7 persistent BAT_WRITE STORE_PRIV STORE_PRIV =>5           =>3
    2094             :  *
    2095             :  * exception states:
    2096             :  * a transient  BAT_READ  STORE_PRIV STORE_MMAP =>b           =>c
    2097             :  * b transient  BAT_WRITE STORE_PRIV STORE_MMAP =>a           =>6
    2098             :  * c persistent BAT_READ  STORE_PRIV STORE_MMAP =>6           =>a
    2099             :  *
    2100             :  * (+) indicates that we must ensure that the heap gets saved in its new mode
    2101             :  *
    2102             :  * Note that we now allow a heap with save-regime STORE_PRIV that was
    2103             :  * actually mapped STORE_MMAP. In effect, the potential corruption of
    2104             :  * the X file is compensated by writing out full X.new files that take
    2105             :  * precedence.  When transitioning out of this state towards one with
    2106             :  * both storage regime and OS as STORE_MMAP we need to move the X.new
    2107             :  * files into the backup directory. Then msync the X file and (on
    2108             :  * success) remove the X.new; see backup_new().
    2109             :  *
    2110             :  * Exception states are only reachable if the commit fails and those
    2111             :  * new persistent bats have already been processed (but never become
    2112             :  * part of a committed state). In that case a transition 2=>6 may end
    2113             :  * up 2=>b.  Exception states a and c are reachable from b.
    2114             :  *
    2115             :  * Errors in HEAPchangeaccess() can be handled atomically inside the
    2116             :  * routine.  The work on changing mmap modes HEAPcommitpersistence()
    2117             :  * is done during the BBPsync() for all bats that are newly persistent
    2118             :  * (BBPNEW). After the TMcommit(), it is done for those bats that are
    2119             :  * no longer persistent after the commit (BBPDELETED), only if it
    2120             :  * succeeds.  Such transient bats cannot be processed before the
    2121             :  * commit, because the commit may fail and then the more unsafe
    2122             :  * transient mmap modes would be present on a persistent bat.
    2123             :  *
    2124             :  * See dirty_bat() in BBPsync() -- gdk_bbp.c and epilogue() in
    2125             :  * gdk_tm.c.
    2126             :  *
    2127             :  * Including the exception states, we have 11 of the 16
    2128             :  * combinations. As for the 5 avoided states, all four
    2129             :  * (persistence,access) states with (STORE_MMAP,STORE_PRIV) are
    2130             :  * omitted (this would amount to an msync() save regime on a
    2131             :  * copy-on-write heap -- which does not work). The remaining avoided
    2132             :  * state is the patently unsafe
    2133             :  * (persistent,BAT_WRITE,STORE_MMAP,STORE_MMAP).
    2134             :  *
    2135             :  * Note that after a server restart exception states are gone, as on
    2136             :  * BAT loads the saved descriptor is inspected again (which will
    2137             :  * reproduce the state at the last succeeded commit).
    2138             :  *
    2139             :  * To avoid exception states, a TMsubcommit protocol would need to be
    2140             :  * used which is too heavy for BATsetaccess().
    2141             :  *
    2142             :  * Note that this code is not about making heaps mmap-ed in the first
    2143             :  * place.  It is just about determining which flavor of mmap should be
    2144             :  * used. The MAL user is oblivious of such details.
    2145             :  */
    2146             : 
    2147             : /* rather than deleting X.new, we comply with the commit protocol and
    2148             :  * move it to backup storage */
    2149             : static gdk_return
    2150           0 : backup_new(Heap *hp, bool lock)
    2151             : {
    2152           0 :         int batret, bakret, ret = -1;
    2153           0 :         char batpath[MAXPATH], bakpath[MAXPATH];
    2154           0 :         struct stat st;
    2155             : 
    2156           0 :         char *bak_filename = NULL;
    2157           0 :         if ((bak_filename = strrchr(hp->filename, DIR_SEP)) != NULL)
    2158           0 :                 bak_filename++;
    2159             :         else
    2160             :                 bak_filename = hp->filename;
    2161             :         /* check for an existing X.new in BATDIR, BAKDIR and SUBDIR */
    2162           0 :         if (GDKfilepath(batpath, sizeof(batpath), hp->farmid, BATDIR, hp->filename, "new") == GDK_SUCCEED &&
    2163           0 :             GDKfilepath(bakpath, sizeof(bakpath), hp->farmid, BAKDIR, bak_filename, "new") == GDK_SUCCEED) {
    2164             :                 /* file actions here interact with the global commits */
    2165           0 :                 if (lock)
    2166           0 :                         BBPtmlock();
    2167             : 
    2168           0 :                 batret = MT_stat(batpath, &st);
    2169           0 :                 bakret = MT_stat(bakpath, &st);
    2170             : 
    2171           0 :                 if (batret == 0 && bakret) {
    2172             :                         /* no backup yet, so move the existing X.new there out
    2173             :                          * of the way */
    2174           0 :                         if ((ret = MT_rename(batpath, bakpath)) < 0)
    2175           0 :                                 GDKsyserror("backup_new: rename %s to %s failed\n",
    2176             :                                             batpath, bakpath);
    2177           0 :                         TRC_DEBUG(IO, "rename(%s,%s) = %d\n", batpath, bakpath, ret);
    2178           0 :                 } else if (batret == 0) {
    2179             :                         /* there is a backup already; just remove the X.new */
    2180           0 :                         if ((ret = MT_remove(batpath)) != 0)
    2181           0 :                                 GDKsyserror("backup_new: remove %s failed\n", batpath);
    2182           0 :                         TRC_DEBUG(IO, "remove(%s) = %d\n", batpath, ret);
    2183             :                 } else {
    2184             :                         ret = 0;
    2185             :                 }
    2186           0 :                 if (lock)
    2187           0 :                         BBPtmunlock();
    2188             :         }
    2189           0 :         return ret ? GDK_FAIL : GDK_SUCCEED;
    2190             : }
    2191             : 
    2192             : #define ACCESSMODE(wr,rd) ((wr)?BAT_WRITE:(rd)?BAT_READ:-1)
    2193             : 
    2194             : /* transition heap from readonly to writable */
    2195             : static storage_t
    2196     7440675 : HEAPchangeaccess(Heap *hp, int dstmode, bool existing)
    2197             : {
    2198     7440675 :         if (hp->base == NULL || hp->newstorage == STORE_MEM || !existing || dstmode == -1)
    2199     7440675 :                 return hp->newstorage;       /* 0<=>2,1<=>3,a<=>b */
    2200             : 
    2201           0 :         if (dstmode == BAT_WRITE) {
    2202           0 :                 if (hp->storage != STORE_PRIV)
    2203           0 :                         hp->dirty = true;    /* exception c does not make it dirty */
    2204           0 :                 return STORE_PRIV;      /* 4=>6,5=>7,c=>6 persistent BAT_WRITE needs STORE_PRIV */
    2205             :         }
    2206           0 :         if (hp->storage == STORE_MMAP) {     /* 6=>4 */
    2207           0 :                 hp->dirty = true;
    2208           0 :                 return backup_new(hp, true) != GDK_SUCCEED ? STORE_INVALID : STORE_MMAP;        /* only called for existing bats */
    2209             :         }
    2210             :         return hp->storage;  /* 7=>5 */
    2211             : }
    2212             : 
    2213             : /* heap changes persistence mode (at commit point) */
    2214             : static storage_t
    2215      202306 : HEAPcommitpersistence(Heap *hp, bool writable, bool existing)
    2216             : {
    2217      202306 :         if (existing) {         /* existing, ie will become transient */
    2218       59540 :                 if (hp->storage == STORE_MMAP && hp->newstorage == STORE_PRIV && writable) {      /* 6=>2 */
    2219           0 :                         hp->dirty = true;
    2220           0 :                         return backup_new(hp, false) != GDK_SUCCEED ? STORE_INVALID : STORE_MMAP;       /* only called for existing bats */
    2221             :                 }
    2222       59540 :                 return hp->newstorage;       /* 4=>0,5=>1,7=>3,c=>a no change */
    2223             :         }
    2224             :         /* !existing, ie will become persistent */
    2225      142766 :         if (hp->newstorage == STORE_MEM)
    2226             :                 return hp->newstorage;
    2227         783 :         if (hp->newstorage == STORE_MMAP && !writable)
    2228             :                 return STORE_MMAP;      /* 0=>4 STORE_MMAP */
    2229             : 
    2230           0 :         if (hp->newstorage == STORE_MMAP)
    2231           0 :                 hp->dirty = true;    /* 2=>6 */
    2232             :         return STORE_PRIV;      /* 1=>5,2=>6,3=>7,a=>c,b=>6 states */
    2233             : }
    2234             : 
    2235             : 
    2236             : #define ATOMappendpriv(t, h) (ATOMstorage(t) != TYPE_str /*|| GDK_ELIMDOUBLES(h) */)
    2237             : 
    2238             : /* change the heap modes at a commit */
    2239             : gdk_return
    2240      178400 : BATcheckmodes(BAT *b, bool existing)
    2241             : {
    2242      178400 :         storage_t m1 = STORE_MEM, m3 = STORE_MEM;
    2243      178400 :         bool dirty = false, wr;
    2244             : 
    2245      178400 :         BATcheck(b, GDK_FAIL);
    2246             : 
    2247      178400 :         wr = (b->batRestricted == BAT_WRITE);
    2248      178400 :         if (b->ttype) {
    2249      178400 :                 m1 = HEAPcommitpersistence(b->theap, wr, existing);
    2250      178400 :                 dirty |= (b->theap->newstorage != m1);
    2251             :         }
    2252             : 
    2253      178400 :         if (b->tvheap) {
    2254       23906 :                 bool ta = (b->batRestricted == BAT_APPEND) && ATOMappendpriv(b->ttype, b->tvheap);
    2255       23906 :                 m3 = HEAPcommitpersistence(b->tvheap, wr || ta, existing);
    2256       23906 :                 dirty |= (b->tvheap->newstorage != m3);
    2257             :         }
    2258      178400 :         if (m1 == STORE_INVALID || m3 == STORE_INVALID)
    2259             :                 return GDK_FAIL;
    2260             : 
    2261      178400 :         if (dirty) {
    2262           0 :                 b->theap->newstorage = m1;
    2263           0 :                 if (b->tvheap)
    2264           0 :                         b->tvheap->newstorage = m3;
    2265             :         }
    2266             :         return GDK_SUCCEED;
    2267             : }
    2268             : 
    2269             : BAT *
    2270     9228201 : BATsetaccess(BAT *b, restrict_t newmode)
    2271             : {
    2272     9228201 :         restrict_t bakmode;
    2273             : 
    2274     9228201 :         BATcheck(b, NULL);
    2275     9228201 :         if (newmode != BAT_READ &&
    2276       19255 :             (isVIEW(b) || (ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1)) {
    2277           0 :                 BAT *bn = COLcopy(b, b->ttype, true, b->batRole);
    2278           0 :                 BBPunfix(b->batCacheid);
    2279           0 :                 if (bn == NULL)
    2280             :                         return NULL;
    2281             :                 b = bn;
    2282             :         }
    2283     9228201 :         MT_lock_set(&b->theaplock);
    2284     9270599 :         bakmode = b->batRestricted;
    2285     9270599 :         if (bakmode != newmode) {
    2286     6508631 :                 bool existing = (BBP_status(b->batCacheid) & BBPEXISTING) != 0;
    2287     6508631 :                 bool wr = (newmode == BAT_WRITE);
    2288     6508631 :                 bool rd = (bakmode == BAT_WRITE);
    2289     6508631 :                 storage_t m1 = STORE_MEM, m3 = STORE_MEM;
    2290     6508631 :                 storage_t b1 = STORE_MEM, b3 = STORE_MEM;
    2291             : 
    2292     6508631 :                 if (b->theap->parentid == b->batCacheid) {
    2293     6500811 :                         b1 = b->theap->newstorage;
    2294     6511271 :                         m1 = HEAPchangeaccess(b->theap, ACCESSMODE(wr, rd), existing);
    2295             :                 }
    2296     6505389 :                 if (b->tvheap && b->tvheap->parentid == b->batCacheid) {
    2297      946075 :                         bool ta = (newmode == BAT_APPEND && ATOMappendpriv(b->ttype, b->tvheap));
    2298      946075 :                         b3 = b->tvheap->newstorage;
    2299     1892771 :                         m3 = HEAPchangeaccess(b->tvheap, ACCESSMODE(wr && ta, rd && ta), existing);
    2300             :                 }
    2301     6508790 :                 if (m1 == STORE_INVALID || m3 == STORE_INVALID) {
    2302     2452354 :                         MT_lock_unset(&b->theaplock);
    2303     2456832 :                         BBPunfix(b->batCacheid);
    2304     2456832 :                         return NULL;
    2305             :                 }
    2306             : 
    2307             :                 /* set new access mode and mmap modes */
    2308     4056436 :                 b->batRestricted = newmode;
    2309     4056436 :                 if (b->theap->parentid == b->batCacheid)
    2310     4055843 :                         b->theap->newstorage = m1;
    2311     4056436 :                 if (b->tvheap && b->tvheap->parentid == b->batCacheid)
    2312      926812 :                         b->tvheap->newstorage = m3;
    2313             : 
    2314     4056436 :                 MT_lock_unset(&b->theaplock);
    2315     4067535 :                 if (existing && !isVIEW(b) && BBPsave(b) != GDK_SUCCEED) {
    2316             :                         /* roll back all changes */
    2317           0 :                         MT_lock_set(&b->theaplock);
    2318           0 :                         b->batRestricted = bakmode;
    2319           0 :                         b->theap->newstorage = b1;
    2320           0 :                         if (b->tvheap)
    2321           0 :                                 b->tvheap->newstorage = b3;
    2322           0 :                         MT_lock_unset(&b->theaplock);
    2323           0 :                         BBPunfix(b->batCacheid);
    2324           0 :                         return NULL;
    2325             :                 }
    2326             :         } else {
    2327     2761968 :                 MT_lock_unset(&b->theaplock);
    2328             :         }
    2329             :         return b;
    2330             : }
    2331             : 
    2332             : restrict_t
    2333           0 : BATgetaccess(BAT *b)
    2334             : {
    2335           0 :         BATcheck(b, BAT_WRITE);
    2336           0 :         MT_lock_set(&b->theaplock);
    2337           0 :         restrict_t restricted = b->batRestricted;
    2338           0 :         MT_lock_unset(&b->theaplock);
    2339           0 :         return restricted;
    2340             : }
    2341             : 
    2342             : /*
    2343             :  * @- change BAT persistency (persistent,session,transient)
    2344             :  * In the past, we prevented BATS with certain types from being saved at all:
    2345             :  * - BATs of BATs, as having recursive bats creates cascading
    2346             :  *   complexities in commits/aborts.
    2347             :  * - any atom with refcounts, as the BBP has no overview of such
    2348             :  *   user-defined refcounts.
    2349             :  * - pointer types, as the values they point to are bound to be transient.
    2350             :  *
    2351             :  * However, nowadays we do allow such saves, as the BBP swapping
    2352             :  * mechanism was altered to be able to save transient bats temporarily
    2353             :  * to disk in order to make room.  Thus, we must be able to save any
    2354             :  * transient BAT to disk.
    2355             :  *
    2356             :  * What we don't allow is to make such bats persistent.
    2357             :  *
    2358             :  * Although the persistent state does influence the allowed mmap
    2359             :  * modes, this only goes for the *real* committed persistent
    2360             :  * state. Making the bat persistent with BATmode does not matter for
    2361             :  * the heap modes until the commit point is reached. So we do not need
    2362             :  * to do anything with heap modes yet at this point.
    2363             :  */
    2364             : gdk_return
    2365      435010 : BATmode(BAT *b, bool transient)
    2366             : {
    2367      435010 :         BATcheck(b, GDK_FAIL);
    2368             : 
    2369             :         /* can only make a bat PERSISTENT if its role is already
    2370             :          * PERSISTENT */
    2371      435010 :         assert(transient || b->batRole == PERSISTENT);
    2372             :         /* cannot make a view PERSISTENT */
    2373      248673 :         assert(transient || !isVIEW(b));
    2374             : 
    2375      435010 :         if (b->batRole == TRANSIENT && !transient) {
    2376           0 :                 GDKerror("cannot change mode of BAT in TRANSIENT farm.\n");
    2377           0 :                 return GDK_FAIL;
    2378             :         }
    2379             : 
    2380      435010 :         BATiter bi = bat_iterator(b);
    2381      435010 :         bool mustrelease = false;
    2382      435010 :         bool mustretain = false;
    2383      435010 :         bat bid = b->batCacheid;
    2384             : 
    2385      435010 :         if (transient != bi.transient) {
    2386      435010 :                 if (!transient) {
    2387      248673 :                         if (ATOMisdescendant(b->ttype, TYPE_ptr)) {
    2388           0 :                                 GDKerror("%s type implies that %s[%s] "
    2389             :                                          "cannot be made persistent.\n",
    2390             :                                          ATOMname(b->ttype), BATgetId(b),
    2391             :                                          ATOMname(b->ttype));
    2392           0 :                                 bat_iterator_end(&bi);
    2393           0 :                                 return GDK_FAIL;
    2394             :                         }
    2395             :                 }
    2396             : 
    2397             :                 /* we need to delay the calls to BBPretain and
    2398             :                  * BBPrelease until after we have released our reference
    2399             :                  * to the heaps (i.e. until after bat_iterator_end),
    2400             :                  * because in either case, BBPfree can be called (either
    2401             :                  * directly here or in BBPtrim) which waits for the heap
    2402             :                  * reference to come down.  BBPretain calls incref which
    2403             :                  * waits until the trim that is waiting for us is done,
    2404             :                  * so that causes deadlock, and BBPrelease can call
    2405             :                  * BBPfree which causes deadlock with a single thread */
    2406      186337 :                 if (!transient) {
    2407             :                         /* persistent BATs get a logical reference */
    2408             :                         mustretain = true;
    2409      186337 :                 } else if (!bi.transient) {
    2410             :                         /* transient BATs loose their logical reference */
    2411      186337 :                         mustrelease = true;
    2412             :                 }
    2413      435010 :                 MT_lock_set(&GDKswapLock(bid));
    2414      435010 :                 if (!transient) {
    2415      248673 :                         if (BBP_status(bid) & BBPDELETED) {
    2416           0 :                                 BBP_status_on(bid, BBPEXISTING);
    2417           0 :                                 BBP_status_off(bid, BBPDELETED);
    2418             :                         } else
    2419      248673 :                                 BBP_status_on(bid, BBPNEW);
    2420      186337 :                 } else if (!bi.transient) {
    2421      186337 :                         if (!(BBP_status(bid) & BBPNEW))
    2422       60259 :                                 BBP_status_on(bid, BBPDELETED);
    2423      186337 :                         BBP_status_off(bid, BBPPERSISTENT);
    2424             :                 }
    2425             :                 /* session bats or persistent bats that did not
    2426             :                  * witness a commit yet may have been saved */
    2427      435010 :                 MT_lock_set(&b->theaplock);
    2428      435010 :                 if (b->batCopiedtodisk) {
    2429       49663 :                         if (!transient) {
    2430          51 :                                 BBP_status_off(bid, BBPTMP);
    2431             :                         } else {
    2432             :                                 /* TMcommit must remove it to
    2433             :                                  * guarantee free space */
    2434       49612 :                                 BBP_status_on(bid, BBPTMP);
    2435             :                         }
    2436             :                 }
    2437      435010 :                 b->batTransient = transient;
    2438      435010 :                 MT_lock_unset(&b->theaplock);
    2439      435010 :                 MT_lock_unset(&GDKswapLock(bid));
    2440             :         }
    2441      435010 :         bat_iterator_end(&bi);
    2442             :         /* retain/release after bat_iterator_end because of refs to heaps */
    2443      435010 :         if (mustretain)
    2444      248673 :                 BBPretain(bid);
    2445      186337 :         else if (mustrelease)
    2446      186337 :                 BBPrelease(bid);
    2447             :         return GDK_SUCCEED;
    2448             : }
    2449             : 
    2450             : /* BATassertProps checks whether properties are set correctly.  Under
    2451             :  * no circumstances will it change any properties.  Note that the
    2452             :  * "nil" property is not actually used anywhere, but it is checked. */
    2453             : 
    2454             : #ifdef NDEBUG
    2455             : /* assertions are disabled, turn failing tests into a message */
    2456             : #undef assert
    2457             : #define assert(test)    ((void) ((test) || (TRC_CRITICAL(CHECK, "Assertion `%s' failed\n", #test), 0)))
    2458             : #endif
    2459             : 
    2460             : static void
    2461   319470784 : assert_ascii(const char *s)
    2462             : {
    2463   638941568 :         if (!strNil(s)) {
    2464  4731472603 :                 while (*s) {
    2465  4462579465 :                         assert((*s & 0x80) == 0);
    2466  4462579465 :                         s++;
    2467             :                 }
    2468             :         }
    2469   319470784 : }
    2470             : 
    2471             : /* Assert that properties are set correctly.
    2472             :  *
    2473             :  * A BAT can have a bunch of properties set.  Mostly, the property
    2474             :  * bits are set if we *know* the property holds, and not set if we
    2475             :  * don't know whether the property holds (or if we know it doesn't
    2476             :  * hold).  All properties are per column.
    2477             :  *
    2478             :  * The properties currently maintained are:
    2479             :  *
    2480             :  * seqbase      Only valid for TYPE_oid and TYPE_void columns: each
    2481             :  *              value in the column is exactly one more than the
    2482             :  *              previous value, starting at position 0 with the value
    2483             :  *              stored in this property.
    2484             :  *              This implies sorted, key, nonil (which therefore need
    2485             :  *              to be set).
    2486             :  * nil          There is at least one NIL value in the column.
    2487             :  * nonil        There are no NIL values in the column.
    2488             :  * key          All values in the column are distinct.
    2489             :  * sorted       The column is sorted (ascending).  If also revsorted,
    2490             :  *              then all values are equal.
    2491             :  * revsorted    The column is reversely sorted (descending).  If
    2492             :  *              also sorted, then all values are equal.
    2493             :  * nosorted     BUN position which proofs not sorted (given position
    2494             :  *              and one before are not ordered correctly).
    2495             :  * norevsorted  BUN position which proofs not revsorted (given position
    2496             :  *              and one before are not ordered correctly).
    2497             :  * nokey        Pair of BUN positions that proof not all values are
    2498             :  *              distinct (i.e. values at given locations are equal).
    2499             :  * ascii        Only valid for TYPE_str columns: all strings in the column
    2500             :  *              are ASCII, i.e. the UTF-8 encoding for all characters is a
    2501             :  *              single byte.
    2502             :  *
    2503             :  * Note that the functions BATtseqbase and BATkey also set more
    2504             :  * properties than you might suspect.  When setting properties on a
    2505             :  * newly created and filled BAT, you may want to first make sure the
    2506             :  * batCount is set correctly (e.g. by calling BATsetcount), then use
    2507             :  * BATtseqbase and BATkey, and finally set the other properties.
    2508             :  *
    2509             :  * For a view, we cannot check all properties, since it is possible with
    2510             :  * the way the SQL layer works, that a parent BAT gets changed, changing
    2511             :  * the properties, while there is a view.  The view is supposed to look
    2512             :  * at only the non-changing part of the BAT (through candidate lists),
    2513             :  * but this means that the properties of the view might not be correct.
    2514             :  * For this reason, for views, we skip all property checking that looks
    2515             :  * at the BAT content.
    2516             :  */
    2517             : 
    2518             : void
    2519    20746629 : BATassertProps(BAT *b)
    2520             : {
    2521    20746629 :         unsigned bbpstatus;
    2522    20746629 :         BUN p, q;
    2523    20746629 :         int (*cmpf)(const void *, const void *);
    2524    20746629 :         int cmp;
    2525    20746629 :         const void *prev = NULL, *valp, *nilp;
    2526    20746629 :         char filename[sizeof(b->theap->filename)];
    2527    20746629 :         bool isview1, isview2;
    2528             : 
    2529             :         /* do the complete check within a lock */
    2530    20746629 :         MT_lock_set(&b->theaplock);
    2531             : 
    2532             :         /* general BAT sanity */
    2533    20805386 :         assert(b != NULL);
    2534    20805386 :         assert(b->batCacheid > 0);
    2535    20805386 :         assert(b->batCacheid < getBBPsize());
    2536    20735312 :         assert(b == BBP_desc(b->batCacheid));
    2537    20735312 :         assert(b->batCount >= b->batInserted);
    2538             : 
    2539             :         /* headless */
    2540    20735312 :         assert(b->hseqbase <= GDK_oid_max); /* non-nil seqbase */
    2541    20735312 :         assert(b->hseqbase + BATcount(b) <= GDK_oid_max);
    2542             : 
    2543    20735312 :         isview1 = b->theap->parentid != b->batCacheid;
    2544    20735312 :         isview2 = b->tvheap && b->tvheap->parentid != b->batCacheid;
    2545             : 
    2546    20735312 :         bbpstatus = BBP_status(b->batCacheid);
    2547             :         /* only at most one of BBPDELETED, BBPEXISTING, BBPNEW may be set */
    2548    20735312 :         assert(((bbpstatus & BBPDELETED) != 0) +
    2549             :                ((bbpstatus & BBPEXISTING) != 0) +
    2550             :                ((bbpstatus & BBPNEW) != 0) <= 1);
    2551             : 
    2552    20735312 :         assert(b->ttype >= TYPE_void);
    2553    20735312 :         assert(b->ttype < GDKatomcnt);
    2554    20735312 :         assert(isview1 ||
    2555             :                b->ttype == TYPE_void ||
    2556             :                BBPfarms[b->theap->farmid].roles & (1 << b->batRole));
    2557    20735312 :         assert(isview2 ||
    2558             :                b->tvheap == NULL ||
    2559             :                (BBPfarms[b->tvheap->farmid].roles & (1 << b->batRole)));
    2560             : 
    2561    20735312 :         cmpf = ATOMcompare(b->ttype);
    2562    20735312 :         nilp = ATOMnilptr(b->ttype);
    2563             : 
    2564    20735312 :         assert(isview1 || b->theap->free >= tailsize(b, BATcount(b)));
    2565    20735312 :         if (b->ttype != TYPE_void) {
    2566    15629405 :                 assert(b->batCount <= b->batCapacity);
    2567    15629405 :                 assert(isview1 || b->theap->size >= b->theap->free);
    2568    15629405 :                 if (ATOMstorage(b->ttype) == TYPE_msk) {
    2569             :                         /* 32 values per 4-byte word (that's not the
    2570             :                          * same as 8 values per byte...) */
    2571        4898 :                         assert(isview1 || b->theap->size >= 4 * ((b->batCapacity + 31) / 32));
    2572             :                 } else
    2573    15624507 :                         assert(isview1 || b->theap->size >> b->tshift >= b->batCapacity);
    2574             :         }
    2575    20735312 :         if (!isview1) {
    2576    15855001 :                 strconcat_len(filename, sizeof(filename),
    2577    15855001 :                               BBP_physical(b->theap->parentid),
    2578     2807246 :                               b->ttype == TYPE_str ? b->twidth == 1 ? ".tail1" : b->twidth == 2 ? ".tail2" :
    2579             : #if SIZEOF_VAR_T == 8
    2580             :                               b->twidth == 4 ? ".tail4" :
    2581             : #endif
    2582             :                               ".tail" : ".tail",
    2583             :                               NULL);
    2584    15923990 :                 assert(strcmp(b->theap->filename, filename) == 0);
    2585             :         }
    2586    20804301 :         if (!isview2 && b->tvheap) {
    2587     2645179 :                 strconcat_len(filename, sizeof(filename),
    2588     2645179 :                               BBP_physical(b->tvheap->parentid),
    2589             :                               ".theap",
    2590             :                               NULL);
    2591     2645801 :                 assert(strcmp(b->tvheap->filename, filename) == 0);
    2592             :         }
    2593             : 
    2594             :         /* void, str and blob imply varsized */
    2595    20804923 :         if (ATOMstorage(b->ttype) == TYPE_str ||
    2596             :             ATOMstorage(b->ttype) == TYPE_blob)
    2597     3610776 :                 assert(b->tvheap != NULL);
    2598             :         /* other "known" types are not varsized */
    2599    20804923 :         if (ATOMstorage(b->ttype) > TYPE_void &&
    2600             :             ATOMstorage(b->ttype) < TYPE_str)
    2601    12044019 :                 assert(b->tvheap == NULL);
    2602             :         /* shift and width have a particular relationship */
    2603    20804923 :         if (ATOMstorage(b->ttype) == TYPE_str)
    2604     3607851 :                 assert(b->twidth >= 1 && b->twidth <= ATOMsize(b->ttype));
    2605             :         else
    2606    17197072 :                 assert(b->twidth == ATOMsize(b->ttype));
    2607    20804923 :         assert(b->tseqbase <= oid_nil);
    2608             :         /* only oid/void columns can be dense */
    2609    20804923 :         assert(is_oid_nil(b->tseqbase) || b->ttype == TYPE_oid || b->ttype == TYPE_void);
    2610             :         /* a column cannot both have and not have NILs */
    2611    20804923 :         assert(!b->tnil || !b->tnonil);
    2612             :         /* only string columns can be ASCII */
    2613    20804923 :         assert(!b->tascii || ATOMstorage(b->ttype) == TYPE_str);
    2614    20804923 :         if (b->ttype == TYPE_void) {
    2615     5232007 :                 assert(b->tshift == 0);
    2616     5232007 :                 assert(b->twidth == 0);
    2617     5232007 :                 assert(b->tsorted);
    2618     5232007 :                 if (is_oid_nil(b->tseqbase)) {
    2619         338 :                         assert(b->tvheap == NULL);
    2620         338 :                         assert(BATcount(b) == 0 || !b->tnonil);
    2621         338 :                         assert(BATcount(b) <= 1 || !b->tkey);
    2622         338 :                         assert(b->trevsorted);
    2623             :                 } else {
    2624     5231669 :                         if (b->tvheap != NULL) {
    2625             :                                 /* candidate list with exceptions */
    2626       48564 :                                 assert(b->batRole == TRANSIENT || b->batRole == SYSTRANS);
    2627       48564 :                                 assert(b->tvheap->free <= b->tvheap->size);
    2628       48564 :                                 assert(b->tvheap->free >= sizeof(ccand_t));
    2629       48564 :                                 assert((negoid_cand(b) && ccand_free(b) % SIZEOF_OID == 0) || mask_cand(b));
    2630       48564 :                                 if (negoid_cand(b) && ccand_free(b) > 0) {
    2631         376 :                                         const oid *oids = (const oid *) ccand_first(b);
    2632         376 :                                         q = ccand_free(b) / SIZEOF_OID;
    2633         376 :                                         assert(oids != NULL);
    2634         376 :                                         assert(b->tseqbase + BATcount(b) + q <= GDK_oid_max);
    2635             :                                         /* exceptions within range */
    2636         376 :                                         assert(oids[0] >= b->tseqbase);
    2637         376 :                                         assert(oids[q - 1] < b->tseqbase + BATcount(b) + q);
    2638             :                                         /* exceptions sorted */
    2639     2839122 :                                         for (p = 1; p < q; p++)
    2640     2838746 :                                                 assert(oids[p - 1] < oids[p]);
    2641             :                                 }
    2642             :                         }
    2643     5231669 :                         assert(b->tseqbase + b->batCount <= GDK_oid_max);
    2644     5231669 :                         assert(BATcount(b) == 0 || !b->tnil);
    2645     5231669 :                         assert(BATcount(b) <= 1 || !b->trevsorted);
    2646     5231669 :                         assert(b->tkey);
    2647     5231669 :                         assert(b->tnonil);
    2648             :                 }
    2649     5232007 :                 MT_lock_unset(&b->theaplock);
    2650    13157511 :                 return;
    2651             :         }
    2652             : 
    2653    15572916 :         BATiter bi  = bat_iterator_nolock(b);
    2654             : 
    2655    15572916 :         if (BATtdense(b)) {
    2656      267347 :                 assert(b->tseqbase + b->batCount <= GDK_oid_max);
    2657      267347 :                 assert(b->ttype == TYPE_oid);
    2658      267347 :                 assert(b->tsorted);
    2659      267347 :                 assert(b->tkey);
    2660      267347 :                 assert(b->tnonil);
    2661      267347 :                 if ((q = b->batCount) != 0) {
    2662      182785 :                         const oid *o = (const oid *) Tloc(b, 0);
    2663      182785 :                         assert(*o == b->tseqbase);
    2664   219991860 :                         for (p = 1; p < q; p++)
    2665   219809075 :                                 assert(o[p - 1] + 1 == o[p]);
    2666             :                 }
    2667      267347 :                 MT_lock_unset(&b->theaplock);
    2668      267236 :                 return;
    2669             :         }
    2670    15305569 :         assert(1 << b->tshift == b->twidth);
    2671             :         /* only linear atoms can be sorted */
    2672    15305569 :         assert(!b->tsorted || ATOMlinear(b->ttype));
    2673    15305569 :         assert(!b->trevsorted || ATOMlinear(b->ttype));
    2674    15305569 :         if (ATOMlinear(b->ttype)) {
    2675    15315975 :                 assert(b->tnosorted == 0 ||
    2676             :                        (b->tnosorted > 0 &&
    2677             :                         b->tnosorted < b->batCount));
    2678    15315975 :                 assert(!b->tsorted || b->tnosorted == 0);
    2679    15315975 :                 if (!isview1 &&
    2680    15315975 :                     !isview2 &&
    2681     2013484 :                     !b->tsorted &&
    2682      179121 :                     b->tnosorted > 0 &&
    2683      179121 :                     b->tnosorted < b->batCount)
    2684      179546 :                         assert(cmpf(BUNtail(bi, b->tnosorted - 1),
    2685             :                                     BUNtail(bi, b->tnosorted)) > 0);
    2686    15314919 :                 assert(b->tnorevsorted == 0 ||
    2687             :                        (b->tnorevsorted > 0 &&
    2688             :                         b->tnorevsorted < b->batCount));
    2689    15314919 :                 assert(!b->trevsorted || b->tnorevsorted == 0);
    2690    15314919 :                 if (!isview1 &&
    2691    10113220 :                     !isview2 &&
    2692     2648441 :                     !b->trevsorted &&
    2693      570567 :                     b->tnorevsorted > 0 &&
    2694      570567 :                     b->tnorevsorted < b->batCount)
    2695      570563 :                         assert(cmpf(BUNtail(bi, b->tnorevsorted - 1),
    2696             :                                     BUNtail(bi, b->tnorevsorted)) < 0);
    2697             :         }
    2698             :         /* if tkey property set, both tnokey values must be 0 */
    2699    15301932 :         assert(!b->tkey || (b->tnokey[0] == 0 && b->tnokey[1] == 0));
    2700    15301932 :         if (!isview1 &&
    2701    15301932 :             !isview2 &&
    2702     2460051 :             !b->tkey &&
    2703     2460051 :             (b->tnokey[0] != 0 || b->tnokey[1] != 0)) {
    2704             :                 /* if tkey not set and tnokey indicates a proof of
    2705             :                  * non-key-ness, make sure the tnokey values are in
    2706             :                  * range and indeed provide a proof */
    2707      560495 :                 assert(b->tnokey[0] != b->tnokey[1]);
    2708      560495 :                 assert(b->tnokey[0] < b->batCount);
    2709      560495 :                 assert(b->tnokey[1] < b->batCount);
    2710      560495 :                 assert(cmpf(BUNtail(bi, b->tnokey[0]),
    2711             :                             BUNtail(bi, b->tnokey[1])) == 0);
    2712             :         }
    2713             :         /* var heaps must have sane sizes */
    2714    15300265 :         assert(b->tvheap == NULL || b->tvheap->free <= b->tvheap->size);
    2715             : 
    2716    15300265 :         if (!b->tkey && !b->tsorted && !b->trevsorted &&
    2717     5346500 :             !b->tnonil && !b->tnil) {
    2718             :                 /* nothing more to prove */
    2719     2409290 :                 MT_lock_unset(&b->theaplock);
    2720     2404534 :                 return;
    2721             :         }
    2722             : 
    2723             :         /* only do a scan if the bat is not a view */
    2724    12890975 :         if (!isview1 && !isview2) {
    2725     9663436 :                 const ValRecord *prop;
    2726     9663436 :                 const void *maxval = NULL;
    2727     9663436 :                 const void *minval = NULL;
    2728     9663436 :                 const void *maxbound = NULL;
    2729     9663436 :                 const void *minbound = NULL;
    2730     9663436 :                 const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
    2731     9601278 :                 bool seenmax = false, seenmin = false;
    2732     9601278 :                 bool seennil = false;
    2733             : 
    2734     9601278 :                 if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL)
    2735          12 :                         maxbound = VALptr(prop);
    2736     9676741 :                 if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL)
    2737        1447 :                         minbound = VALptr(prop);
    2738     9685181 :                 if (b->tmaxpos != BUN_NONE) {
    2739     1199612 :                         assert(b->tmaxpos < BATcount(b));
    2740     1199612 :                         maxval = BUNtail(bi, b->tmaxpos);
    2741     1199612 :                         assert(cmpf(maxval, nilp) != 0);
    2742             :                 }
    2743     9671060 :                 if (b->tminpos != BUN_NONE) {
    2744     1129008 :                         assert(b->tminpos < BATcount(b));
    2745     1129008 :                         minval = BUNtail(bi, b->tminpos);
    2746     1129008 :                         assert(cmpf(minval, nilp) != 0);
    2747             :                 }
    2748     9673838 :                 if (ATOMstorage(b->ttype) == TYPE_msk) {
    2749             :                         /* for now, don't do extra checks for bit mask */
    2750             :                         ;
    2751     9669004 :                 } else if (b->tsorted || b->trevsorted || !b->tkey) {
    2752             :                         /* if sorted (either way), or we don't have to
    2753             :                          * prove uniqueness, we can do a simple
    2754             :                          * scan */
    2755             :                         /* only call compare function if we have to */
    2756     9647324 :                         bool cmpprv = b->tsorted | b->trevsorted | b->tkey;
    2757             : 
    2758  5546015297 :                         BATloop(b, p, q) {
    2759  5536570798 :                                 valp = BUNtail(bi, p);
    2760  5536570798 :                                 bool isnil = cmpf(valp, nilp) == 0;
    2761  5395906550 :                                 assert(!isnil || !notnull);
    2762  5395906550 :                                 assert(!b->tnonil || !isnil);
    2763  5395906550 :                                 assert(b->ttype != TYPE_flt || !isinf(*(flt*)valp));
    2764  5395906550 :                                 assert(b->ttype != TYPE_dbl || !isinf(*(dbl*)valp));
    2765  5395906550 :                                 if (b->tascii)
    2766   317134403 :                                         assert_ascii(valp);
    2767  5395998338 :                                 if (minbound && !isnil) {
    2768          30 :                                         cmp = cmpf(minbound, valp);
    2769          30 :                                         assert(cmp <= 0);
    2770             :                                 }
    2771  5395998338 :                                 if (maxbound && !isnil) {
    2772          30 :                                         cmp = cmpf(maxbound, valp);
    2773          30 :                                         assert(cmp > 0);
    2774             :                                 }
    2775  5395998338 :                                 if (maxval && !isnil) {
    2776   893235517 :                                         cmp = cmpf(maxval, valp);
    2777   893248534 :                                         assert(cmp >= 0);
    2778   893248534 :                                         seenmax |= cmp == 0;
    2779             :                                 }
    2780  5396011355 :                                 if (minval && !isnil) {
    2781   135135529 :                                         cmp = cmpf(minval, valp);
    2782   135137444 :                                         assert(cmp <= 0);
    2783   135137444 :                                         seenmin |= cmp == 0;
    2784             :                                 }
    2785  5396013270 :                                 if (prev && cmpprv) {
    2786  2570079098 :                                         cmp = cmpf(prev, valp);
    2787  2710605929 :                                         assert(!b->tsorted || cmp <= 0);
    2788  2710605929 :                                         assert(!b->trevsorted || cmp >= 0);
    2789  2710605929 :                                         assert(!b->tkey || cmp != 0);
    2790             :                                 }
    2791  5536540101 :                                 seennil |= isnil;
    2792  5536540101 :                                 if (seennil && !cmpprv &&
    2793     4631758 :                                     maxval == NULL && minval == NULL &&
    2794      172072 :                                     minbound == NULL && maxbound == NULL) {
    2795             :                                         /* we've done all the checking
    2796             :                                          * we can do */
    2797             :                                         break;
    2798             :                                 }
    2799  5536367973 :                                 prev = valp;
    2800             :                         }
    2801             :                 } else {        /* b->tkey && !b->tsorted && !b->trevsorted */
    2802             :                         /* we need to check for uniqueness the hard
    2803             :                          * way (i.e. using a hash table) */
    2804       21680 :                         const char *nme = BBP_physical(b->batCacheid);
    2805       21680 :                         Hash *hs = NULL;
    2806       21680 :                         BUN mask;
    2807             : 
    2808       21680 :                         if ((hs = GDKzalloc(sizeof(Hash))) == NULL) {
    2809           0 :                                 TRC_WARNING(BAT, "Cannot allocate hash table\n");
    2810           0 :                                 goto abort_check;
    2811             :                         }
    2812       21710 :                         if (snprintf(hs->heaplink.filename, sizeof(hs->heaplink.filename), "%s.thshprpl%x", nme, (unsigned) MT_getpid()) >= (int) sizeof(hs->heaplink.filename) ||
    2813       21689 :                             snprintf(hs->heapbckt.filename, sizeof(hs->heapbckt.filename), "%s.thshprpb%x", nme, (unsigned) MT_getpid()) >= (int) sizeof(hs->heapbckt.filename)) {
    2814             :                                 /* cannot happen, see comment in gdk.h
    2815             :                                  * about sizes near definition of
    2816             :                                  * BBPINIT */
    2817           0 :                                 GDKfree(hs);
    2818           0 :                                 TRC_CRITICAL(BAT, "Heap filename is too large\n");
    2819           0 :                                 goto abort_check;
    2820             :                         }
    2821       21700 :                         if (ATOMsize(b->ttype) == 1)
    2822             :                                 mask = (BUN) 1 << 8;
    2823       21687 :                         else if (ATOMsize(b->ttype) == 2)
    2824             :                                 mask = (BUN) 1 << 16;
    2825             :                         else
    2826       21687 :                                 mask = HASHmask(b->batCount);
    2827       21700 :                         hs->heapbckt.parentid = b->batCacheid;
    2828       21700 :                         hs->heaplink.parentid = b->batCacheid;
    2829       21700 :                         if ((hs->heaplink.farmid = BBPselectfarm(
    2830       21700 :                                      TRANSIENT, b->ttype, hashheap)) < 0 ||
    2831       21700 :                             (hs->heapbckt.farmid = BBPselectfarm(
    2832       43370 :                                     TRANSIENT, b->ttype, hashheap)) < 0 ||
    2833       21700 :                             HASHnew(hs, b->ttype, BATcount(b),
    2834             :                                     mask, BUN_NONE, false) != GDK_SUCCEED) {
    2835           0 :                                 GDKfree(hs);
    2836           0 :                                 TRC_WARNING(BAT, "Cannot allocate hash table\n");
    2837           0 :                                 goto abort_check;
    2838             :                         }
    2839   131150777 :                         BATloop(b, p, q) {
    2840   131129069 :                                 BUN hb;
    2841   131129069 :                                 BUN prb;
    2842   131129069 :                                 valp = BUNtail(bi, p);
    2843   131129069 :                                 bool isnil = cmpf(valp, nilp) == 0;
    2844   131094480 :                                 assert(!isnil || !notnull);
    2845   131094480 :                                 assert(b->ttype != TYPE_flt || !isinf(*(flt*)valp));
    2846   131094480 :                                 assert(b->ttype != TYPE_dbl || !isinf(*(dbl*)valp));
    2847   131094480 :                                 if (b->tascii)
    2848       12160 :                                         assert_ascii(valp);
    2849   131094480 :                                 if (minbound && !isnil) {
    2850           0 :                                         cmp = cmpf(minbound, valp);
    2851           0 :                                         assert(cmp <= 0);
    2852             :                                 }
    2853   131094480 :                                 if (maxbound && !isnil) {
    2854           0 :                                         cmp = cmpf(maxbound, valp);
    2855           0 :                                         assert(cmp > 0);
    2856             :                                 }
    2857   131094480 :                                 if (maxval && !isnil) {
    2858        7858 :                                         cmp = cmpf(maxval, valp);
    2859        7858 :                                         assert(cmp >= 0);
    2860        7858 :                                         seenmax |= cmp == 0;
    2861             :                                 }
    2862   131094480 :                                 if (minval && !isnil) {
    2863        7858 :                                         cmp = cmpf(minval, valp);
    2864        7858 :                                         assert(cmp <= 0);
    2865        7858 :                                         seenmin |= cmp == 0;
    2866             :                                 }
    2867   131094480 :                                 prb = HASHprobe(hs, valp);
    2868   131122254 :                                 for (hb = HASHget(hs, prb);
    2869   132278578 :                                      hb != BUN_NONE;
    2870     1156324 :                                      hb = HASHgetlink(hs, hb))
    2871     1176074 :                                         if (cmpf(valp, BUNtail(bi, hb)) == 0)
    2872           0 :                                                 assert(!b->tkey);
    2873   131102504 :                                 HASHputlink(hs, p, HASHget(hs, prb));
    2874   131117565 :                                 HASHput(hs, prb, p);
    2875   131129080 :                                 assert(!b->tnonil || !isnil);
    2876   131129080 :                                 seennil |= isnil;
    2877             :                         }
    2878       21708 :                         HEAPfree(&hs->heaplink, true);
    2879       21673 :                         HEAPfree(&hs->heapbckt, true);
    2880       21686 :                         GDKfree(hs);
    2881             :                 }
    2882     9643168 :           abort_check:
    2883     9643168 :                 GDKclrerr();
    2884     9611101 :                 assert(maxval == NULL || seenmax);
    2885     9611101 :                 assert(minval == NULL || seenmin);
    2886     9611101 :                 assert(!b->tnil || seennil);
    2887             :         }
    2888    12838640 :         MT_lock_unset(&b->theaplock);
    2889             : }

Generated by: LCOV version 1.14