LCOV - code coverage report
Current view: top level - gdk - gdk_bbp.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 1716 2754 62.3 %
Date: 2024-04-26 00:35:57 Functions: 79 92 85.9 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : /*
      14             :  * @a M. L. Kersten, P. Boncz, N. J. Nes
      15             :  * @* BAT Buffer Pool (BBP)
      16             :  * The BATs created and loaded are collected in a BAT buffer pool.
      17             :  * The Bat Buffer Pool has a number of functions:
      18             :  * @table @code
      19             :  *
      20             :  * @item administration and lookup
      21             :  * The BBP is a directory which contains status information about all
      22             :  * known BATs.  This interface may be used very heavily, by
      23             :  * data-intensive applications.  To eliminate all overhead, read-only
      24             :  * access to the BBP may be done by table-lookups. The integer index
      25             :  * type for these lookups is @emph{bat}, as retrieved by
      26             :  * @emph{b->batCacheid}. The @emph{bat} zero is reserved for the nil
      27             :  * bat.
      28             :  *
      29             :  * @item persistence
      30             :  * The BBP is made persistent by saving it to the dictionary file
      31             :  * called @emph{BBP.dir} in the database.
      32             :  *
      33             :  * When the number of BATs rises, having all files in one directory
      34             :  * becomes a bottleneck.  The BBP therefore implements a scheme that
      35             :  * distributes all BATs in a growing directory tree with at most 64
      36             :  * BATs stored in one node.
      37             :  *
      38             :  * @item buffer management
      39             :  * The BBP is responsible for loading and saving of BATs to disk. It
      40             :  * also contains routines to unload BATs from memory when memory
      41             :  * resources get scarce. For this purpose, it administers BAT memory
      42             :  * reference counts (to know which BATs can be unloaded) and BAT usage
      43             :  * statistics (it unloads the least recently used BATs).
      44             :  *
      45             :  * @item recovery
      46             :  * When the database is closed or during a run-time syncpoint, the
      47             :  * system tables must be written to disk in a safe way that is immune
      48             :  * to system failures (like disk full). To do so, the BBP implements
      49             :  * an atomic commit and recovery protocol: first all files to be
      50             :  * overwritten are moved to a BACKUP/ dir. If that succeeds, the
      51             :  * writes are done. If that also fully succeeds the BACKUP/ dir is
      52             :  * renamed to DELETE_ME/ and subsequently deleted.  If not, all files
      53             :  * in BACKUP/ are moved back to their original location.
      54             :  *
      55             :  * @item unloading
      56             :  * Bats which have a logical reference (i.e. lrefs > 0) but no memory
      57             :  * reference (refcnt == 0) can be unloaded. Unloading dirty bats
      58             :  * means moving the original (committed version) to the BACKUP/ dir
      59             :  * and saving the bat. This complicates the commit and recovery/abort
      60             :  * issues.  The commit has to check if the bat is already moved, and
      61             :  * the recovery always has to move back the files from the BACKUP/
      62             :  * dir.
      63             :  *
      64             :  * @item reference counting
      65             :  * Bats have two kinds of references: logical and physical
      66             :  * (pointer) ones.  The logical references are administered by
      67             :  * BBPretain/BBPrelease, the physical ones by BBPfix/BBPunfix.
      68             :  */
      69             : 
      70             : #include "monetdb_config.h"
      71             : #include "gdk.h"
      72             : #include "gdk_private.h"
      73             : #include "mutils.h"
      74             : #ifdef HAVE_FCNTL_H
      75             : #include <fcntl.h>
      76             : #endif
      77             : 
      78             : #ifndef F_OK
      79             : #define F_OK 0
      80             : #endif
      81             : #ifndef S_ISDIR
      82             : #define S_ISDIR(mode)   (((mode) & _S_IFMT) == _S_IFDIR)
      83             : #endif
      84             : #ifndef O_CLOEXEC
      85             : #ifdef _O_NOINHERIT
      86             : #define O_CLOEXEC _O_NOINHERIT  /* Windows */
      87             : #else
      88             : #define O_CLOEXEC 0
      89             : #endif
      90             : #endif
      91             : #ifndef O_BINARY
      92             : #define O_BINARY 0
      93             : #endif
      94             : 
      95             : /*
      96             :  * The BBP has a fixed address, so re-allocation due to a growing BBP
      97             :  * caused by one thread does not disturb reads to the old entries by
      98             :  * another.  This is implemented using anonymous virtual memory;
      99             :  * extensions on the same address are guaranteed because a large
     100             :  * non-committed VM area is requested initially. New slots in the BBP
     101             :  * are found in O(1) by keeping a freelist that uses the 'next' field
     102             :  * in the BBPrec records.
     103             :  */
     104             : static BBPrec BBP0[BBPINIT];
     105             : BBPrec *BBP[N_BBPINIT] = {[0] = BBP0}; /* fixed base VM address of BBP array */
     106             : bat BBPlimit = BBPINIT;         /* current committed VM BBP array */
     107             : static ATOMIC_TYPE BBPsize = ATOMIC_VAR_INIT(0); /* current used size of BBP array */
     108             : 
     109             : struct BBPfarm_t BBPfarms[MAXFARMS];
     110             : 
     111             : #define KITTENNAP 1             /* used to suspend processing */
     112             : #define BBPNONAME "."         /* filler for no name in BBP.dir */
     113             : /*
     114             :  * The hash index uses a bucket index (int array) of size mask that is
     115             :  * tuned for perfect hashing (1 lookup). The bucket chain uses the
     116             :  * 'next' field in the BBPrec records.
     117             :  */
     118             : static MT_Lock BBPnameLock = MT_LOCK_INITIALIZER(BBPnameLock);
     119             : #define BBP_mask        1023            /* number of buckets = & mask */
     120             : static bat BBP_hash[BBP_mask+1];        /* BBP logical name hash buckets */
     121             : static MT_Lock GDKcacheLock = MT_LOCK_INITIALIZER(GDKcacheLock);
     122             : static bat BBP_free;
     123             : static uint32_t BBP_nfree;
     124             : #define BBP_FREE_LOWATER        10
     125             : #define BBP_FREE_HIWATER        50
     126             : 
     127             : static gdk_return BBPfree(BAT *b);
     128             : static void BBPdestroy(BAT *b);
     129             : static void BBPuncacheit(bat bid, bool unloaddesc);
     130             : static gdk_return BBPprepare(bool subcommit);
     131             : static BAT *getBBPdescriptor(bat i);
     132             : static gdk_return BBPbackup(BAT *b, bool subcommit);
     133             : static gdk_return BBPdir_init(void);
     134             : static void BBPcallbacks(void);
     135             : 
     136             : /* two lngs of extra info in BBP.dir */
     137             : /* these two are atomic because of their use in log_new() */
     138             : static ATOMIC_TYPE BBPlogno = ATOMIC_VAR_INIT(0);
     139             : 
     140             : #define BBPtmpcheck(s)  (strncmp(s, "tmp_", 4) == 0)
     141             : 
     142             : #define BBPnamecheck(s) (BBPtmpcheck(s) ? strtol((s) + 4, NULL, 8) : 0)
     143             : 
     144             : #define BATno_shared_heap(b) \
     145             :         (!VIEWtparent(b) && (ATOMIC_GET(&(b)->theap->refs) & HEAPREFS) == 1)
     146             : 
     147             : #define BATshared(b) \
     148             :         ((!VIEWtparent(b) && (ATOMIC_GET(&(b)->theap->refs) & HEAPREFS) > 1) || \
     149             :          ((b)->tvheap && !VIEWvtparent(b) && (ATOMIC_GET(&(b)->tvheap->refs) & HEAPREFS) > 1))
     150             : 
     151             : static void
     152       24443 : BBP_insert(bat i)
     153             : {
     154       24443 :         bat idx = (bat) (strHash(BBP_logical(i)) & BBP_mask);
     155             : 
     156       24443 :         BBP_next(i) = BBP_hash[idx];
     157       24443 :         BBP_hash[idx] = i;
     158       24443 : }
     159             : 
     160             : static void
     161       12312 : BBP_delete(bat i)
     162             : {
     163       12312 :         const char *s = BBP_logical(i);
     164       12312 :         bat idx = (bat) (strHash(s) & BBP_mask);
     165             : 
     166       12312 :         for (bat *h = &BBP_hash[idx]; (i = *h) != 0; h = &BBP_next(i)) {
     167       12312 :                 if (strcmp(BBP_logical(i), s) == 0) {
     168       12312 :                         *h = BBP_next(i);
     169       12312 :                         break;
     170             :                 }
     171             :         }
     172       12312 : }
     173             : 
     174             : bat
     175   484950698 : getBBPsize(void)
     176             : {
     177   484950698 :         return (bat) ATOMIC_GET(&BBPsize);
     178             : }
     179             : 
     180             : lng
     181         381 : getBBPlogno(void)
     182             : {
     183         381 :         return (lng) ATOMIC_GET(&BBPlogno);
     184             : }
     185             : 
     186             : 
     187             : /*
     188             :  * @+ BBP Consistency and Concurrency
     189             :  * While GDK provides the basic building blocks for an ACID system, in
     190             :  * itself it is not such a system, as this would entail too much
     191             :  * overhead that is often not needed. Hence, some consistency control
     192             :  * is left to the user. The first important user constraint is that if
     193             :  * a user updates a BAT, that user must ensure that no-one else
     194             :  * accesses this BAT.
     195             :  *
     196             :  * Concerning buffer management, the BBP carries out a swapping
     197             :  * policy.  BATs are kept in memory till the memory is full. If the
     198             :  * memory is full, the malloc functions initiate BBP trim actions,
     199             :  * that unload the coldest BATs that have a zero reference count. The
     200             :  * second important user constraint is therefore that a user may only
     201             :  * manipulate live BAT data in memory if it is sure that there is at
     202             :  * least one reference count to that BAT.
     203             :  *
     204             :  * The main BBP array is protected by two locks:
     205             :  * @table @code
     206             :  * @item GDKcacheLock
     207             :  * this lock guards the free slot management in the BBP array.  The
     208             :  * BBP operations that allocate a new slot for a new BAT
     209             :  * (@emph{BBPinit},@emph{BBPcacheit}), delete the slot of a destroyed
     210             :  * BAT (@emph{BBPreclaim}), or rename a BAT (@emph{BBPrename}), hold
     211             :  * this lock. It also protects all BAT (re)naming actions, including
     212             :  * (read and write) accesses to the hash table with BAT names.
     213             :  * @item GDKswapLock
     214             :  * this lock guards the swap (loaded/unloaded) status of the
     215             :  * BATs. Hence, all BBP routines that influence the swapping policy,
     216             :  * or actually carry out the swapping policy itself, acquire this lock
     217             :  * (e.g. @emph{BBPfix},@emph{BBPunfix}).  Note that this also means
     218             :  * that updates to the BBP_status indicator array must be protected by
     219             :  * GDKswapLock.
     220             :  *
     221             :  * To reduce contention GDKswapLock was split into multiple locks; it
     222             :  * is now an array of lock pointers which is accessed by
     223             :  * GDKswapLock(bat)
     224             :  * @end table
     225             :  *
     226             :  * Routines that need both locks should first acquire the locks in the
     227             :  * GDKswapLock array (in ascending order) and then GDKcacheLock (and
     228             :  * release them in reverse order).
     229             :  *
     230             :  * To obtain maximum speed, read operations to existing elements in
     231             :  * the BBP are unguarded. As said, it is the user's responsibility that
     232             :  * the BAT that is being read is not being modified. BBP update
     233             :  * actions that modify the BBP data structure itself are locked by the
     234             :  * BBP functions themselves. Hence, multiple concurrent BBP read
     235             :  * operations may be ongoing while at the same time at most one BBP
     236             :  * write operation @strong{on a different BAT} is executing.  This
     237             :  * holds for accesses to the public (quasi-) arrays @emph{BBPcache},
     238             :  * @emph{BBPstatus} and @emph{BBPrefs}.
     239             :  * These arrays are called quasi as now they are
     240             :  * actually stored together in one big BBPrec array called BBP, that
     241             :  * is allocated in anonymous VM space, so we can reallocate this
     242             :  * structure without changing the base address (a crucial feature if
     243             :  * read actions are to go on unlocked while other entries in the BBP
     244             :  * may be modified).
     245             :  */
     246             : static volatile MT_Id locked_by = 0;
     247             : 
     248             : /* use a lock instead of atomic instructions so that we wait for
     249             :  * BBPlock/BBPunlock */
     250             : #define BBP_unload_inc()                        \
     251             :         do {                                    \
     252             :                 MT_lock_set(&GDKunloadLock);        \
     253             :                 BBPunloadCnt++;                 \
     254             :                 MT_lock_unset(&GDKunloadLock);      \
     255             :         } while (0)
     256             : 
     257             : #define BBP_unload_dec()                        \
     258             :         do {                                    \
     259             :                 MT_lock_set(&GDKunloadLock);        \
     260             :                 --BBPunloadCnt;                 \
     261             :                 assert(BBPunloadCnt >= 0);   \
     262             :                 MT_lock_unset(&GDKunloadLock);      \
     263             :         } while (0)
     264             : 
     265             : static int BBPunloadCnt = 0;
     266             : static MT_Lock GDKunloadLock = MT_LOCK_INITIALIZER(GDKunloadLock);
     267             : 
     268             : void
     269          24 : BBPlock(void)
     270             : {
     271          24 :         int i;
     272             : 
     273             :         /* wait for all pending unloads to finish */
     274          24 :         MT_lock_set(&GDKunloadLock);
     275          24 :         while (BBPunloadCnt > 0) {
     276           0 :                 MT_lock_unset(&GDKunloadLock);
     277           0 :                 MT_sleep_ms(1);
     278          24 :                 MT_lock_set(&GDKunloadLock);
     279             :         }
     280             : 
     281          24 :         BBPtmlock();
     282          24 :         MT_lock_set(&GDKcacheLock);
     283      196656 :         for (i = 0; i <= BBP_BATMASK; i++)
     284      196608 :                 MT_lock_set(&GDKswapLock(i));
     285          24 :         locked_by = MT_getpid();
     286             : 
     287          24 :         MT_lock_unset(&GDKunloadLock);
     288          24 : }
     289             : 
     290             : void
     291          24 : BBPunlock(void)
     292             : {
     293          24 :         int i;
     294             : 
     295      196632 :         for (i = BBP_BATMASK; i >= 0; i--)
     296      196608 :                 MT_lock_unset(&GDKswapLock(i));
     297          24 :         MT_lock_unset(&GDKcacheLock);
     298          24 :         locked_by = 0;
     299          24 :         BBPtmunlock();
     300          24 : }
     301             : 
     302             : int
     303    13331032 : BBPselectfarm(role_t role, int type, enum heaptype hptype)
     304             : {
     305    13331032 :         int i;
     306             : 
     307    13331032 :         (void) type;            /* may use in future */
     308    13331032 :         (void) hptype;          /* may use in future */
     309             : 
     310    13331032 :         if (GDKinmemory(0))
     311             :                 return 0;
     312             : 
     313             : #ifndef PERSISTENTHASH
     314             :         if (hptype == hashheap)
     315             :                 role = TRANSIENT;
     316             : #endif
     317             : #ifndef PERSISTENTIDX
     318             :         if (hptype == orderidxheap)
     319             :                 role = TRANSIENT;
     320             : #endif
     321    26325418 :         for (i = 0; i < MAXFARMS; i++)
     322    26325418 :                 if (BBPfarms[i].roles & (1U << (int) role))
     323    13323644 :                         return i;
     324             :         /* must be able to find farms for TRANSIENT and PERSISTENT */
     325           0 :         assert(role != TRANSIENT && role != PERSISTENT);
     326             :         return -1;
     327             : }
     328             : 
     329             : static gdk_return
     330         342 : BBPextend(bat newsize)
     331             : {
     332         342 :         if (newsize > N_BBPINIT * BBPINIT) {
     333           0 :                 GDKerror("trying to extend BAT pool beyond the "
     334             :                          "limit (%d)\n", N_BBPINIT * BBPINIT);
     335           0 :                 return GDK_FAIL;
     336             :         }
     337             : 
     338             :         /* make sure the new size is at least BBPsize large */
     339         343 :         while (BBPlimit < newsize) {
     340           1 :                 BUN limit = BBPlimit >> BBPINITLOG;
     341           1 :                 assert(BBP[limit] == NULL);
     342           1 :                 BBP[limit] = GDKzalloc(BBPINIT * sizeof(BBPrec));
     343           1 :                 if (BBP[limit] == NULL) {
     344           0 :                         GDKerror("failed to extend BAT pool\n");
     345           0 :                         return GDK_FAIL;
     346             :                 }
     347       16385 :                 for (BUN i = 0; i < BBPINIT; i++) {
     348       16384 :                         ATOMIC_INIT(&BBP[limit][i].status, 0);
     349       16384 :                         BBP[limit][i].pid = ~(MT_Id)0;
     350             :                 }
     351           1 :                 BBPlimit += BBPINIT;
     352             :         }
     353             : 
     354             :         return GDK_SUCCEED;
     355             : }
     356             : 
     357             : static gdk_return
     358         118 : recover_dir(int farmid, bool direxists)
     359             : {
     360         118 :         if (direxists) {
     361             :                 /* just try; don't care about these non-vital files */
     362           0 :                 if (GDKunlink(farmid, BATDIR, "BBP", "bak") != GDK_SUCCEED)
     363           0 :                         GDKwarning("unlink of BBP.bak failed\n");
     364           0 :                 if (GDKmove(farmid, BATDIR, "BBP", "dir", BATDIR, "BBP", "bak", false) != GDK_SUCCEED)
     365           0 :                         GDKwarning("rename of BBP.dir to BBP.bak failed\n");
     366             :         }
     367         118 :         return GDKmove(farmid, BAKDIR, "BBP", "dir", BATDIR, "BBP", "dir", true);
     368             : }
     369             : 
     370             : static inline str
     371      782955 : BBPsubdir_recursive(char *s, bat i)
     372             : {
     373      782955 :         i >>= 6;
     374      782955 :         if (i >= 0100) {
     375      189988 :                 s = BBPsubdir_recursive(s, i);
     376      190001 :                 *s++ = DIR_SEP;
     377             :         }
     378      782968 :         i &= 077;
     379      782968 :         *s++ = '0' + (i >> 3);
     380      782968 :         *s++ = '0' + (i & 7);
     381      782968 :         return s;
     382             : }
     383             : 
     384             : static inline void
     385           0 : BBPgetsubdir(char *s, bat i)
     386             : {
     387           0 :         if (i >= 0100) {
     388           0 :                 s = BBPsubdir_recursive(s, i);
     389             :         }
     390           0 :         *s = 0;
     391           0 : }
     392             : 
     393             : static inline void
     394      614627 : BBPgetfilename(char *s, size_t len, bat i)
     395             : {
     396      614627 :         if (i >= 0100) {
     397      593171 :                 char *p = BBPsubdir_recursive(s, i);
     398      593241 :                 *p++ = DIR_SEP;
     399      593241 :                 len -= (p - s);
     400      593241 :                 s = p;
     401             :         }
     402      614697 :         if (snprintf(s, len, "%o", i) >= (int) len)
     403           0 :                 TRC_CRITICAL(BAT_, "impossible error\n");
     404      614697 : }
     405             : 
     406             : static gdk_return BBPrecover(int farmid);
     407             : static gdk_return BBPrecover_subdir(void);
     408             : static bool BBPdiskscan(const char *, size_t);
     409             : 
     410             : static int
     411        8539 : vheapinit(BAT *b, const char *buf, unsigned bbpversion, const char *filename, int lineno)
     412             : {
     413        8539 :         int n = 0;
     414        8539 :         uint64_t free, size;
     415        8539 :         uint16_t storage;
     416             : 
     417        8539 :         (void) bbpversion;      /* could be used to implement compatibility */
     418             : 
     419        8539 :         size = 0;                             /* for GDKLIBRARY_HSIZE case */
     420        8539 :         storage = STORE_INVALID;              /* for GDKLIBRARY_HSIZE case */
     421       17078 :         if (bbpversion <= GDKLIBRARY_HSIZE ?
     422           0 :             sscanf(buf,
     423             :                    " %" SCNu64 " %" SCNu64 " %" SCNu16
     424             :                    "%n",
     425             :                    &free, &size, &storage, &n) < 3 :
     426        8539 :             sscanf(buf,
     427             :                    " %" SCNu64
     428             :                    "%n",
     429             :                    &free, &n) < 1) {
     430           0 :                 TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d", lineno);
     431           0 :                 return -1;
     432             :         }
     433        8539 :         if (b->batCount == 0)
     434        2470 :                 free = 0;
     435        8539 :         if (b->ttype >= 0 &&
     436        8325 :             ATOMstorage(b->ttype) == TYPE_str &&
     437        8271 :             free < GDK_STRHASHTABLE * sizeof(stridx_t) + BATTINY * GDK_VARALIGN)
     438        7059 :                 size = GDK_STRHASHTABLE * sizeof(stridx_t) + BATTINY * GDK_VARALIGN;
     439        1480 :         else if (free < 512)
     440         132 :                 size = 512;
     441             :         else
     442        1348 :                 size = free;
     443       17078 :         *b->tvheap = (Heap) {
     444        8539 :                 .free = (size_t) free,
     445        8539 :                 .size = (size_t) size,
     446             :                 .base = NULL,
     447             :                 .storage = STORE_INVALID,
     448             :                 .cleanhash = true,
     449             :                 .newstorage = STORE_INVALID,
     450             :                 .dirty = false,
     451        8539 :                 .parentid = b->batCacheid,
     452        8539 :                 .farmid = BBPselectfarm(PERSISTENT, b->ttype, varheap),
     453        8539 :                 .hasfile = free > 0,
     454             :         };
     455        8539 :         strconcat_len(b->tvheap->filename, sizeof(b->tvheap->filename),
     456             :                       filename, ".theap", NULL);
     457        8539 :         return n;
     458             : }
     459             : 
     460             : static int
     461       32712 : heapinit(BAT *b, const char *buf,
     462             : #ifdef GDKLIBRARY_HASHASH
     463             :          int *hashash,
     464             : #endif
     465             :          unsigned bbpversion, const char *filename, int lineno)
     466             : {
     467       32712 :         int t;
     468       32712 :         char type[33];
     469       32712 :         uint16_t width;
     470       32712 :         uint16_t var;
     471       32712 :         uint16_t properties;
     472       32712 :         uint64_t nokey0;
     473       32712 :         uint64_t nokey1;
     474       32712 :         uint64_t nosorted;
     475       32712 :         uint64_t norevsorted;
     476       32712 :         uint64_t base;
     477       32712 :         uint64_t free;
     478       32712 :         uint64_t size;
     479       32712 :         uint16_t storage;
     480       32712 :         uint64_t minpos, maxpos;
     481       32712 :         int n;
     482             : 
     483       32712 :         (void) bbpversion;      /* could be used to implement compatibility */
     484             : 
     485       32712 :         minpos = maxpos = (uint64_t) oid_nil; /* for GDKLIBRARY_MINMAX_POS case */
     486       32712 :         size = 0;                             /* for GDKLIBRARY_HSIZE case */
     487       32712 :         storage = STORE_INVALID;              /* for GDKLIBRARY_HSIZE case */
     488       65424 :         if (bbpversion <= GDKLIBRARY_MINMAX_POS ?
     489           0 :             sscanf(buf,
     490             :                    " %10s %" SCNu16 " %" SCNu16 " %" SCNu16 " %" SCNu64
     491             :                    " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64
     492             :                    " %" SCNu64 " %" SCNu64 " %" SCNu16
     493             :                    "%n",
     494             :                    type, &width, &var, &properties, &nokey0,
     495             :                    &nokey1, &nosorted, &norevsorted, &base,
     496             :                    &free, &size, &storage,
     497             :                    &n) < 12 :
     498             :             bbpversion <= GDKLIBRARY_HSIZE ?
     499           0 :             sscanf(buf,
     500             :                    " %10s %" SCNu16 " %" SCNu16 " %" SCNu16 " %" SCNu64
     501             :                    " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64
     502             :                    " %" SCNu64 " %" SCNu64 " %" SCNu16 " %" SCNu64 " %" SCNu64
     503             :                    "%n",
     504             :                    type, &width, &var, &properties, &nokey0,
     505             :                    &nokey1, &nosorted, &norevsorted, &base,
     506             :                    &free, &size, &storage, &minpos, &maxpos,
     507             :                    &n) < 14 :
     508       32712 :             sscanf(buf,
     509             :                    " %10s %" SCNu16 " %" SCNu16 " %" SCNu16 " %" SCNu64
     510             :                    " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64
     511             :                    " %" SCNu64 " %" SCNu64 " %" SCNu64
     512             :                    "%n",
     513             :                    type, &width, &var, &properties, &nokey0,
     514             :                    &nokey1, &nosorted, &norevsorted, &base,
     515             :                    &free, &minpos, &maxpos,
     516             :                    &n) < 12) {
     517           0 :                 TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d", lineno);
     518           0 :                 return -1;
     519             :         }
     520             : 
     521       32712 :         if (strcmp(type, "wkba") == 0)
     522           0 :                 GDKwarning("type wkba (SQL name: GeometryA) is deprecated\n");
     523             : 
     524       32712 :         if (properties & ~0x0F81) {
     525           0 :                 TRC_CRITICAL(GDK, "unknown properties are set: incompatible database on line %d of BBP.dir\n", lineno);
     526           0 :                 return -1;
     527             :         }
     528             : #ifdef GDKLIBRARY_HASHASH
     529       32712 :         *hashash = var & 2;
     530             : #endif
     531       32712 :         var &= ~2;
     532       32712 :         if ((t = ATOMindex(type)) < 0) {
     533         241 :                 if ((t = ATOMunknown_find(type)) == 0) {
     534           0 :                         TRC_CRITICAL(GDK, "no space for atom %s", type);
     535           0 :                         return -1;
     536             :                 }
     537       40796 :         } else if (var != (t == TYPE_void || BATatoms[t].atomPut != NULL)) {
     538           0 :                 TRC_CRITICAL(GDK, "inconsistent entry in BBP.dir: tvarsized mismatch for BAT %d on line %d\n", (int) b->batCacheid, lineno);
     539           0 :                 return -1;
     540       32471 :         } else if (var && t != 0 ?
     541        8325 :                    ATOMsize(t) < width ||
     542        8325 :                    (width != 1 && width != 2 && width != 4
     543             : #if SIZEOF_VAR_T == 8
     544          54 :                     && width != 8
     545             : #endif
     546             :                            ) :
     547       24146 :                    ATOMsize(t) != width) {
     548           0 :                 TRC_CRITICAL(GDK, "inconsistent entry in BBP.dir: tsize mismatch for BAT %d on line %d\n", (int) b->batCacheid, lineno);
     549           0 :                 return -1;
     550             :         }
     551       32712 :         b->ttype = t;
     552       32712 :         b->twidth = width;
     553       32712 :         b->tshift = ATOMelmshift(width);
     554       32712 :         assert_shift_width(b->tshift,b->twidth);
     555       32712 :         b->tnokey[0] = (BUN) nokey0;
     556       32712 :         b->tnokey[1] = (BUN) nokey1;
     557       32712 :         b->tsorted = (bit) ((properties & 0x0001) != 0);
     558       32712 :         b->trevsorted = (bit) ((properties & 0x0080) != 0);
     559       32712 :         b->tkey = (properties & 0x0100) != 0;
     560       32712 :         b->tnonil = (properties & 0x0400) != 0;
     561       32712 :         b->tnil = (properties & 0x0800) != 0;
     562       32712 :         b->tnosorted = (BUN) nosorted;
     563       32712 :         b->tnorevsorted = (BUN) norevsorted;
     564       32712 :         b->tunique_est = 0.0;
     565             :         /* (properties & 0x0200) is the old tdense flag */
     566       32712 :         b->tseqbase = (properties & 0x0200) == 0 || base >= (uint64_t) oid_nil ? oid_nil : (oid) base;
     567       32712 :         b->theap->free = (size_t) free;
     568       32712 :         b->theap->hasfile = free > 0;
     569             :         /* set heap size to match capacity */
     570       32712 :         if (b->ttype == TYPE_msk) {
     571             :                 /* round up capacity to multiple of 32 */
     572        4994 :                 b->batCapacity = (b->batCapacity + 31) & ~((BUN) 31);
     573        4994 :                 b->theap->size = b->batCapacity / 8;
     574             :         } else {
     575       27718 :                 b->theap->size = (size_t) b->batCapacity << b->tshift;
     576             :         }
     577       32712 :         b->theap->base = NULL;
     578       32712 :         settailname(b->theap, filename, t, width);
     579       32712 :         b->theap->storage = STORE_INVALID;
     580       32712 :         b->theap->newstorage = STORE_INVALID;
     581       32712 :         b->theap->farmid = BBPselectfarm(PERSISTENT, b->ttype, offheap);
     582       32712 :         b->theap->dirty = false;
     583       32712 :         b->theap->parentid = b->batCacheid;
     584       32712 :         if (minpos < b->batCount)
     585       12488 :                 b->tminpos = (BUN) minpos;
     586             :         else
     587       20224 :                 b->tminpos = BUN_NONE;
     588       32712 :         if (maxpos < b->batCount)
     589       12502 :                 b->tmaxpos = (BUN) maxpos;
     590             :         else
     591       20210 :                 b->tmaxpos = BUN_NONE;
     592       32712 :         if (t && var) {
     593        8539 :                 t = vheapinit(b, buf + n, bbpversion, filename, lineno);
     594        8539 :                 if (t < 0)
     595             :                         return t;
     596        8539 :                 n += t;
     597             :         } else {
     598       24173 :                 b->tvheap = NULL;
     599             :         }
     600       32712 :         return n;
     601             : }
     602             : 
     603             : /* read a single line from the BBP.dir file (file pointer fp) and fill
     604             :  * in the structure pointed to by bn and extra information through the
     605             :  * other pointers; this function does not allocate any memory; return 0
     606             :  * on end of file, 1 on success, and -1 on failure */
     607             : /* set to true during initialization, else always false; if false, do
     608             :  * not return any options (set pointer to NULL as if there aren't any);
     609             :  * if true and there are options, return them in freshly allocated
     610             :  * memory through *options */
     611             : static bool return_options = false;
     612             : int
     613       33057 : BBPreadBBPline(FILE *fp, unsigned bbpversion, int *lineno, BAT *bn,
     614             : #ifdef GDKLIBRARY_HASHASH
     615             :                int *hashash,
     616             : #endif
     617             :                char *batname, char *filename, char **options)
     618             : {
     619       33057 :         char buf[4096];
     620       33057 :         uint64_t batid;
     621       33057 :         unsigned int properties;
     622       33057 :         int nread, n;
     623       33057 :         char *s;
     624       33057 :         uint64_t count, base = 0;
     625             : 
     626       33057 :         if (fgets(buf, sizeof(buf), fp) == NULL) {
     627         345 :                 if (ferror(fp)) {
     628           0 :                         TRC_CRITICAL(GDK, "error reading BBP.dir on line %d\n", *lineno);
     629           0 :                         return -1;
     630             :                 }
     631             :                 return 0;       /* end of file */
     632             :         }
     633       32712 :         (*lineno)++;
     634       32712 :         if ((s = strpbrk(buf, "\r\n")) != NULL) {
     635       32712 :                 if (s[0] == '\r' && s[1] != '\n') {
     636           0 :                         TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d", *lineno);
     637           0 :                         return -1;
     638             :                 }
     639             :                 /* zap the newline */
     640       32712 :                 *s = '\0';
     641             :         } else {
     642           0 :                 TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d: line too long\n", *lineno);
     643           0 :                 return -1;
     644             :         }
     645             : 
     646       65424 :         if (bbpversion <= GDKLIBRARY_HSIZE ?
     647           0 :             sscanf(buf,
     648             :                    "%" SCNu64 " %*u %128s %*s %u %" SCNu64 " %*u %" SCNu64
     649             :                    "%n",
     650             :                    &batid, batname,
     651             :                    &properties, &count, &base,
     652             :                    &nread) < 5 :
     653             :             bbpversion <= GDKLIBRARY_STATUS ?
     654        5060 :             sscanf(buf,
     655             :                    "%" SCNu64 " %*u %128s %*s %u %" SCNu64 " %" SCNu64
     656             :                    "%n",
     657             :                    &batid, batname,
     658             :                    &properties, &count, &base,
     659             :                    &nread) < 5 :
     660       27652 :             sscanf(buf,
     661             :                    "%" SCNu64 " %128s %u %" SCNu64 " %" SCNu64
     662             :                    "%n",
     663             :                    &batid, batname,
     664             :                    &properties, &count, &base,
     665             :                    &nread) < 5) {
     666           0 :                 TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d", *lineno);
     667           0 :                 return -1;
     668             :         }
     669             : 
     670       32712 :         if (batid >= N_BBPINIT * BBPINIT) {
     671           0 :                 TRC_CRITICAL(GDK, "bat ID (%" PRIu64 ") too large to accomodate (max %d), on line %d.", batid, N_BBPINIT * BBPINIT - 1, *lineno);
     672           0 :                 return -1;
     673             :         }
     674             : 
     675       32712 :         BBPgetfilename(filename, sizeof(BBP_physical(0)), (bat) batid);
     676             : 
     677       32712 :         bn->batCacheid = (bat) batid;
     678       32712 :         bn->batTransient = false;
     679       32712 :         bn->batCopiedtodisk = true;
     680       32712 :         switch ((properties & 0x06) >> 1) {
     681         928 :         case 0:
     682         928 :                 bn->batRestricted = BAT_WRITE;
     683         928 :                 break;
     684       31784 :         case 1:
     685       31784 :                 bn->batRestricted = BAT_READ;
     686       31784 :                 break;
     687           0 :         case 2:
     688           0 :                 bn->batRestricted = BAT_APPEND;
     689           0 :                 break;
     690           0 :         default:
     691           0 :                 TRC_CRITICAL(GDK, "incorrect batRestricted value");
     692           0 :                 return -1;
     693             :         }
     694       32712 :         bn->batCount = (BUN) count;
     695       32712 :         bn->batInserted = bn->batCount;
     696             :         /* set capacity to at least count */
     697       32712 :         bn->batCapacity = (BUN) count <= BATTINY ? BATTINY : (BUN) count;
     698             : 
     699       32712 :         if (base > (uint64_t) GDK_oid_max) {
     700           0 :                 TRC_CRITICAL(GDK, "head seqbase out of range (ID = %" PRIu64 ", seq = %" PRIu64 ") on line %d.", batid, base, *lineno);
     701           0 :                 return -1;
     702             :         }
     703       32712 :         bn->hseqbase = (oid) base;
     704       32712 :         n = heapinit(bn, buf + nread,
     705             : #ifdef GDKLIBRARY_HASHASH
     706             :                      hashash,
     707             : #endif
     708             :                      bbpversion, filename, *lineno);
     709       32712 :         if (n < 0) {
     710             :                 return -1;
     711             :         }
     712       32712 :         nread += n;
     713             : 
     714       32712 :         if (nread >= (int) sizeof(buf) || (buf[nread] != '\0' && buf[nread] != ' ')) {
     715           0 :                 TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d", *lineno);
     716           0 :                 return -1;
     717             :         }
     718       32712 :         if (options) {
     719       32712 :                 if (return_options && buf[nread] == ' ') {
     720           0 :                         if ((*options = GDKstrdup(buf + nread + 1)) == NULL) {
     721           0 :                                 TRC_CRITICAL(GDK, "GDKstrdup failed\n");
     722           0 :                                 return -1;
     723             :                         }
     724             :                 } else {
     725       32712 :                         *options = NULL;
     726             :                 }
     727             :         }
     728             :         return 1;
     729             : }
     730             : 
     731             : static gdk_return
     732         340 : BBPreadEntries(FILE *fp, unsigned bbpversion, int lineno
     733             : #ifdef GDKLIBRARY_HASHASH
     734             :                , bat **hashbats, bat *nhashbats
     735             : #endif
     736             :         )
     737             : {
     738             : #ifdef GDKLIBRARY_HASHASH
     739         340 :         bat *hbats = NULL;
     740         340 :         bat nhbats = 0;
     741             : #endif
     742             : 
     743             :         /* read the BBP.dir and insert the BATs into the BBP */
     744         340 :         return_options = true;
     745         340 :         MT_lock_set(&BBPnameLock);
     746       31472 :         for (;;) {
     747       31812 :                 BAT b;
     748       31812 :                 Heap h;
     749       31812 :                 Heap vh;
     750       31812 :                 vh = h = (Heap) {
     751             :                         .free = 0,
     752             :                 };
     753       31812 :                 b = (BAT) {
     754             :                         .theap = &h,
     755             :                         .tvheap = &vh,
     756             :                 };
     757       31812 :                 char *options;
     758       31812 :                 char headname[129];
     759       31812 :                 char filename[sizeof(BBP_physical(0))];
     760       31812 :                 char logical[1024];
     761             : #ifdef GDKLIBRARY_HASHASH
     762       31812 :                 int Thashash;
     763             : #endif
     764             : 
     765       31812 :                 switch (BBPreadBBPline(fp, bbpversion, &lineno, &b,
     766             : #ifdef GDKLIBRARY_HASHASH
     767             :                                        &Thashash,
     768             : #endif
     769             :                                        headname, filename, &options)) {
     770         340 :                 case 0:
     771             :                         /* end of file */
     772             : #ifdef GDKLIBRARY_HASHASH
     773         340 :                         *hashbats = hbats;
     774         340 :                         *nhashbats = nhbats;
     775             : #endif
     776         340 :                         return_options = false;
     777         340 :                         MT_lock_unset(&BBPnameLock);
     778         340 :                         return GDK_SUCCEED;
     779             :                 case 1:
     780             :                         /* successfully read an entry */
     781       31472 :                         break;
     782           0 :                 default:
     783             :                         /* error */
     784           0 :                         goto bailout;
     785             :                 }
     786             : 
     787       31472 :                 if (b.batCacheid >= N_BBPINIT * BBPINIT) {
     788           0 :                         GDKfree(options);
     789           0 :                         TRC_CRITICAL(GDK, "bat ID (%d) too large to accommodate (max %d), on line %d.", b.batCacheid, N_BBPINIT * BBPINIT - 1, lineno);
     790           0 :                         goto bailout;
     791             :                 }
     792             : 
     793       31472 :                 if (b.batCacheid >= (bat) ATOMIC_GET(&BBPsize)) {
     794           0 :                         if ((bat) ATOMIC_GET(&BBPsize) + 1 >= BBPlimit &&
     795           0 :                             BBPextend(b.batCacheid + 1) != GDK_SUCCEED) {
     796           0 :                                 GDKfree(options);
     797           0 :                                 goto bailout;
     798             :                         }
     799           0 :                         ATOMIC_SET(&BBPsize, b.batCacheid + 1);
     800             :                 }
     801       31472 :                 BAT *bn = BBP_desc(b.batCacheid);
     802       31472 :                 if (bn->batCacheid != 0) {
     803           0 :                         GDKfree(options);
     804           0 :                         TRC_CRITICAL(GDK, "duplicate entry in BBP.dir (ID = "
     805             :                                      "%d) on line %d.", b.batCacheid, lineno);
     806           0 :                         goto bailout;
     807             :                 }
     808             : 
     809             : #ifdef GDKLIBRARY_HASHASH
     810       31472 :                 if (Thashash) {
     811           0 :                         assert(bbpversion <= GDKLIBRARY_HASHASH);
     812           0 :                         bat *sb = GDKrealloc(hbats, ++nhbats * sizeof(bat));
     813           0 :                         if (sb == NULL) {
     814           0 :                                 GDKfree(options);
     815           0 :                                 goto bailout;
     816             :                         }
     817           0 :                         hbats = sb;
     818           0 :                         hbats[nhbats - 1] = b.batCacheid;
     819             :                 }
     820             : #endif
     821             : 
     822       31472 :                 Heap *hn;
     823       31472 :                 if ((hn = GDKmalloc(sizeof(Heap))) == NULL) {
     824           0 :                         GDKfree(options);
     825           0 :                         TRC_CRITICAL(GDK, "cannot allocate memory for BAT.");
     826           0 :                         goto bailout;
     827             :                 }
     828       31472 :                 *bn = b;
     829       31472 :                 *hn = h;
     830       31472 :                 bn->theap = hn;
     831       31472 :                 if (b.tvheap) {
     832        8214 :                         Heap *vhn;
     833        8214 :                         assert(b.tvheap == &vh);
     834        8214 :                         if ((vhn = GDKmalloc(sizeof(Heap))) == NULL) {
     835           0 :                                 GDKfree(hn);
     836           0 :                                 GDKfree(options);
     837           0 :                                 TRC_CRITICAL(GDK, "cannot allocate memory for BAT.");
     838           0 :                                 goto bailout;
     839             :                         }
     840        8214 :                         *vhn = vh;
     841        8214 :                         bn->tvheap = vhn;
     842        8214 :                         ATOMIC_INIT(&bn->tvheap->refs, 1);
     843             :                 }
     844             : 
     845       31472 :                 char name[MT_NAME_LEN];
     846       31472 :                 snprintf(name, sizeof(name), "heaplock%d", bn->batCacheid); /* fits */
     847       31472 :                 MT_lock_init(&bn->theaplock, name);
     848       31472 :                 snprintf(name, sizeof(name), "BATlock%d", bn->batCacheid); /* fits */
     849       31472 :                 MT_lock_init(&bn->batIdxLock, name);
     850       31472 :                 snprintf(name, sizeof(name), "hashlock%d", bn->batCacheid); /* fits */
     851       31472 :                 MT_rwlock_init(&bn->thashlock, name);
     852       31472 :                 ATOMIC_INIT(&bn->theap->refs, 1);
     853             : 
     854       31472 :                 if (snprintf(BBP_bak(b.batCacheid), sizeof(BBP_bak(b.batCacheid)), "tmp_%o", (unsigned) b.batCacheid) >= (int) sizeof(BBP_bak(b.batCacheid))) {
     855           0 :                         BATdestroy(bn);
     856           0 :                         GDKfree(options);
     857           0 :                         TRC_CRITICAL(GDK, "BBP logical filename directory is too large, on line %d\n", lineno);
     858           0 :                         goto bailout;
     859             :                 }
     860       31472 :                 char *s;
     861       31472 :                 if ((s = strchr(headname, '~')) != NULL && s == headname) {
     862             :                         /* sizeof(logical) > sizeof(BBP_bak(b.batCacheid)), so
     863             :                          * this fits */
     864           0 :                         strcpy(logical, BBP_bak(b.batCacheid));
     865             :                 } else {
     866           0 :                         if (s)
     867           0 :                                 *s = 0;
     868       31472 :                         strcpy_len(logical, headname, sizeof(logical));
     869             :                 }
     870       31472 :                 if (strcmp(logical, BBP_bak(b.batCacheid)) == 0) {
     871       30710 :                         BBP_logical(b.batCacheid) = BBP_bak(b.batCacheid);
     872             :                 } else {
     873         762 :                         BBP_logical(b.batCacheid) = GDKstrdup(logical);
     874         762 :                         if (BBP_logical(b.batCacheid) == NULL) {
     875           0 :                                 BATdestroy(bn);
     876           0 :                                 GDKfree(options);
     877           0 :                                 TRC_CRITICAL(GDK, "GDKstrdup failed\n");
     878           0 :                                 goto bailout;
     879             :                         }
     880             :                 }
     881       31472 :                 strcpy_len(BBP_physical(b.batCacheid), filename, sizeof(BBP_physical(b.batCacheid)));
     882             : #ifdef __COVERITY__
     883             :                 /* help coverity */
     884             :                 BBP_physical(b.batCacheid)[sizeof(BBP_physical(b.batCacheid)) - 1] = 0;
     885             : #endif
     886       31472 :                 BBP_options(b.batCacheid) = options;
     887       31472 :                 BBP_refs(b.batCacheid) = 0;
     888       31472 :                 BBP_lrefs(b.batCacheid) = 1;    /* any BAT we encounter here is persistent, so has a logical reference */
     889       31472 :                 BBP_pid(b.batCacheid) = 0;
     890       31472 :                 BBP_status_set(b.batCacheid, BBPEXISTING);
     891       31472 :                 if (BBPnamecheck(BBP_logical(b.batCacheid)) == 0)
     892         762 :                         BBP_insert(b.batCacheid);
     893             :         }
     894             : 
     895           0 :   bailout:
     896           0 :         MT_lock_unset(&BBPnameLock);
     897           0 :         return_options = false;
     898             : #ifdef GDKLIBRARY_HASHASH
     899           0 :         GDKfree(hbats);
     900             : #endif
     901           0 :         return GDK_FAIL;
     902             : }
     903             : 
     904             : /* check that the necessary files for all BATs exist and are large
     905             :  * enough */
     906             : static gdk_return
     907         341 : BBPcheckbats(unsigned bbpversion)
     908             : {
     909         341 :         (void) bbpversion;
     910       87188 :         for (bat bid = 1, size = (bat) ATOMIC_GET(&BBPsize); bid < size; bid++) {
     911       86847 :                 struct stat statb;
     912       86847 :                 BAT *b;
     913       86847 :                 char *path;
     914             : 
     915       86847 :                 b = BBP_desc(bid);
     916       86847 :                 if (b->batCacheid == 0 || b->ttype == TYPE_void) {
     917             :                         /* no files needed */
     918       55375 :                         continue;
     919             :                 }
     920       31472 :                 if (b->theap->free > 0) {
     921       20768 :                         path = GDKfilepath(0, BATDIR, b->theap->filename, NULL);
     922       20768 :                         if (path == NULL)
     923           0 :                                 return GDK_FAIL;
     924             :                         /* first check string offset heap with width,
     925             :                          * then without */
     926       20768 :                         if (MT_stat(path, &statb) < 0) {
     927             : #ifdef GDKLIBRARY_TAILN
     928           0 :                                 if (b->ttype == TYPE_str &&
     929           0 :                                     b->twidth < SIZEOF_VAR_T) {
     930           0 :                                         size_t taillen = strlen(path) - 1;
     931           0 :                                         char tailsave = path[taillen];
     932           0 :                                         path[taillen] = 0;
     933           0 :                                         if (MT_stat(path, &statb) < 0) {
     934           0 :                                                 GDKsyserror("cannot stat file %s%c or %s (expected size %zu)\n",
     935             :                                                             path, tailsave, path, b->theap->free);
     936           0 :                                                 GDKfree(path);
     937           0 :                                                 return GDK_FAIL;
     938             :                                         }
     939             :                                 } else
     940             : #endif
     941             :                                 {
     942           0 :                                         GDKsyserror("cannot stat file %s (expected size %zu)\n",
     943             :                                                     path, b->theap->free);
     944           0 :                                         GDKfree(path);
     945           0 :                                         return GDK_FAIL;
     946             :                                 }
     947             :                         }
     948       20768 :                         if ((size_t) statb.st_size < b->theap->free) {
     949           0 :                                 GDKerror("file %s too small (expected %zu, actual %zu)\n", path, b->theap->free, (size_t) statb.st_size);
     950           0 :                                 GDKfree(path);
     951           0 :                                 return GDK_FAIL;
     952             :                         }
     953       20768 :                         size_t hfree = b->theap->free;
     954       20768 :                         hfree = (hfree + GDK_mmap_pagesize - 1) & ~(GDK_mmap_pagesize - 1);
     955       20768 :                         if (hfree == 0)
     956           0 :                                 hfree = GDK_mmap_pagesize;
     957       20768 :                         if (statb.st_size > (off_t) hfree) {
     958           9 :                                 int fd;
     959           9 :                                 if ((fd = MT_open(path, O_RDWR | O_CLOEXEC | O_BINARY)) >= 0) {
     960           9 :                                         if (ftruncate(fd, hfree) == -1)
     961           0 :                                                 perror("ftruncate");
     962           9 :                                         (void) close(fd);
     963             :                                 }
     964             :                         }
     965       20768 :                         GDKfree(path);
     966             :                 }
     967       31472 :                 if (b->tvheap != NULL && b->tvheap->free > 0) {
     968        5849 :                         path = GDKfilepath(0, BATDIR, BBP_physical(b->batCacheid), "theap");
     969        5849 :                         if (path == NULL)
     970             :                                 return GDK_FAIL;
     971        5849 :                         if (MT_stat(path, &statb) < 0) {
     972           0 :                                 GDKsyserror("cannot stat file %s\n",
     973             :                                             path);
     974           0 :                                 GDKfree(path);
     975           0 :                                 return GDK_FAIL;
     976             :                         }
     977        5849 :                         if ((size_t) statb.st_size < b->tvheap->free) {
     978           0 :                                 GDKerror("file %s too small (expected %zu, actual %zu)\n", path, b->tvheap->free, (size_t) statb.st_size);
     979           0 :                                 GDKfree(path);
     980           0 :                                 return GDK_FAIL;
     981             :                         }
     982        5849 :                         size_t hfree = b->tvheap->free;
     983        5849 :                         hfree = (hfree + GDK_mmap_pagesize - 1) & ~(GDK_mmap_pagesize - 1);
     984        5849 :                         if (hfree == 0)
     985           0 :                                 hfree = GDK_mmap_pagesize;
     986        5849 :                         if (statb.st_size > (off_t) hfree) {
     987          10 :                                 int fd;
     988          10 :                                 if ((fd = MT_open(path, O_RDWR | O_CLOEXEC | O_BINARY)) >= 0) {
     989          10 :                                         if (ftruncate(fd, hfree) == -1)
     990           0 :                                                 perror("ftruncate");
     991          10 :                                         (void) close(fd);
     992             :                                 }
     993             :                         }
     994        5849 :                         GDKfree(path);
     995             :                 }
     996             :         }
     997             :         return GDK_SUCCEED;
     998             : }
     999             : 
    1000             : #ifdef HAVE_HGE
    1001             : #define SIZEOF_MAX_INT SIZEOF_HGE
    1002             : #else
    1003             : #define SIZEOF_MAX_INT SIZEOF_LNG
    1004             : #endif
    1005             : 
    1006             : unsigned
    1007         345 : BBPheader(FILE *fp, int *lineno, bat *bbpsize, lng *logno, bool allow_hge_upgrade)
    1008             : {
    1009         345 :         char buf[BUFSIZ];
    1010         345 :         int sz, ptrsize, oidsize, intsize;
    1011         345 :         unsigned bbpversion;
    1012             : 
    1013         345 :         if (fgets(buf, sizeof(buf), fp) == NULL) {
    1014           0 :                 TRC_CRITICAL(GDK, "BBP.dir is empty");
    1015           0 :                 return 0;
    1016             :         }
    1017         345 :         ++*lineno;
    1018         345 :         if (sscanf(buf, "BBP.dir, GDKversion %u\n", &bbpversion) != 1) {
    1019           0 :                 GDKerror("old BBP without version number; "
    1020             :                          "dump the database using a compatible version, "
    1021             :                          "then restore into new database using this version.\n");
    1022           0 :                 return 0;
    1023             :         }
    1024         345 :         if (bbpversion != GDKLIBRARY &&
    1025             :             bbpversion != GDKLIBRARY_STATUS &&
    1026             :             bbpversion != GDKLIBRARY_JSON &&
    1027             :             bbpversion != GDKLIBRARY_HSIZE &&
    1028             :             bbpversion != GDKLIBRARY_HASHASH &&
    1029         345 :             bbpversion != GDKLIBRARY_TAILN &&
    1030             :             bbpversion != GDKLIBRARY_MINMAX_POS) {
    1031           0 :                 TRC_CRITICAL(GDK, "incompatible BBP version: expected 0%o, got 0%o. "
    1032             :                              "This database was probably created by a %s version of MonetDB.",
    1033             :                              GDKLIBRARY, bbpversion,
    1034             :                              bbpversion > GDKLIBRARY ? "newer" : "too old");
    1035           0 :                 return 0;
    1036             :         }
    1037         345 :         if (fgets(buf, sizeof(buf), fp) == NULL) {
    1038           0 :                 TRC_CRITICAL(GDK, "short BBP");
    1039           0 :                 return 0;
    1040             :         }
    1041         345 :         ++*lineno;
    1042         345 :         if (sscanf(buf, "%d %d %d", &ptrsize, &oidsize, &intsize) != 3) {
    1043           0 :                 TRC_CRITICAL(GDK, "BBP.dir has incompatible format: pointer, OID, and max. integer sizes are missing on line %d", *lineno);
    1044           0 :                 return 0;
    1045             :         }
    1046         345 :         if (ptrsize != SIZEOF_SIZE_T || oidsize != SIZEOF_OID) {
    1047           0 :                 TRC_CRITICAL(GDK, "database created with incompatible server: "
    1048             :                              "expected pointer size %d, got %d, expected OID size %d, got %d.",
    1049             :                              SIZEOF_SIZE_T, ptrsize, SIZEOF_OID, oidsize);
    1050           0 :                 return 0;
    1051             :         }
    1052         345 :         if (intsize > SIZEOF_MAX_INT) {
    1053           0 :                 TRC_CRITICAL(GDK, "database created with incompatible server: "
    1054             :                              "expected max. integer size %d, got %d.",
    1055             :                              SIZEOF_MAX_INT, intsize);
    1056           0 :                 return 0;
    1057             :         }
    1058         345 :         if (intsize < SIZEOF_MAX_INT && !allow_hge_upgrade) {
    1059           0 :                 TRC_CRITICAL(GDK, "database created with incompatible server: "
    1060             :                              "expected max. integer size %d, got %d; "
    1061             :                              "use --set allow_hge_upgrade=yes to upgrade.",
    1062             :                              SIZEOF_MAX_INT, intsize);
    1063           0 :                 return 0;
    1064             :         }
    1065         345 :         if (fgets(buf, sizeof(buf), fp) == NULL) {
    1066           0 :                 TRC_CRITICAL(GDK, "short BBP");
    1067           0 :                 return 0;
    1068             :         }
    1069         345 :         ++*lineno;
    1070         345 :         if (sscanf(buf, "BBPsize=%d", &sz) != 1) {
    1071           0 :                 TRC_CRITICAL(GDK, "no BBPsize value found\n");
    1072           0 :                 return 0;
    1073             :         }
    1074         345 :         if (sz > *bbpsize)
    1075         124 :                 *bbpsize = sz;
    1076         345 :         if (bbpversion > GDKLIBRARY_MINMAX_POS) {
    1077         345 :                 if (fgets(buf, sizeof(buf), fp) == NULL) {
    1078           0 :                         TRC_CRITICAL(GDK, "short BBP");
    1079           0 :                         return 0;
    1080             :                 }
    1081         690 :                 if (bbpversion <= GDKLIBRARY_STATUS ?
    1082          16 :                     sscanf(buf, "BBPinfo=" LLSCN " %*d", logno) != 1 :
    1083         329 :                     sscanf(buf, "BBPinfo=" LLSCN, logno) != 1) {
    1084           0 :                         TRC_CRITICAL(GDK, "no info value found\n");
    1085           0 :                         return 0;
    1086             :                 }
    1087             :         } else {
    1088           0 :                 *logno = 0;
    1089             :         }
    1090         345 :         return bbpversion;
    1091             : }
    1092             : 
    1093             : bool
    1094    77500029 : GDKinmemory(int farmid)
    1095             : {
    1096    77500029 :         if (farmid == NOFARM)
    1097             :                 farmid = 0;
    1098    76461951 :         assert(farmid >= 0 && farmid < MAXFARMS);
    1099    77500029 :         return BBPfarms[farmid].dirname == NULL;
    1100             : }
    1101             : 
    1102             : /* all errors are fatal */
    1103             : gdk_return
    1104        1017 : BBPaddfarm(const char *dirname, uint32_t rolemask, bool logerror)
    1105             : {
    1106        1017 :         struct stat st;
    1107        1017 :         int i;
    1108             : 
    1109        1017 :         if (dirname == NULL) {
    1110           1 :                 assert(BBPfarms[0].dirname == NULL);
    1111           1 :                 assert(rolemask & 1);
    1112           1 :                 assert(BBPfarms[0].roles == 0);
    1113           1 :                 BBPfarms[0].roles = rolemask;
    1114           1 :                 return GDK_SUCCEED;
    1115             :         }
    1116        1016 :         if (strchr(dirname, '\n') != NULL) {
    1117           0 :                 if (logerror)
    1118           0 :                         GDKerror("no newline allowed in directory name\n");
    1119           0 :                 return GDK_FAIL;
    1120             :         }
    1121        1016 :         if (rolemask == 0 || (rolemask & 1 && BBPfarms[0].roles != 0)) {
    1122           0 :                 if (logerror)
    1123           0 :                         GDKerror("bad rolemask\n");
    1124           0 :                 return GDK_FAIL;
    1125             :         }
    1126        1016 :         if (strcmp(dirname, "in-memory") == 0 ||
    1127        1015 :             /* backward compatibility: */ strcmp(dirname, ":memory:") == 0) {
    1128             :                 dirname = NULL;
    1129        1015 :         } else if (MT_mkdir(dirname) < 0) {
    1130         927 :                 if (errno == EEXIST) {
    1131         927 :                         if (MT_stat(dirname, &st) == -1 || !S_ISDIR(st.st_mode)) {
    1132           0 :                                 if (logerror)
    1133           0 :                                         GDKerror("%s: not a directory\n", dirname);
    1134           0 :                                 return GDK_FAIL;
    1135             :                         }
    1136             :                 } else {
    1137           0 :                         if (logerror)
    1138           0 :                                 GDKsyserror("%s: cannot create directory\n", dirname);
    1139           0 :                         return GDK_FAIL;
    1140             :                 }
    1141             :         }
    1142        2021 :         for (i = 0; i < MAXFARMS; i++) {
    1143        2021 :                 if (BBPfarms[i].roles == 0) {
    1144        1016 :                         if (dirname) {
    1145        1015 :                                 BBPfarms[i].dirname = GDKstrdup(dirname);
    1146        1015 :                                 if (BBPfarms[i].dirname == NULL)
    1147             :                                         return GDK_FAIL;
    1148             :                         }
    1149        1016 :                         BBPfarms[i].roles = rolemask;
    1150        1016 :                         if ((rolemask & 1) == 0 && dirname != NULL) {
    1151             :                                 char *bbpdir;
    1152             :                                 int j;
    1153             : 
    1154        1025 :                                 for (j = 0; j < i; j++)
    1155         849 :                                         if (BBPfarms[j].dirname != NULL &&
    1156         849 :                                             strcmp(BBPfarms[i].dirname,
    1157             :                                                    BBPfarms[j].dirname) == 0)
    1158             :                                                 return GDK_SUCCEED;
    1159             :                                 /* if an extra farm, make sure we
    1160             :                                  * don't find a BBP.dir there that
    1161             :                                  * might belong to an existing
    1162             :                                  * database */
    1163         176 :                                 bbpdir = GDKfilepath(i, BATDIR, "BBP", "dir");
    1164         176 :                                 if (bbpdir == NULL) {
    1165             :                                         return GDK_FAIL;
    1166             :                                 }
    1167         176 :                                 if (MT_stat(bbpdir, &st) != -1 || errno != ENOENT) {
    1168           0 :                                         GDKfree(bbpdir);
    1169           0 :                                         if (logerror)
    1170           0 :                                                 GDKerror("%s is a database\n", dirname);
    1171           0 :                                         return GDK_FAIL;
    1172             :                                 }
    1173         176 :                                 GDKfree(bbpdir);
    1174         176 :                                 bbpdir = GDKfilepath(i, BAKDIR, "BBP", "dir");
    1175         176 :                                 if (bbpdir == NULL) {
    1176             :                                         return GDK_FAIL;
    1177             :                                 }
    1178         176 :                                 if (MT_stat(bbpdir, &st) != -1 || errno != ENOENT) {
    1179           0 :                                         GDKfree(bbpdir);
    1180           0 :                                         if (logerror)
    1181           0 :                                                 GDKerror("%s is a database\n", dirname);
    1182           0 :                                         return GDK_FAIL;
    1183             :                                 }
    1184         176 :                                 GDKfree(bbpdir);
    1185             :                         }
    1186         519 :                         return GDK_SUCCEED;
    1187             :                 }
    1188             :         }
    1189           0 :         if (logerror)
    1190           0 :                 GDKerror("too many farms\n");
    1191             :         return GDK_FAIL;
    1192             : }
    1193             : 
    1194             : gdk_return
    1195         343 : BBPchkfarms(void)
    1196             : {
    1197         343 :         const char *dir = NULL;
    1198         343 :         uint32_t rolemask = 0;
    1199         343 :         if ((BBPfarms[0].roles & 1) == 0) {
    1200           0 :                 GDKerror("Must call BBPaddfarms at least once for persistent data\n");
    1201           0 :                 return GDK_FAIL;
    1202             :         }
    1203       11319 :         for (int i = 0; i < MAXFARMS; i++) {
    1204       10976 :                 if (BBPfarms[i].roles != 0) {
    1205         674 :                         dir = BBPfarms[i].dirname;
    1206         674 :                         rolemask |= BBPfarms[i].roles;
    1207             :                 }
    1208             :         }
    1209         343 :         if (dir == NULL)
    1210           1 :                 dir = "in-memory";
    1211         343 :         if ((rolemask & (1U << TRANSIENT)) == 0) {
    1212           0 :                 gdk_return rc = BBPaddfarm(dir, 1U << TRANSIENT, true);
    1213           0 :                 if (rc != GDK_SUCCEED)
    1214             :                         return rc;
    1215             :         }
    1216         343 :         if ((rolemask & (1U << SYSTRANS)) == 0) {
    1217         343 :                 gdk_return rc = BBPaddfarm(dir, 1U << SYSTRANS, true);
    1218         343 :                 if (rc != GDK_SUCCEED)
    1219             :                         return rc;
    1220             :         }
    1221             :         return GDK_SUCCEED;
    1222             : }
    1223             : 
    1224             : #ifdef GDKLIBRARY_HASHASH
    1225             : static gdk_return
    1226           0 : fixhashashbat(BAT *b)
    1227             : {
    1228           0 :         const char *nme = BBP_physical(b->batCacheid);
    1229           0 :         char *srcdir = GDKfilepath(NOFARM, BATDIR, nme, NULL);
    1230           0 :         if (srcdir == NULL) {
    1231           0 :                 TRC_CRITICAL(GDK, "GDKfilepath failed\n");
    1232           0 :                 return GDK_FAIL;
    1233             :         }
    1234           0 :         char *s;
    1235           0 :         if ((s = strrchr(srcdir, DIR_SEP)) != NULL)
    1236           0 :                 *s = 0;
    1237           0 :         const char *bnme;
    1238           0 :         if ((bnme = strrchr(nme, DIR_SEP)) != NULL)
    1239           0 :                 bnme++;
    1240             :         else
    1241             :                 bnme = nme;
    1242           0 :         long_str filename;
    1243           0 :         snprintf(filename, sizeof(filename), "BACKUP%c%s", DIR_SEP, bnme);
    1244             : 
    1245             :         /* we don't maintain index structures */
    1246           0 :         HASHdestroy(b);
    1247           0 :         IMPSdestroy(b);
    1248           0 :         OIDXdestroy(b);
    1249           0 :         PROPdestroy(b);
    1250           0 :         STRMPdestroy(b);
    1251           0 :         RTREEdestroy(b);
    1252             : 
    1253             :         /* make backup of heaps */
    1254           0 :         const char *t;
    1255           0 :         if (GDKmove(b->theap->farmid, srcdir, bnme, "tail1",
    1256             :                     BAKDIR, bnme, "tail1", false) == GDK_SUCCEED)
    1257             :                 t = "tail1";
    1258           0 :         else if (GDKmove(b->theap->farmid, srcdir, bnme, "tail2",
    1259             :                          BAKDIR, bnme, "tail2", false) == GDK_SUCCEED)
    1260             :                 t = "tail2";
    1261             : #if SIZEOF_VAR_T == 8
    1262           0 :         else if (GDKmove(b->theap->farmid, srcdir, bnme, "tail4",
    1263             :                          BAKDIR, bnme, "tail4", false) == GDK_SUCCEED)
    1264             :                 t = "tail4";
    1265             : #endif
    1266           0 :         else if (GDKmove(b->theap->farmid, srcdir, bnme, "tail",
    1267             :                          BAKDIR, bnme, "tail", true) == GDK_SUCCEED)
    1268             :                 t = "tail";
    1269             :         else {
    1270           0 :                 GDKfree(srcdir);
    1271           0 :                 TRC_CRITICAL(GDK, "cannot make backup of %s.tail\n", nme);
    1272           0 :                 return GDK_FAIL;
    1273             :         }
    1274           0 :         GDKclrerr();
    1275           0 :         if (GDKmove(b->theap->farmid, srcdir, bnme, "theap",
    1276             :                     BAKDIR, bnme, "theap", true) != GDK_SUCCEED) {
    1277           0 :                 GDKfree(srcdir);
    1278           0 :                 TRC_CRITICAL(GDK, "cannot make backup of %s.theap\n", nme);
    1279           0 :                 return GDK_FAIL;
    1280             :         }
    1281             :         /* load old heaps */
    1282           0 :         Heap h1 = *b->theap; /* old heap */
    1283           0 :         h1.base = NULL;
    1284           0 :         h1.dirty = false;
    1285           0 :         strconcat_len(h1.filename, sizeof(h1.filename), filename, ".", t, NULL);
    1286           0 :         if (HEAPload(&h1, filename, t, false) != GDK_SUCCEED) {
    1287           0 :                 GDKfree(srcdir);
    1288           0 :                 TRC_CRITICAL(GDK, "loading old tail heap "
    1289             :                              "for BAT %d failed\n", b->batCacheid);
    1290           0 :                 return GDK_FAIL;
    1291             :         }
    1292           0 :         Heap vh1 = *b->tvheap;       /* old heap */
    1293           0 :         vh1.base = NULL;
    1294           0 :         vh1.dirty = false;
    1295           0 :         strconcat_len(vh1.filename, sizeof(vh1.filename), filename, ".theap", NULL);
    1296           0 :         if (HEAPload(&vh1, filename, "theap", false) != GDK_SUCCEED) {
    1297           0 :                 GDKfree(srcdir);
    1298           0 :                 HEAPfree(&h1, false);
    1299           0 :                 TRC_CRITICAL(GDK, "loading old string heap "
    1300             :                              "for BAT %d failed\n", b->batCacheid);
    1301           0 :                 return GDK_FAIL;
    1302             :         }
    1303             : 
    1304             :         /* create new heaps */
    1305           0 :         Heap *h2 = GDKmalloc(sizeof(Heap));
    1306           0 :         Heap *vh2 = GDKmalloc(sizeof(Heap));
    1307           0 :         if (h2 == NULL || vh2 == NULL) {
    1308           0 :                 GDKfree(h2);
    1309           0 :                 GDKfree(vh2);
    1310           0 :                 GDKfree(srcdir);
    1311           0 :                 HEAPfree(&h1, false);
    1312           0 :                 HEAPfree(&vh1, false);
    1313           0 :                 TRC_CRITICAL(GDK, "allocating new heaps "
    1314             :                              "for BAT %d failed\n", b->batCacheid);
    1315           0 :                 return GDK_FAIL;
    1316             :         }
    1317           0 :         *h2 = *b->theap;
    1318           0 :         h2->base = NULL;
    1319           0 :         if (HEAPalloc(h2, b->batCapacity, b->twidth) != GDK_SUCCEED) {
    1320           0 :                 GDKfree(h2);
    1321           0 :                 GDKfree(vh2);
    1322           0 :                 GDKfree(srcdir);
    1323           0 :                 HEAPfree(&h1, false);
    1324           0 :                 HEAPfree(&vh1, false);
    1325           0 :                 TRC_CRITICAL(GDK, "allocating new tail heap "
    1326             :                              "for BAT %d failed\n", b->batCacheid);
    1327           0 :                 return GDK_FAIL;
    1328             :         }
    1329           0 :         h2->dirty = true;
    1330           0 :         h2->free = h1.free;
    1331             : 
    1332           0 :         *vh2 = *b->tvheap;
    1333           0 :         strconcat_len(vh2->filename, sizeof(vh2->filename), nme, ".theap", NULL);
    1334           0 :         strHeap(vh2, b->batCapacity);
    1335           0 :         if (vh2->base == NULL) {
    1336           0 :                 GDKfree(srcdir);
    1337           0 :                 HEAPfree(&h1, false);
    1338           0 :                 HEAPfree(&vh1, false);
    1339           0 :                 HEAPfree(h2, false);
    1340           0 :                 GDKfree(h2);
    1341           0 :                 GDKfree(vh2);
    1342           0 :                 TRC_CRITICAL(GDK, "allocating new string heap "
    1343             :                              "for BAT %d failed\n", b->batCacheid);
    1344           0 :                 return GDK_FAIL;
    1345             :         }
    1346           0 :         vh2->dirty = true;
    1347           0 :         ATOMIC_INIT(&h2->refs, 1);
    1348           0 :         ATOMIC_INIT(&vh2->refs, 1);
    1349           0 :         Heap *ovh = b->tvheap;
    1350           0 :         b->tvheap = vh2;
    1351           0 :         vh2 = NULL;             /* no longer needed */
    1352           0 :         for (BUN i = 0; i < b->batCount; i++) {
    1353           0 :                 var_t o;
    1354           0 :                 switch (b->twidth) {
    1355           0 :                 case 1:
    1356           0 :                         o = (var_t) ((uint8_t *) h1.base)[i] + GDK_VAROFFSET;
    1357           0 :                         break;
    1358           0 :                 case 2:
    1359           0 :                         o = (var_t) ((uint16_t *) h1.base)[i] + GDK_VAROFFSET;
    1360           0 :                         break;
    1361             : #if SIZEOF_VAR_T == 8
    1362           0 :                 case 4:
    1363           0 :                         o = (var_t) ((uint32_t *) h1.base)[i];
    1364           0 :                         break;
    1365             : #endif
    1366           0 :                 default:
    1367           0 :                         o = ((var_t *) h1.base)[i];
    1368           0 :                         break;
    1369             :                 }
    1370           0 :                 const char *s = vh1.base + o;
    1371           0 :                 var_t no = strPut(b, &o, s);
    1372           0 :                 if (no == 0) {
    1373           0 :                         HEAPfree(&h1, false);
    1374           0 :                         HEAPfree(&vh1, false);
    1375           0 :                         HEAPdecref(h2, false);
    1376           0 :                         HEAPdecref(b->tvheap, false);
    1377           0 :                         b->tvheap = ovh;
    1378           0 :                         GDKfree(srcdir);
    1379           0 :                         TRC_CRITICAL(GDK, "storing string value "
    1380             :                                      "for BAT %d failed\n", b->batCacheid);
    1381           0 :                         return GDK_FAIL;
    1382             :                 }
    1383           0 :                 assert(no >= GDK_VAROFFSET);
    1384           0 :                 switch (b->twidth) {
    1385           0 :                 case 1:
    1386           0 :                         no -= GDK_VAROFFSET;
    1387           0 :                         assert(no <= 0xFF);
    1388           0 :                         ((uint8_t *) h2->base)[i] = (uint8_t) no;
    1389           0 :                         break;
    1390           0 :                 case 2:
    1391           0 :                         no -= GDK_VAROFFSET;
    1392           0 :                         assert(no <= 0xFFFF);
    1393           0 :                         ((uint16_t *) h2->base)[i] = (uint16_t) no;
    1394           0 :                         break;
    1395             : #if SIZEOF_VAR_T == 8
    1396           0 :                 case 4:
    1397           0 :                         assert(no <= 0xFFFFFFFF);
    1398           0 :                         ((uint32_t *) h2->base)[i] = (uint32_t) no;
    1399           0 :                         break;
    1400             : #endif
    1401           0 :                 default:
    1402           0 :                         ((var_t *) h2->base)[i] = no;
    1403           0 :                         break;
    1404             :                 }
    1405             :         }
    1406             : 
    1407             :         /* cleanup */
    1408           0 :         HEAPfree(&h1, false);
    1409           0 :         HEAPfree(&vh1, false);
    1410           0 :         if (HEAPsave(h2, nme, BATtailname(b), true, h2->free, NULL) != GDK_SUCCEED) {
    1411           0 :                 HEAPdecref(h2, false);
    1412           0 :                 HEAPdecref(b->tvheap, false);
    1413           0 :                 b->tvheap = ovh;
    1414           0 :                 GDKfree(srcdir);
    1415           0 :                 TRC_CRITICAL(GDK, "saving heap failed\n");
    1416           0 :                 return GDK_FAIL;
    1417             :         }
    1418           0 :         if (HEAPsave(b->tvheap, nme, "theap", true, b->tvheap->free, &b->theaplock) != GDK_SUCCEED) {
    1419           0 :                 HEAPfree(b->tvheap, false);
    1420           0 :                 b->tvheap = ovh;
    1421           0 :                 GDKfree(srcdir);
    1422           0 :                 TRC_CRITICAL(GDK, "saving string heap failed\n");
    1423           0 :                 return GDK_FAIL;
    1424             :         }
    1425           0 :         HEAPdecref(b->theap, false);
    1426           0 :         b->theap = h2;
    1427           0 :         HEAPfree(h2, false);
    1428           0 :         HEAPdecref(ovh, false);
    1429           0 :         HEAPfree(b->tvheap, false);
    1430           0 :         GDKfree(srcdir);
    1431           0 :         return GDK_SUCCEED;
    1432             : }
    1433             : 
    1434             : static gdk_return
    1435           0 : fixhashash(bat *hashbats, bat nhashbats)
    1436             : {
    1437           0 :         for (bat i = 0; i < nhashbats; i++) {
    1438           0 :                 bat bid = hashbats[i];
    1439           0 :                 BAT *b = BBP_desc(bid);
    1440           0 :                 if (b->batCacheid == 0) {
    1441             :                         /* not a valid BAT (shouldn't happen) */
    1442           0 :                         continue;
    1443             :                 }
    1444           0 :                 if (fixhashashbat(b) != GDK_SUCCEED)
    1445             :                         return GDK_FAIL;
    1446             :         }
    1447             :         return GDK_SUCCEED;
    1448             : }
    1449             : #endif
    1450             : 
    1451             : #ifdef GDKLIBRARY_TAILN
    1452             : static gdk_return
    1453           0 : movestrbats(void)
    1454             : {
    1455           0 :         for (bat bid = 1, nbat = (bat) ATOMIC_GET(&BBPsize); bid < nbat; bid++) {
    1456           0 :                 BAT *b = BBP_desc(bid);
    1457           0 :                 if (b->batCacheid == 0) {
    1458             :                         /* not a valid BAT */
    1459           0 :                         continue;
    1460             :                 }
    1461           0 :                 if (b->ttype != TYPE_str || b->twidth == SIZEOF_VAR_T || b->batCount == 0)
    1462           0 :                         continue;
    1463           0 :                 char *oldpath = GDKfilepath(0, BATDIR, BBP_physical(b->batCacheid), "tail");
    1464           0 :                 char *newpath = GDKfilepath(0, BATDIR, b->theap->filename, NULL);
    1465           0 :                 int ret = -1;
    1466           0 :                 if (oldpath != NULL && newpath != NULL) {
    1467           0 :                         struct stat oldst, newst;
    1468           0 :                         bool oldexist = MT_stat(oldpath, &oldst) == 0;
    1469           0 :                         bool newexist = MT_stat(newpath, &newst) == 0;
    1470           0 :                         if (newexist) {
    1471           0 :                                 if (oldexist) {
    1472           0 :                                         if (oldst.st_mtime > newst.st_mtime) {
    1473           0 :                                                 GDKerror("both %s and %s exist with %s unexpectedly newer: manual intervention required\n", oldpath, newpath, oldpath);
    1474           0 :                                                 ret = -1;
    1475             :                                         } else {
    1476           0 :                                                 GDKwarning("both %s and %s exist, removing %s\n", oldpath, newpath, oldpath);
    1477           0 :                                                 ret = MT_remove(oldpath);
    1478             :                                         }
    1479             :                                 } else {
    1480             :                                         /* already good */
    1481             :                                         ret = 0;
    1482             :                                 }
    1483           0 :                         } else if (oldexist) {
    1484           0 :                                 TRC_DEBUG(IO_, "rename %s to %s\n", oldpath, newpath);
    1485           0 :                                 ret = MT_rename(oldpath, newpath);
    1486             :                         } else {
    1487             :                                 /* neither file exists: may be ok, but
    1488             :                                  * will be checked later */
    1489             :                                 ret = 0;
    1490             :                         }
    1491             :                 }
    1492           0 :                 GDKfree(oldpath);
    1493           0 :                 GDKfree(newpath);
    1494           0 :                 if (ret == -1)
    1495             :                         return GDK_FAIL;
    1496             :         }
    1497             :         return GDK_SUCCEED;
    1498             : }
    1499             : #endif
    1500             : 
    1501             : #ifdef GDKLIBRARY_JSON
    1502             : static gdk_return
    1503           2 : jsonupgradebat(BAT *b, json_storage_conversion fixJSONStorage)
    1504             : {
    1505           2 :         const char *nme = BBP_physical(b->batCacheid);
    1506           2 :         char *srcdir = GDKfilepath(NOFARM, BATDIR, nme, NULL);
    1507             : 
    1508           2 :         if (srcdir == NULL) {
    1509           0 :                 TRC_CRITICAL(GDK, "GDKfilepath failed\n");
    1510           0 :                 return GDK_FAIL;
    1511             :         }
    1512             : 
    1513           2 :         char *s;
    1514           2 :         if ((s = strrchr(srcdir, DIR_SEP)) != NULL)
    1515           2 :                 *s = 0;
    1516           2 :         const char *bnme;
    1517           2 :         if ((bnme = strrchr(nme, DIR_SEP)) != NULL) {
    1518           2 :                 bnme++;
    1519             :         } else {
    1520             :                 bnme = nme;
    1521             :         }
    1522             : 
    1523           2 :         long_str filename;
    1524           2 :         snprintf(filename, sizeof(filename), "BACKUP%c%s", DIR_SEP, bnme);
    1525             : 
    1526             :         /* A json column should not normally have any index structures */
    1527           2 :         HASHdestroy(b);
    1528           2 :         IMPSdestroy(b);
    1529           2 :         OIDXdestroy(b);
    1530           2 :         PROPdestroy(b);
    1531           2 :         STRMPdestroy(b);
    1532           2 :         RTREEdestroy(b);
    1533             : 
    1534             :         /* backup the current heaps */
    1535           2 :         if (GDKmove(b->theap->farmid, srcdir, bnme, "tail",
    1536             :                     BAKDIR, bnme, "tail", false) != GDK_SUCCEED) {
    1537           0 :                 GDKfree(srcdir);
    1538           0 :                 TRC_CRITICAL(GDK, "cannot make backup of %s.tail\n", nme);
    1539           0 :                 return GDK_FAIL;
    1540             :         }
    1541           2 :         if (GDKmove(b->theap->farmid, srcdir, bnme, "theap",
    1542             :                     BAKDIR, bnme, "theap", true) != GDK_SUCCEED) {
    1543           0 :                 GDKfree(srcdir);
    1544           0 :                 TRC_CRITICAL(GDK, "cannot make backup of %s.theap\n", nme);
    1545           0 :                 return GDK_FAIL;
    1546             :         }
    1547             : 
    1548             :         /* load the old heaps */
    1549           2 :         Heap h1 = *b->theap;
    1550           2 :         h1.base = NULL;
    1551           2 :         h1.dirty = false;
    1552           2 :         strconcat_len(h1.filename, sizeof(h1.filename), filename, ".tail", NULL);
    1553           2 :         if (HEAPload(&h1, filename, "tail", false) != GDK_SUCCEED) {
    1554           0 :                 GDKfree(srcdir);
    1555           0 :                 TRC_CRITICAL(GDK, "loading old tail heap "
    1556             :                              "for BAT %d failed\n", b->batCacheid);
    1557           0 :                 return GDK_FAIL;
    1558             :         }
    1559             : 
    1560           2 :         Heap vh1 = *b->tvheap;
    1561           2 :         vh1.base = NULL;
    1562           2 :         vh1.dirty = false;
    1563           2 :         strconcat_len(vh1.filename, sizeof(vh1.filename), filename, ".theap", NULL);
    1564           2 :         if (HEAPload(&vh1, filename, "theap", false) != GDK_SUCCEED) {
    1565           0 :                 GDKfree(srcdir);
    1566           0 :                 HEAPfree(&h1, false);
    1567           0 :                 TRC_CRITICAL(GDK, "loading old string heap "
    1568             :                              "for BAT %d failed\n", b->batCacheid);
    1569           0 :                 return GDK_FAIL;
    1570             :         }
    1571             : 
    1572             :         /* create the new heaps */
    1573           2 :         Heap *h2 = GDKmalloc(sizeof(Heap));
    1574           2 :         Heap *vh2 = GDKmalloc(sizeof(Heap));
    1575           2 :         if (h2 == NULL || vh2 == NULL) {
    1576           0 :                 GDKfree(h2);
    1577           0 :                 GDKfree(vh2);
    1578           0 :                 GDKfree(srcdir);
    1579           0 :                 HEAPfree(&h1, false);
    1580           0 :                 HEAPfree(&vh1, false);
    1581           0 :                 TRC_CRITICAL(GDK, "allocating new heaps "
    1582             :                              "for BAT %d failed\n", b->batCacheid);
    1583           0 :                 return GDK_FAIL;
    1584             :         }
    1585           2 :         *h2 = *b->theap;
    1586           2 :         h2->base = NULL;
    1587           2 :         if (HEAPalloc(h2, b->batCapacity, b->twidth) != GDK_SUCCEED) {
    1588           0 :                 GDKfree(h2);
    1589           0 :                 GDKfree(vh2);
    1590           0 :                 GDKfree(srcdir);
    1591           0 :                 HEAPfree(&h1, false);
    1592           0 :                 HEAPfree(&vh1, false);
    1593           0 :                 TRC_CRITICAL(GDK, "allocating new tail heap "
    1594             :                              "for BAT %d failed\n", b->batCacheid);
    1595           0 :                 return GDK_FAIL;
    1596             : 
    1597             :         }
    1598           2 :         h2->dirty = true;
    1599           2 :         h2->free = h1.free;
    1600             : 
    1601           2 :         *vh2 = *b->tvheap;
    1602           2 :         strconcat_len(vh2->filename, sizeof(vh2->filename), nme, ".theap", NULL);
    1603           2 :         strHeap(vh2, b->batCapacity);
    1604           2 :         if (vh2->base == NULL) {
    1605           0 :                 GDKfree(srcdir);
    1606           0 :                 HEAPfree(&h1, false);
    1607           0 :                 HEAPfree(&vh1, false);
    1608           0 :                 HEAPfree(h2, false);
    1609           0 :                 GDKfree(h2);
    1610           0 :                 GDKfree(vh2);
    1611           0 :                 TRC_CRITICAL(GDK, "allocating new string heap "
    1612             :                              "for BAT %d failed\n", b->batCacheid);
    1613           0 :                 return GDK_FAIL;
    1614             :         }
    1615           2 :         vh2->dirty = true;
    1616           2 :         ATOMIC_INIT(&h2->refs, 1);
    1617           2 :         ATOMIC_INIT(&vh2->refs, 1);
    1618           2 :         Heap *ovh = b->tvheap;
    1619           2 :         b->tvheap = vh2;
    1620           2 :         vh2 = NULL;
    1621             : 
    1622           6 :         for (BUN i = 0; i < b->batCount; i++) {
    1623           4 :                 var_t o = ((var_t *) h1.base)[i];
    1624           4 :                 const char *s = vh1.base + o;
    1625           4 :                 char *ns;
    1626           4 :                 if (fixJSONStorage(&ns, &s) != GDK_SUCCEED) {
    1627           0 :                         GDKfree(srcdir);
    1628           0 :                         HEAPfree(&h1, false);
    1629           0 :                         HEAPfree(&vh1, false);
    1630           0 :                         HEAPdecref(h2, false);
    1631           0 :                         HEAPdecref(b->tvheap, false);
    1632           0 :                         b->tvheap = ovh;
    1633           0 :                         TRC_CRITICAL(GDK, "converting value "
    1634             :                                      "in BAT %d failed\n", b->batCacheid);
    1635           0 :                         return GDK_FAIL;
    1636             :                 }
    1637           4 :                 var_t no = strPut(b, &o, ns);
    1638           4 :                 GDKfree(ns);
    1639           4 :                 if (no == 0) {
    1640           0 :                         GDKfree(srcdir);
    1641           0 :                         HEAPfree(&h1, false);
    1642           0 :                         HEAPfree(&vh1, false);
    1643           0 :                         HEAPdecref(h2, false);
    1644           0 :                         HEAPdecref(b->tvheap, false);
    1645           0 :                         b->tvheap = ovh;
    1646           0 :                         TRC_CRITICAL(GDK, "storing new value "
    1647             :                                      "in BAT %d failed\n", b->batCacheid);
    1648           0 :                         return GDK_FAIL;
    1649             : 
    1650             :                 }
    1651           4 :                 ((var_t *)h2->base)[i] = no;
    1652             :         }
    1653             : 
    1654             :         /* cleanup */
    1655           2 :         HEAPfree(&h1, false);
    1656           2 :         HEAPfree(&vh1, false);
    1657           2 :         if (HEAPsave(h2, nme, BATtailname(b), true, h2->free, NULL) !=
    1658             :             GDK_SUCCEED) {
    1659           0 :                 HEAPdecref(h2, false);
    1660           0 :                 HEAPdecref(b->tvheap, false);
    1661           0 :                 b->tvheap = ovh;
    1662           0 :                 GDKfree(srcdir);
    1663           0 :                 TRC_CRITICAL(GDK, "saving heap failed\n");
    1664           0 :                 return GDK_FAIL;
    1665             :         }
    1666             : 
    1667           2 :         if (HEAPsave(b->tvheap, nme, "theap", true, b->tvheap->free,
    1668             :                      &b->theaplock) != GDK_SUCCEED) {
    1669           0 :                 HEAPfree(b->tvheap, false);
    1670           0 :                 b->tvheap = ovh;
    1671           0 :                 GDKfree(srcdir);
    1672           0 :                 TRC_CRITICAL(GDK, "saving string failed\n");
    1673           0 :                 return GDK_FAIL;
    1674             :         }
    1675             : 
    1676           2 :         HEAPdecref(b->theap, false);
    1677           2 :         b->theap = h2;
    1678           2 :         HEAPfree(h2, false);
    1679           2 :         HEAPdecref(ovh, false);
    1680           2 :         HEAPfree(b->tvheap, false);
    1681           2 :         GDKfree(srcdir);
    1682             : 
    1683           2 :         return GDK_SUCCEED;
    1684             : }
    1685             : 
    1686             : gdk_return
    1687           8 : BBPjson_upgrade(json_storage_conversion fixJSONStorage)
    1688             : {
    1689           8 :         bat bid;
    1690           8 :         int JSON_type = ATOMindex("json");
    1691           8 :         bat nbat = (bat) ATOMIC_GET(&BBPsize);
    1692           8 :         bat *upd = GDKmalloc(sizeof(bat) * (size_t) nbat);
    1693           8 :         int nupd = 0;
    1694             : 
    1695           8 :         if (upd == NULL) {
    1696           0 :                 TRC_CRITICAL(GDK, "could not create bat\n");
    1697           0 :                 return GDK_FAIL;
    1698             :         }
    1699           8 :         upd[nupd++] = 0;        /* first entry unused */
    1700             : 
    1701           8 :         BBPlock();
    1702             : 
    1703       10544 :         for (bid = 1; bid < nbat; bid++) {
    1704       10528 :                 BAT *b = BBP_desc(bid);
    1705       10528 :                 if (b->batCacheid == 0) {
    1706             :                         /* not a valid BAT */
    1707        7954 :                         continue;
    1708             :                 }
    1709             : 
    1710        2574 :                 if (b->ttype < 0) {
    1711          56 :                         const char *nme;
    1712             : 
    1713          56 :                         nme = ATOMunknown_name(b->ttype);
    1714          56 :                         if (strcmp(nme, "json") != 0)
    1715          54 :                                 continue;
    1716        2518 :                 } else if (b->ttype != JSON_type) {
    1717        2518 :                         continue;
    1718             :                 }
    1719           2 :                 fprintf(stderr, "Upgrading json bat %d\n", bid);
    1720           2 :                 if (jsonupgradebat(b, fixJSONStorage) != GDK_SUCCEED) {
    1721           0 :                         BBPunlock();
    1722           0 :                         GDKfree(upd);
    1723           0 :                         return GDK_FAIL;
    1724             :                 }
    1725           2 :                 upd[nupd++] = bid;
    1726             :         }
    1727           8 :         BBPunlock();
    1728          10 :         if (nupd > 1 &&
    1729           2 :             TMsubcommit_list(upd, NULL, nupd, -1) != GDK_SUCCEED) {
    1730           0 :                 TRC_CRITICAL(GDK, "failed to commit changes\n");
    1731           0 :                 GDKfree(upd);
    1732           0 :                 return GDK_FAIL;
    1733             :         }
    1734           8 :         GDKfree(upd);
    1735           8 :         return GDK_SUCCEED;
    1736             : }
    1737             : #endif
    1738             : 
    1739             : static bool
    1740          34 : BBPtrim(bool aggressive, bat nbat)
    1741             : {
    1742          34 :         int n = 0;
    1743          34 :         int waitctr = 0;
    1744          34 :         bool changed = false;
    1745          34 :         unsigned flag = BBPUNLOADING | BBPSYNCING | BBPSAVING;
    1746          34 :         if (!aggressive)
    1747          34 :                 flag |= BBPHOT;
    1748          34 :         lng t0 = GDKusec();
    1749       40124 :         for (bat bid = 1; bid < nbat && !GDKexiting(); bid++) {
    1750             :                 /* quick check to see if we might possibly have to do
    1751             :                  * work (includes free bats) */
    1752       40090 :                 if ((BBP_status(bid) & BBPLOADED) == 0)
    1753        7026 :                         continue;
    1754             :                 /* don't do this during a (sub)commit */
    1755       33064 :                 BBPtmlock();
    1756       33064 :                 MT_lock_set(&GDKswapLock(bid));
    1757       33064 :                 BAT *b = NULL;
    1758       33064 :                 bool swap = false;
    1759       33064 :                 if ((BBP_status(bid) & (flag | BBPLOADED)) == BBPLOADED &&
    1760        3974 :                     BBP_refs(bid) == 0 &&
    1761        3974 :                     BBP_lrefs(bid) != 0 &&
    1762        3974 :                     (b = BBP_desc(bid))->batCacheid != 0) {
    1763        3974 :                         MT_lock_set(&b->theaplock);
    1764        3974 :                         if (!BATshared(b) &&
    1765        3971 :                             !isVIEW(b) &&
    1766        3944 :                             (!BATdirty(b) ||
    1767           0 :                              (aggressive &&
    1768           0 :                               b->theap->storage == STORE_MMAP &&
    1769           0 :                               (b->tvheap == NULL ||
    1770           0 :                                b->tvheap->storage == STORE_MMAP)) ||
    1771        2638 :                              (b->batRole == PERSISTENT &&
    1772        2570 :                               BBP_lrefs(bid) <= 2))) {
    1773        2093 :                                 BBP_status_on(bid, BBPUNLOADING);
    1774        2093 :                                 swap = true;
    1775        2880 :                                 waitctr += BATdirty(b) ? 9 : 1;
    1776             :                         }
    1777        3974 :                         MT_lock_unset(&b->theaplock);
    1778             :                 }
    1779       33064 :                 MT_lock_unset(&GDKswapLock(bid));
    1780       33064 :                 if (swap) {
    1781        2093 :                         TRC_DEBUG(BAT_, "unload and free bat %d\n", bid);
    1782        2093 :                         if (BBPfree(b) != GDK_SUCCEED)
    1783           0 :                                 GDKerror("unload failed for bat %d", bid);
    1784        2093 :                         n++;
    1785        2093 :                         changed = true;
    1786             :                 }
    1787       33064 :                 BBPtmunlock();
    1788             :                 /* every once in a while, give others a chance */
    1789       33064 :                 if (++waitctr >= 1000) {
    1790          34 :                         waitctr = 0;
    1791          34 :                         MT_sleep_ms(2);
    1792             :                 }
    1793             :         }
    1794          34 :         if (n > 0)
    1795          13 :                 TRC_INFO(BAT_, "unloaded %d bats in "LLFMT" usec%s\n", n, GDKusec() - t0, aggressive ? " (also hot)" : "");
    1796          34 :         return changed;
    1797             : }
    1798             : 
    1799             : static void
    1800         341 : BBPmanager(void *dummy)
    1801             : {
    1802         341 :         (void) dummy;
    1803         341 :         bool changed = true;
    1804             : 
    1805         375 :         for (;;) {
    1806         375 :                 int n = 0;
    1807         375 :                 bat nbat = (bat) ATOMIC_GET(&BBPsize);
    1808         375 :                 MT_thread_setworking("clearing HOT bits");
    1809      185102 :                 for (bat bid = 1; bid < nbat; bid++) {
    1810      184727 :                         MT_lock_set(&GDKswapLock(bid));
    1811      184727 :                         if (BBP_refs(bid) == 0 && BBP_lrefs(bid) != 0) {
    1812       51726 :                                 n += (BBP_status(bid) & BBPHOT) != 0;
    1813       51726 :                                 BBP_status_off(bid, BBPHOT);
    1814             :                         }
    1815      184727 :                         MT_lock_unset(&GDKswapLock(bid));
    1816             :                 }
    1817         375 :                 TRC_DEBUG(BAT_, "cleared HOT bit from %d bats\n", n);
    1818         375 :                 size_t cur = GDKvm_cursize();
    1819         375 :                 MT_thread_setworking("sleeping");
    1820        9279 :                 for (int i = 0, n = changed && cur > GDK_vm_maxsize / 2 ? 1 : cur > GDK_vm_maxsize / 4 ? 10 : 100; i < n; i++) {
    1821        8870 :                         MT_sleep_ms(100);
    1822        8868 :                         if (GDKexiting())
    1823             :                                 return;
    1824             :                 }
    1825          34 :                 MT_thread_setworking("BBPtrim");
    1826          34 :                 changed = BBPtrim(false, nbat);
    1827          34 :                 MT_thread_setworking("BBPcallbacks");
    1828          34 :                 BBPcallbacks();
    1829          34 :                 if (GDKexiting())
    1830             :                         return;
    1831             :         }
    1832             : }
    1833             : 
    1834             : static MT_Id manager;
    1835             : 
    1836             : gdk_return
    1837         341 : BBPinit(bool allow_hge_upgrade)
    1838             : {
    1839         341 :         FILE *fp = NULL;
    1840         341 :         struct stat st;
    1841         341 :         unsigned bbpversion = 0;
    1842         341 :         int i;
    1843         341 :         int lineno = 0;
    1844             : #ifdef GDKLIBRARY_HASHASH
    1845         341 :         bat *hashbats = NULL;
    1846         341 :         bat nhashbats = 0;
    1847         341 :         gdk_return res = GDK_SUCCEED;
    1848             : #endif
    1849         341 :         ATOMIC_BASE_TYPE dbg = ATOMIC_GET(&GDKdebug);
    1850             : 
    1851         341 :         ATOMIC_AND(&GDKdebug, ~TAILCHKMASK);
    1852             : 
    1853             :         /* the maximum number of BATs allowed in the system and the
    1854             :          * size of the "physical" array are linked in a complicated
    1855             :          * manner.  The expression below shows the relationship */
    1856         341 :         static_assert((uint64_t) N_BBPINIT * BBPINIT < (UINT64_C(1) << (3 * ((sizeof(BBP[0][0].physical) + 2) * 2 / 5))), "\"physical\" array in BBPrec is too small");
    1857             :         /* similarly, the maximum number of BATs allowed also has a
    1858             :          * (somewhat simpler) relation with the size of the "bak"
    1859             :          * array */
    1860         341 :         static_assert((uint64_t) N_BBPINIT * BBPINIT < (UINT64_C(1) << (3 * (sizeof(BBP[0][0].bak) - 5))), "\"bak\" array in BBPrec is too small");
    1861             : 
    1862         341 :         if (!GDKinmemory(0)) {
    1863         340 :                 str bbpdirstr, backupbbpdirstr;
    1864             : 
    1865         340 :                 BBPtmlock();
    1866             : 
    1867         340 :                 if ((bbpdirstr = GDKfilepath(0, BATDIR, "BBP", "dir")) == NULL) {
    1868           0 :                         TRC_CRITICAL(GDK, "GDKmalloc failed\n");
    1869           0 :                         BBPtmunlock();
    1870           0 :                         ATOMIC_SET(&GDKdebug, dbg);
    1871           0 :                         return GDK_FAIL;
    1872             :                 }
    1873             : 
    1874         340 :                 if ((backupbbpdirstr = GDKfilepath(0, BAKDIR, "BBP", "dir")) == NULL) {
    1875           0 :                         GDKfree(bbpdirstr);
    1876           0 :                         TRC_CRITICAL(GDK, "GDKmalloc failed\n");
    1877           0 :                         BBPtmunlock();
    1878           0 :                         ATOMIC_SET(&GDKdebug, dbg);
    1879           0 :                         return GDK_FAIL;
    1880             :                 }
    1881             : 
    1882         340 :                 if (GDKremovedir(0, TEMPDIR) != GDK_SUCCEED) {
    1883           0 :                         GDKfree(bbpdirstr);
    1884           0 :                         GDKfree(backupbbpdirstr);
    1885           0 :                         TRC_CRITICAL(GDK, "cannot remove directory %s\n", TEMPDIR);
    1886           0 :                         BBPtmunlock();
    1887           0 :                         ATOMIC_SET(&GDKdebug, dbg);
    1888           0 :                         return GDK_FAIL;
    1889             :                 }
    1890             : 
    1891         340 :                 if (GDKremovedir(0, DELDIR) != GDK_SUCCEED) {
    1892           0 :                         GDKfree(bbpdirstr);
    1893           0 :                         GDKfree(backupbbpdirstr);
    1894           0 :                         TRC_CRITICAL(GDK, "cannot remove directory %s\n", DELDIR);
    1895           0 :                         BBPtmunlock();
    1896           0 :                         ATOMIC_SET(&GDKdebug, dbg);
    1897           0 :                         return GDK_FAIL;
    1898             :                 }
    1899             : 
    1900             :                 /* first move everything from SUBDIR to BAKDIR (its parent) */
    1901         340 :                 if (BBPrecover_subdir() != GDK_SUCCEED) {
    1902           0 :                         GDKfree(bbpdirstr);
    1903           0 :                         GDKfree(backupbbpdirstr);
    1904           0 :                         TRC_CRITICAL(GDK, "cannot properly recover_subdir process %s.", SUBDIR);
    1905           0 :                         BBPtmunlock();
    1906           0 :                         ATOMIC_SET(&GDKdebug, dbg);
    1907           0 :                         return GDK_FAIL;
    1908             :                 }
    1909             : 
    1910             :                 /* try to obtain a BBP.dir from bakdir */
    1911         340 :                 if (MT_stat(backupbbpdirstr, &st) == 0) {
    1912             :                         /* backup exists; *must* use it */
    1913         118 :                         if (recover_dir(0, MT_stat(bbpdirstr, &st) == 0) != GDK_SUCCEED) {
    1914           0 :                                 GDKfree(bbpdirstr);
    1915           0 :                                 GDKfree(backupbbpdirstr);
    1916           0 :                                 BBPtmunlock();
    1917           0 :                                 goto bailout;
    1918             :                         }
    1919         118 :                         if ((fp = GDKfilelocate(0, "BBP", "r", "dir")) == NULL) {
    1920           0 :                                 GDKfree(bbpdirstr);
    1921           0 :                                 GDKfree(backupbbpdirstr);
    1922           0 :                                 TRC_CRITICAL(GDK, "cannot open recovered BBP.dir.");
    1923           0 :                                 BBPtmunlock();
    1924           0 :                                 ATOMIC_SET(&GDKdebug, dbg);
    1925           0 :                                 return GDK_FAIL;
    1926             :                         }
    1927         222 :                 } else if ((fp = GDKfilelocate(0, "BBP", "r", "dir")) == NULL) {
    1928             :                         /* there was no BBP.dir either. Panic! try to use a
    1929             :                          * BBP.bak */
    1930         221 :                         if (MT_stat(backupbbpdirstr, &st) < 0) {
    1931             :                                 /* no BBP.bak (nor BBP.dir or BACKUP/BBP.dir):
    1932             :                                  * create a new one */
    1933         221 :                                 TRC_DEBUG(IO_, "initializing BBP.\n");
    1934         221 :                                 if (BBPdir_init() != GDK_SUCCEED) {
    1935           0 :                                         GDKfree(bbpdirstr);
    1936           0 :                                         GDKfree(backupbbpdirstr);
    1937           0 :                                         BBPtmunlock();
    1938           0 :                                         goto bailout;
    1939             :                                 }
    1940           0 :                         } else if (GDKmove(0, BATDIR, "BBP", "bak", BATDIR, "BBP", "dir", true) == GDK_SUCCEED)
    1941           0 :                                 TRC_DEBUG(IO_, "reverting to dir saved in BBP.bak.\n");
    1942             : 
    1943         221 :                         if ((fp = GDKfilelocate(0, "BBP", "r", "dir")) == NULL) {
    1944           0 :                                 GDKsyserror("cannot open BBP.dir");
    1945           0 :                                 GDKfree(bbpdirstr);
    1946           0 :                                 GDKfree(backupbbpdirstr);
    1947           0 :                                 BBPtmunlock();
    1948           0 :                                 goto bailout;
    1949             :                         }
    1950             :                 }
    1951             :                 assert(fp != NULL);
    1952         340 :                 GDKfree(bbpdirstr);
    1953         340 :                 GDKfree(backupbbpdirstr);
    1954         340 :                 BBPtmunlock();
    1955             :         }
    1956             : 
    1957             :         /* scan the BBP.dir to obtain current size */
    1958         341 :         BBPlimit = BBPINIT;
    1959         341 :         memset(BBP0, 0, sizeof(BBP0));
    1960         341 :         memset(BBP, 0, sizeof(BBP));
    1961         341 :         BBP[0] = BBP0;
    1962             : 
    1963         341 :         bat bbpsize;
    1964         341 :         bbpsize = 1;
    1965         341 :         if (GDKinmemory(0)) {
    1966             :                 bbpversion = GDKLIBRARY;
    1967             :         } else {
    1968         340 :                 lng logno;
    1969         340 :                 bbpversion = BBPheader(fp, &lineno, &bbpsize, &logno, allow_hge_upgrade);
    1970         340 :                 if (bbpversion == 0) {
    1971           0 :                         ATOMIC_SET(&GDKdebug, dbg);
    1972           0 :                         return GDK_FAIL;
    1973             :                 }
    1974         340 :                 assert(bbpversion > GDKLIBRARY_MINMAX_POS || logno == 0);
    1975         340 :                 ATOMIC_SET(&BBPlogno, logno);
    1976             :         }
    1977             : 
    1978             :         /* allocate BBP records */
    1979         341 :         if (BBPextend(bbpsize) != GDK_SUCCEED) {
    1980           0 :                 ATOMIC_SET(&GDKdebug, dbg);
    1981           0 :                 return GDK_FAIL;
    1982             :         }
    1983         341 :         ATOMIC_SET(&BBPsize, bbpsize);
    1984             : 
    1985         341 :         if (!GDKinmemory(0)) {
    1986         340 :                 if (BBPreadEntries(fp, bbpversion, lineno
    1987             : #ifdef GDKLIBRARY_HASHASH
    1988             :                                    , &hashbats, &nhashbats
    1989             : #endif
    1990             :                             ) != GDK_SUCCEED) {
    1991           0 :                         ATOMIC_SET(&GDKdebug, dbg);
    1992           0 :                         return GDK_FAIL;
    1993             :                 }
    1994         340 :                 fclose(fp);
    1995             :         }
    1996             : 
    1997             :         /* remove trailing free bats from potential free list (they will
    1998             :          * get added when needed) */
    1999       16657 :         for (bat i = (bat) ATOMIC_GET(&BBPsize) - 1; i > 0; i--) {
    2000       16435 :                 if (BBP_desc(i)->batCacheid != 0)
    2001             :                         break;
    2002       16316 :                 bbpsize--;
    2003             :         }
    2004         341 :         ATOMIC_SET(&BBPsize, bbpsize);
    2005             : 
    2006             :         /* add free bats to free list in such a way that low numbered
    2007             :          * ones are at the head of the list */
    2008       87188 :         for (bat i = (bat) ATOMIC_GET(&BBPsize) - 1; i > 0; i--) {
    2009       86847 :                 if (BBP_desc(i)->batCacheid == 0) {
    2010       55375 :                         BBP_next(i) = BBP_free;
    2011       55375 :                         BBP_free = i;
    2012       55375 :                         BBP_nfree++;
    2013             :                 }
    2014             :         }
    2015             : 
    2016             :         /* will call BBPrecover if needed */
    2017         341 :         if (!GDKinmemory(0)) {
    2018         340 :                 BBPtmlock();
    2019         340 :                 gdk_return rc = BBPprepare(false);
    2020         340 :                 BBPtmunlock();
    2021         340 :                 if (rc != GDK_SUCCEED) {
    2022             : #ifdef GDKLIBRARY_HASHASH
    2023           0 :                         GDKfree(hashbats);
    2024             : #endif
    2025           0 :                         TRC_CRITICAL(GDK, "cannot properly prepare process %s.", BAKDIR);
    2026           0 :                         ATOMIC_SET(&GDKdebug, dbg);
    2027           0 :                         return rc;
    2028             :                 }
    2029             :         }
    2030             : 
    2031         341 :         if (BBPcheckbats(bbpversion) != GDK_SUCCEED) {
    2032             : #ifdef GDKLIBRARY_HASHASH
    2033           0 :                 GDKfree(hashbats);
    2034             : #endif
    2035           0 :                 ATOMIC_SET(&GDKdebug, dbg);
    2036           0 :                 return GDK_FAIL;
    2037             :         }
    2038             : 
    2039             : #ifdef GDKLIBRARY_TAILN
    2040         341 :         char *needstrbatmove;
    2041         341 :         if (GDKinmemory(0)) {
    2042             :                 needstrbatmove = NULL;
    2043             :         } else {
    2044         340 :                 if ((needstrbatmove = GDKfilepath(0, BATDIR, "needstrbatmove", NULL)) == NULL) {
    2045             : #ifdef GDKLIBRARY_HASHASH
    2046           0 :                         GDKfree(hashbats);
    2047             : #endif
    2048           0 :                         ATOMIC_SET(&GDKdebug, dbg);
    2049           0 :                         return GDK_FAIL;
    2050             :                 }
    2051         340 :                 if (bbpversion <= GDKLIBRARY_TAILN) {
    2052             :                         /* create signal file that we need to rename string
    2053             :                          * offset heaps */
    2054           0 :                         int fd = MT_open(needstrbatmove, O_WRONLY | O_CREAT);
    2055           0 :                         if (fd < 0) {
    2056           0 :                                 TRC_CRITICAL(GDK, "cannot create signal file needstrbatmove.\n");
    2057           0 :                                 GDKfree(needstrbatmove);
    2058             : #ifdef GDKLIBRARY_HASHASH
    2059           0 :                                 GDKfree(hashbats);
    2060             : #endif
    2061           0 :                                 ATOMIC_SET(&GDKdebug, dbg);
    2062           0 :                                 return GDK_FAIL;
    2063             :                         }
    2064           0 :                         close(fd);
    2065             :                 } else {
    2066             :                         /* check signal file whether we need to rename string
    2067             :                          * offset heaps */
    2068         340 :                         int fd = MT_open(needstrbatmove, O_RDONLY);
    2069         340 :                         if (fd >= 0) {
    2070             :                                 /* yes, we do */
    2071           0 :                                 close(fd);
    2072         340 :                         } else if (errno == ENOENT) {
    2073             :                                 /* no, we don't: set var to NULL */
    2074         340 :                                 GDKfree(needstrbatmove);
    2075         340 :                                 needstrbatmove = NULL;
    2076             :                         } else {
    2077           0 :                                 GDKsyserror("unexpected error opening %s\n", needstrbatmove);
    2078           0 :                                 GDKfree(needstrbatmove);
    2079             : #ifdef GDKLIBRARY_HASHASH
    2080           0 :                                 GDKfree(hashbats);
    2081             : #endif
    2082           0 :                                 ATOMIC_SET(&GDKdebug, dbg);
    2083           0 :                                 return GDK_FAIL;
    2084             :                         }
    2085             :                 }
    2086             :         }
    2087             : #endif
    2088             : 
    2089             : #ifdef GDKLIBRARY_HASHASH
    2090         341 :         if (nhashbats > 0)
    2091           0 :                 res = fixhashash(hashbats, nhashbats);
    2092         341 :         GDKfree(hashbats);
    2093         341 :         if (res != GDK_SUCCEED)
    2094             :                 return res;
    2095             : #endif
    2096             : 
    2097             : #ifdef GDKLIBRARY_JSON
    2098         341 :         if (bbpversion <= GDKLIBRARY_JSON) {
    2099           8 :                 char *jsonupgradestr;
    2100           8 :                 if (GDKinmemory(0)) {
    2101         341 :                         jsonupgradestr = NULL;
    2102             :                 } else {
    2103           8 :                         if ((jsonupgradestr = GDKfilepath(0, BATDIR, "jsonupgradeneeded", NULL)) == NULL) {
    2104           0 :                                 TRC_CRITICAL(GDK, "GDKfilepath failed\n");
    2105           0 :                                 ATOMIC_SET(&GDKdebug, dbg);
    2106           0 :                                 return GDK_FAIL;
    2107             :                         }
    2108             : 
    2109             :                         /* create signal file that we need to upgrade
    2110             :                          * stored json strings. This will be performed
    2111             :                          * by an upgrade function in the GDK that will
    2112             :                          * be called at the end of the json module
    2113             :                          * initialization with a callback that actually
    2114             :                          * knows how to perform the upgrade. */
    2115           8 :                         int fd = MT_open(jsonupgradestr, O_WRONLY | O_CREAT);
    2116           8 :                         GDKfree(jsonupgradestr);
    2117           8 :                         if (fd < 0) {
    2118           0 :                                 TRC_CRITICAL(GDK, "cannot create signal file jsonupgradeneeded");
    2119           0 :                                 ATOMIC_SET(&GDKdebug, dbg);
    2120           0 :                                 return GDK_FAIL;
    2121             :                         }
    2122             : 
    2123           8 :                         close(fd);
    2124             :                 }
    2125             :         }
    2126             : #endif
    2127             : 
    2128         341 :         if (bbpversion < GDKLIBRARY && TMcommit() != GDK_SUCCEED) {
    2129           0 :                 TRC_CRITICAL(GDK, "TMcommit failed\n");
    2130           0 :                 ATOMIC_SET(&GDKdebug, dbg);
    2131           0 :                 return GDK_FAIL;
    2132             :         }
    2133             : 
    2134             : #ifdef GDKLIBRARY_TAILN
    2135             :         /* we rename the offset heaps after the above commit: in this
    2136             :          * version we accept both the old and new names, but we want to
    2137             :          * convert so that future versions only have the new name */
    2138         341 :         if (needstrbatmove) {
    2139             :                 /* note, if renaming fails, nothing is lost: a next
    2140             :                  * invocation will just try again; an older version of
    2141             :                  * mserver will not work because of the TMcommit
    2142             :                  * above */
    2143           0 :                 if (movestrbats() != GDK_SUCCEED) {
    2144           0 :                         GDKfree(needstrbatmove);
    2145           0 :                         ATOMIC_SET(&GDKdebug, dbg);
    2146           0 :                         return GDK_FAIL;
    2147             :                 }
    2148           0 :                 MT_remove(needstrbatmove);
    2149           0 :                 GDKfree(needstrbatmove);
    2150           0 :                 needstrbatmove = NULL;
    2151             :         }
    2152             : #endif
    2153         341 :         ATOMIC_SET(&GDKdebug, dbg);
    2154             : 
    2155             :         /* cleanup any leftovers (must be done after BBPrecover) */
    2156        1350 :         for (i = 0; i < MAXFARMS && BBPfarms[i].dirname != NULL; i++) {
    2157             :                 int j;
    2158        1361 :                 for (j = 0; j < i; j++) {
    2159             :                         /* don't clean a directory twice */
    2160         845 :                         if (BBPfarms[j].dirname &&
    2161         845 :                             strcmp(BBPfarms[i].dirname,
    2162             :                                    BBPfarms[j].dirname) == 0)
    2163             :                                 break;
    2164             :                 }
    2165        1009 :                 if (j == i) {
    2166         516 :                         char *d = GDKfilepath(i, NULL, BATDIR, NULL);
    2167         516 :                         if (d == NULL) {
    2168             :                                 return GDK_FAIL;
    2169             :                         }
    2170         516 :                         BBPdiskscan(d, strlen(d) - strlen(BATDIR));
    2171         516 :                         GDKfree(d);
    2172             :                 }
    2173             :         }
    2174             : 
    2175         341 :         if (MT_create_thread(&manager, BBPmanager, NULL, MT_THR_DETACHED, "BBPmanager") < 0) {
    2176           0 :                 TRC_CRITICAL(GDK, "Could not start BBPmanager thread.");
    2177           0 :                 return GDK_FAIL;
    2178             :         }
    2179             :         return GDK_SUCCEED;
    2180             : 
    2181           0 :   bailout:
    2182             :         /* now it is time for real panic */
    2183           0 :         TRC_CRITICAL(GDK, "could not write %s%cBBP.dir.", BATDIR, DIR_SEP);
    2184           0 :         return GDK_FAIL;
    2185             : }
    2186             : 
    2187             : /*
    2188             :  * During the exit phase all non-persistent BATs are removed.  Upon
    2189             :  * exit the status of the BBP tables is saved on disk.  This function
    2190             :  * is called once and during the shutdown of the server. Since
    2191             :  * shutdown may be issued from any thread (dangerous) it may lead to
    2192             :  * interference in a parallel session.
    2193             :  */
    2194             : 
    2195             : static int backup_files = 0, backup_dir = 0, backup_subdir = 0;
    2196             : static char *lockfile = NULL;
    2197             : 
    2198             : void
    2199         339 : BBPexit(void)
    2200             : {
    2201         339 :         bat i;
    2202         339 :         bool skipped;
    2203             : 
    2204             :         //BBPlock();    /* stop all threads ever touching more descriptors */
    2205             : 
    2206             :         /* free all memory (just for leak-checking in Purify) */
    2207         339 :         do {
    2208         339 :                 skipped = false;
    2209      640308 :                 for (i = 0; i < (bat) ATOMIC_GET(&BBPsize); i++) {
    2210      639969 :                         if (BBPvalid(i)) {
    2211      514694 :                                 BAT *b = BBP_desc(i);
    2212             : 
    2213      514694 :                                 if (b->batCacheid != 0) {
    2214      514694 :                                         if (BATshared(b)) {
    2215           0 :                                                 skipped = true;
    2216           0 :                                                 continue;
    2217             :                                         }
    2218      514694 :                                         MT_lock_set(&b->theaplock);
    2219      514694 :                                         bat tp = VIEWtparent(b);
    2220           0 :                                         if (tp != 0) {
    2221           0 :                                                 --BBP_lrefs(tp);
    2222           0 :                                                 HEAPdecref(b->theap, false);
    2223           0 :                                                 b->theap = NULL;
    2224             :                                         }
    2225      514694 :                                         tp = VIEWvtparent(b);
    2226           0 :                                         if (tp != 0) {
    2227           0 :                                                 --BBP_lrefs(tp);
    2228           0 :                                                 HEAPdecref(b->tvheap, false);
    2229           0 :                                                 b->tvheap = NULL;
    2230             :                                         }
    2231      514694 :                                         if (b->oldtail) {
    2232           9 :                                                 Heap *h = b->oldtail;
    2233           9 :                                                 b->oldtail = NULL;
    2234           9 :                                                 ATOMIC_AND(&h->refs, ~DELAYEDREMOVE);
    2235           9 :                                                 HEAPdecref(h, false);
    2236             :                                         }
    2237      514694 :                                         PROPdestroy_nolock(b);
    2238      514694 :                                         MT_lock_unset(&b->theaplock);
    2239      514694 :                                         BATfree(b);
    2240             :                                 }
    2241      514694 :                                 BBP_pid(i) = 0;
    2242      514694 :                                 BBPuncacheit(i, true);
    2243      514694 :                                 if (BBP_logical(i) != BBP_bak(i))
    2244       12050 :                                         GDKfree(BBP_logical(i));
    2245      514694 :                                 BBP_logical(i) = NULL;
    2246             :                         }
    2247             :                 }
    2248         339 :         } while (skipped);
    2249             :         /* these need to be NULL, otherwise no new ones get created */
    2250         339 :         memset(BBP_hash, 0, sizeof(BBP_hash));
    2251         339 :         backup_files = 0;
    2252         339 :         backup_dir = 0;
    2253         339 :         backup_subdir = 0;
    2254         339 :         if (lockfile) {
    2255         338 :                 GDKfree(lockfile);
    2256         338 :                 lockfile = NULL;
    2257             :         }
    2258         339 : }
    2259             : 
    2260             : /*
    2261             :  * The routine BBPdir creates the BAT pool dictionary file.  It
    2262             :  * includes some information about the current state of affairs in the
    2263             :  * pool.  The location in the buffer pool is saved for later use as
    2264             :  * well.  This is merely done for ease of debugging and of no
    2265             :  * importance to front-ends.  The tail of non-used entries is
    2266             :  * reclaimed as well.
    2267             :  */
    2268             : static inline int
    2269     1525362 : heap_entry(FILE *fp, BATiter *bi, BUN size)
    2270             : {
    2271     1525362 :         size_t free = bi->hfree;
    2272     1525362 :         if (size < BUN_NONE) {
    2273     1525362 :                 if ((bi->type >= 0 && ATOMstorage(bi->type) == TYPE_msk))
    2274      240010 :                         free = ((size + 31) / 32) * 4;
    2275     1285352 :                 else if (bi->width > 0)
    2276     1285352 :                         free = size << bi->shift;
    2277             :                 else
    2278             :                         free = 0;
    2279             :         }
    2280             : 
    2281     4913601 :         return fprintf(fp, " %s %d %d %d " BUNFMT " " BUNFMT " " BUNFMT " "
    2282             :                        BUNFMT " " OIDFMT " %zu %" PRIu64" %" PRIu64,
    2283     1525362 :                        bi->type >= 0 ? BATatoms[bi->type].name : ATOMunknown_name(bi->type),
    2284     1525362 :                        bi->width,
    2285     1525362 :                        bi->type == TYPE_void || bi->vh != NULL,
    2286     1525362 :                        (unsigned short) bi->sorted |
    2287     1525362 :                            ((unsigned short) bi->revsorted << 7) |
    2288     3050724 :                            ((unsigned short) bi->key << 8) |
    2289     1525362 :                            ((unsigned short) BATtdensebi(bi) << 9) |
    2290     1525362 :                            ((unsigned short) bi->nonil << 10) |
    2291     1525362 :                            ((unsigned short) bi->nil << 11),
    2292      896378 :                        bi->nokey[0] >= size || bi->nokey[1] >= size ? 0 : bi->nokey[0],
    2293     1525362 :                        bi->nokey[0] >= size || bi->nokey[1] >= size ? 0 : bi->nokey[1],
    2294     1525362 :                        bi->nosorted >= size ? 0 : bi->nosorted,
    2295     1525362 :                        bi->norevsorted >= size ? 0 : bi->norevsorted,
    2296             :                        bi->tseq,
    2297             :                        free,
    2298     1525362 :                        bi->minpos < size ? (uint64_t) bi->minpos : (uint64_t) oid_nil,
    2299     1525362 :                        bi->maxpos < size ? (uint64_t) bi->maxpos : (uint64_t) oid_nil);
    2300             : }
    2301             : 
    2302             : static inline int
    2303     1525362 : vheap_entry(FILE *fp, BATiter *bi, BUN size)
    2304             : {
    2305     1525362 :         (void) size;
    2306     1525362 :         if (bi->vh == NULL)
    2307             :                 return 0;
    2308      345816 :         return fprintf(fp, " %zu", size == 0 ? 0 : bi->vhfree);
    2309             : }
    2310             : 
    2311             : static gdk_return
    2312     1525362 : new_bbpentry(FILE *fp, bat i, BUN size, BATiter *bi)
    2313             : {
    2314             : #ifndef NDEBUG
    2315     1525362 :         assert(i > 0);
    2316     1525362 :         assert(i < (bat) ATOMIC_GET(&BBPsize));
    2317     1525362 :         assert(bi->b);
    2318     1525362 :         assert(bi->b->batCacheid == i);
    2319     1525362 :         assert(bi->b->batRole == PERSISTENT);
    2320     1525362 :         assert(0 <= bi->h->farmid && bi->h->farmid < MAXFARMS);
    2321     1525362 :         assert(BBPfarms[bi->h->farmid].roles & (1U << PERSISTENT));
    2322     1525362 :         if (bi->vh) {
    2323      345816 :                 assert(0 <= bi->vh->farmid && bi->vh->farmid < MAXFARMS);
    2324      345816 :                 assert(BBPfarms[bi->vh->farmid].roles & (1U << PERSISTENT));
    2325             :         }
    2326     1525362 :         assert(size <= bi->count || size == BUN_NONE);
    2327     1525362 :         assert(BBP_options(i) == NULL || strpbrk(BBP_options(i), "\r\n") == NULL);
    2328             : #endif
    2329             : 
    2330     1525362 :         if (BBP_options(i) != NULL && strpbrk(BBP_options(i), "\r\n") != NULL) {
    2331           0 :                 GDKerror("options for bat %d contains a newline\n", i);
    2332           0 :                 return GDK_FAIL;
    2333             :         }
    2334     1525362 :         if (size > bi->count)
    2335             :                 size = bi->count;
    2336     1525362 :         if (fprintf(fp, "%d %s %d " BUNFMT " " OIDFMT,
    2337             :                     /* BAT info */
    2338             :                     (int) i,
    2339             :                     BBP_logical(i),
    2340     1525362 :                     (unsigned) bi->restricted << 1,
    2341             :                     size,
    2342     1525362 :                     bi->b->hseqbase) < 0 ||
    2343     3050724 :             heap_entry(fp, bi, size) < 0 ||
    2344     1525362 :             vheap_entry(fp, bi, size) < 0 ||
    2345     3050724 :             (BBP_options(i) && fprintf(fp, " %s", BBP_options(i)) < 0) ||
    2346     1525362 :             fprintf(fp, "\n") < 0) {
    2347           0 :                 GDKsyserror("new_bbpentry: Writing BBP.dir entry failed\n");
    2348           0 :                 return GDK_FAIL;
    2349             :         }
    2350             : 
    2351             :         return GDK_SUCCEED;
    2352             : }
    2353             : 
    2354             : static gdk_return
    2355       10980 : BBPdir_header(FILE *f, int n, lng logno)
    2356             : {
    2357       10980 :         if (fprintf(f, "BBP.dir, GDKversion %u\n%d %d %d\nBBPsize=%d\nBBPinfo=" LLFMT "\n",
    2358             :                     GDKLIBRARY, SIZEOF_SIZE_T, SIZEOF_OID,
    2359             : #ifdef HAVE_HGE
    2360             :                     SIZEOF_HGE
    2361             : #else
    2362             :                     SIZEOF_LNG
    2363             : #endif
    2364       10980 :                     , n, logno) < 0 ||
    2365       10980 :             ferror(f)) {
    2366           0 :                 GDKsyserror("Writing BBP.dir header failed\n");
    2367           0 :                 return GDK_FAIL;
    2368             :         }
    2369             :         return GDK_SUCCEED;
    2370             : }
    2371             : 
    2372             : static gdk_return
    2373       10980 : BBPdir_first(bool subcommit, lng logno, FILE **obbpfp, FILE **nbbpfp)
    2374             : {
    2375       10980 :         FILE *obbpf = NULL, *nbbpf = NULL;
    2376       10980 :         int n = 0;
    2377       10980 :         lng ologno;
    2378             : 
    2379       10980 :         if (obbpfp)
    2380       10759 :                 *obbpfp = NULL;
    2381       10980 :         *nbbpfp = NULL;
    2382             : 
    2383       10980 :         if ((nbbpf = GDKfilelocate(0, "BBP", "w", "dir")) == NULL) {
    2384             :                 return GDK_FAIL;
    2385             :         }
    2386             : 
    2387       10980 :         if (subcommit) {
    2388       10743 :                 char buf[512];
    2389             : 
    2390       10743 :                 assert(obbpfp != NULL);
    2391             :                 /* we need to copy the backup BBP.dir to the new, but
    2392             :                  * replacing the entries for the subcommitted bats */
    2393       10743 :                 if ((obbpf = GDKfileopen(0, SUBDIR, "BBP", "dir", "r")) == NULL &&
    2394           0 :                     (obbpf = GDKfileopen(0, BAKDIR, "BBP", "dir", "r")) == NULL) {
    2395           0 :                         GDKsyserror("subcommit attempted without backup BBP.dir");
    2396           0 :                         goto bailout;
    2397             :                 }
    2398             :                 /* read first three lines */
    2399       21486 :                 if (fgets(buf, sizeof(buf), obbpf) == NULL || /* BBP.dir, GDKversion %d */
    2400       21486 :                     fgets(buf, sizeof(buf), obbpf) == NULL || /* SIZEOF_SIZE_T SIZEOF_OID SIZEOF_MAX_INT */
    2401       10743 :                     fgets(buf, sizeof(buf), obbpf) == NULL) { /* BBPsize=%d */
    2402           0 :                         GDKerror("subcommit attempted with invalid backup BBP.dir.");
    2403           0 :                         goto bailout;
    2404             :                 }
    2405             :                 /* third line contains BBPsize */
    2406       10743 :                 if (sscanf(buf, "BBPsize=%d", &n) != 1) {
    2407           0 :                         GDKerror("cannot read BBPsize in backup BBP.dir.");
    2408           0 :                         goto bailout;
    2409             :                 }
    2410             :                 /* fourth line contains BBPinfo */
    2411       10743 :                 if (fgets(buf, sizeof(buf), obbpf) == NULL ||
    2412       10743 :                     sscanf(buf, "BBPinfo=" LLSCN, &ologno) != 1) {
    2413           0 :                         GDKerror("cannot read BBPinfo in backup BBP.dir.");
    2414           0 :                         goto bailout;
    2415             :                 }
    2416             :         }
    2417             : 
    2418       10980 :         if (n < (bat) ATOMIC_GET(&BBPsize))
    2419        2692 :                 n = (bat) ATOMIC_GET(&BBPsize);
    2420             : 
    2421       10980 :         TRC_DEBUG(IO_, "writing BBP.dir (%d bats).\n", n);
    2422             : 
    2423       10980 :         if (BBPdir_header(nbbpf, n, logno) != GDK_SUCCEED) {
    2424           0 :                 goto bailout;
    2425             :         }
    2426             : 
    2427       10980 :         if (obbpfp)
    2428       10759 :                 *obbpfp = obbpf;
    2429       10980 :         *nbbpfp = nbbpf;
    2430             : 
    2431       10980 :         return GDK_SUCCEED;
    2432             : 
    2433           0 :   bailout:
    2434           0 :         if (obbpf != NULL)
    2435           0 :                 fclose(obbpf);
    2436           0 :         if (nbbpf != NULL)
    2437           0 :                 fclose(nbbpf);
    2438           0 :         return GDK_FAIL;
    2439             : }
    2440             : 
    2441             : static bat
    2442     1723109 : BBPdir_step(bat bid, BUN size, int n, char *buf, size_t bufsize,
    2443             :             FILE **obbpfp, FILE *nbbpf, BATiter *bi)
    2444             : {
    2445     1723109 :         if (n < -1)          /* safety catch */
    2446             :                 return n;
    2447     4412941 :         while (n >= 0 && n < bid) {
    2448     2689832 :                 if (n > 0) {
    2449     1225912 :                         if (fputs(buf, nbbpf) == EOF) {
    2450           0 :                                 GDKerror("Writing BBP.dir file failed.\n");
    2451           0 :                                 goto bailout;
    2452             :                         }
    2453             :                 }
    2454     2689832 :                 if (fgets(buf, (int) bufsize, *obbpfp) == NULL) {
    2455        2781 :                         if (ferror(*obbpfp)) {
    2456           0 :                                 GDKerror("error reading backup BBP.dir.");
    2457           0 :                                 goto bailout;
    2458             :                         }
    2459        2781 :                         n = -1;
    2460        2781 :                         if (fclose(*obbpfp) == EOF) {
    2461           0 :                                 GDKsyserror("Closing backup BBP.dir file failed\n");
    2462           0 :                                 GDKclrerr(); /* ignore error */
    2463             :                         }
    2464        2781 :                         *obbpfp = NULL;
    2465             :                 } else {
    2466     2687051 :                         if (sscanf(buf, "%d", &n) != 1 || n <= 0 || n >= N_BBPINIT * BBPINIT) {
    2467           0 :                                 GDKerror("subcommit attempted with invalid backup BBP.dir.");
    2468           0 :                                 goto bailout;
    2469             :                         }
    2470             :                 }
    2471             :         }
    2472     1723109 :         if (bi) {
    2473     1525362 :                 assert(BBP_status(bid) & BBPPERSISTENT);
    2474     1525362 :                 if (new_bbpentry(nbbpf, bid, size, bi) != GDK_SUCCEED)
    2475           0 :                         goto bailout;
    2476             :         }
    2477     1723109 :         return n == -1 ? -1 : n == bid ? 0 : n;
    2478             : 
    2479           0 :   bailout:
    2480           0 :         if (*obbpfp)
    2481           0 :                 fclose(*obbpfp);
    2482           0 :         fclose(nbbpf);
    2483           0 :         return -2;
    2484             : }
    2485             : 
    2486             : static gdk_return
    2487       10980 : BBPdir_last(int n, char *buf, size_t bufsize, FILE *obbpf, FILE *nbbpf)
    2488             : {
    2489       10980 :         if (n > 0 && fputs(buf, nbbpf) == EOF) {
    2490           0 :                 GDKerror("Writing BBP.dir file failed.\n");
    2491           0 :                 goto bailout;
    2492             :         }
    2493      295318 :         while (obbpf) {
    2494      292300 :                 if (fgets(buf, (int) bufsize, obbpf) == NULL) {
    2495        7962 :                         if (ferror(obbpf)) {
    2496           0 :                                 GDKerror("error reading backup BBP.dir.");
    2497           0 :                                 goto bailout;
    2498             :                         }
    2499        7962 :                         if (fclose(obbpf) == EOF) {
    2500           0 :                                 GDKsyserror("Closing backup BBP.dir file failed\n");
    2501           0 :                                 GDKclrerr(); /* ignore error */
    2502             :                         }
    2503             :                         obbpf = NULL;
    2504             :                 } else {
    2505      284338 :                         if (fputs(buf, nbbpf) == EOF) {
    2506           0 :                                 GDKerror("Writing BBP.dir file failed.\n");
    2507           0 :                                 goto bailout;
    2508             :                         }
    2509             :                 }
    2510             :         }
    2511       10980 :         if (fflush(nbbpf) == EOF ||
    2512       10980 :             (!(ATOMIC_GET(&GDKdebug) & NOSYNCMASK)
    2513             : #if defined(NATIVE_WIN32)
    2514             :              && _commit(_fileno(nbbpf)) < 0
    2515             : #elif defined(HAVE_FDATASYNC)
    2516           6 :              && fdatasync(fileno(nbbpf)) < 0
    2517             : #elif defined(HAVE_FSYNC)
    2518             :              && fsync(fileno(nbbpf)) < 0
    2519             : #endif
    2520             :                     )) {
    2521           0 :                 GDKsyserror("Syncing BBP.dir file failed\n");
    2522           0 :                 goto bailout;
    2523             :         }
    2524       10980 :         if (fclose(nbbpf) == EOF) {
    2525           0 :                 GDKsyserror("Closing BBP.dir file failed\n");
    2526           0 :                 nbbpf = NULL;   /* can't close again */
    2527           0 :                 goto bailout;
    2528             :         }
    2529             : 
    2530       10980 :         TRC_DEBUG(IO_, "end\n");
    2531             : 
    2532             :         return GDK_SUCCEED;
    2533             : 
    2534           0 :   bailout:
    2535           0 :         if (obbpf != NULL)
    2536           0 :                 fclose(obbpf);
    2537           0 :         if (nbbpf != NULL)
    2538           0 :                 fclose(nbbpf);
    2539             :         return GDK_FAIL;
    2540             : }
    2541             : 
    2542             : gdk_return
    2543         221 : BBPdir_init(void)
    2544             : {
    2545         221 :         FILE *fp;
    2546         221 :         gdk_return rc;
    2547             : 
    2548         221 :         rc = BBPdir_first(false, 0, NULL, &fp);
    2549         221 :         if (rc == GDK_SUCCEED)
    2550         221 :                 rc = BBPdir_last(-1, NULL, 0, NULL, fp);
    2551         221 :         return rc;
    2552             : }
    2553             : 
    2554             : /* function used for debugging */
    2555             : void
    2556           0 : BBPdump(void)
    2557             : {
    2558           0 :         size_t mem = 0, vm = 0;
    2559           0 :         int n = 0;
    2560             : 
    2561           0 :         for (bat i = 0; i < (bat) ATOMIC_GET(&BBPsize); i++) {
    2562           0 :                 if (BBP_refs(i) == 0 && BBP_lrefs(i) == 0)
    2563           0 :                         continue;
    2564           0 :                 BAT *b = BBP_desc(i);
    2565           0 :                 unsigned status = BBP_status(i);
    2566           0 :                 printf("# %d: " ALGOOPTBATFMT " refs=%d lrefs=%d status=%u%s",
    2567             :                        i,
    2568           0 :                        ALGOOPTBATPAR(b),
    2569             :                        BBP_refs(i),
    2570           0 :                        BBP_lrefs(i),
    2571             :                        status,
    2572           0 :                        status & BBPLOADED ? "" : " not cached");
    2573           0 :                 if (b->batCacheid == 0) {
    2574           0 :                         printf(", no descriptor\n");
    2575           0 :                         continue;
    2576             :                 }
    2577           0 :                 if (b->theap) {
    2578           0 :                         if (b->theap->parentid != b->batCacheid) {
    2579           0 :                                 printf(" Theap -> %d", b->theap->parentid);
    2580             :                         } else {
    2581           0 :                                 printf(" Theap=[%zu,%zu,f=%d]%s%s",
    2582             :                                        b->theap->free,
    2583             :                                        b->theap->size,
    2584           0 :                                        b->theap->farmid,
    2585           0 :                                        b->theap->base == NULL ? "X" : b->theap->storage == STORE_MMAP ? "M" : "",
    2586           0 :                                        status & BBPSWAPPED ? "(Swapped)" : b->theap->dirty ? "(Dirty)" : "");
    2587           0 :                                 mem += HEAPmemsize(b->theap);
    2588           0 :                                 vm += HEAPvmsize(b->theap);
    2589           0 :                                 n++;
    2590             :                         }
    2591             :                 }
    2592           0 :                 if (b->tvheap) {
    2593           0 :                         if (b->tvheap->parentid != b->batCacheid) {
    2594           0 :                                 printf(" Tvheap -> %d",
    2595             :                                        b->tvheap->parentid);
    2596             :                         } else {
    2597           0 :                                 printf(" Tvheap=[%zu,%zu,f=%d]%s%s",
    2598             :                                        b->tvheap->free,
    2599             :                                        b->tvheap->size,
    2600           0 :                                        b->tvheap->farmid,
    2601           0 :                                        b->tvheap->base == NULL ? "X" : b->tvheap->storage == STORE_MMAP ? "M" : "",
    2602           0 :                                        b->tvheap->dirty ? "(Dirty)" : "");
    2603           0 :                                 mem += HEAPmemsize(b->tvheap);
    2604           0 :                                 vm += HEAPvmsize(b->tvheap);
    2605             :                         }
    2606             :                 }
    2607           0 :                 if (MT_rwlock_rdtry(&b->thashlock)) {
    2608           0 :                         if (b->thash && b->thash != (Hash *) 1) {
    2609           0 :                                 size_t m = HEAPmemsize(&b->thash->heaplink) + HEAPmemsize(&b->thash->heapbckt);
    2610           0 :                                 size_t v = HEAPvmsize(&b->thash->heaplink) + HEAPvmsize(&b->thash->heapbckt);
    2611           0 :                                 printf(" Thash=[%zu,%zu,f=%d/%d]", m, v,
    2612           0 :                                        b->thash->heaplink.farmid,
    2613           0 :                                        b->thash->heapbckt.farmid);
    2614           0 :                                 mem += m;
    2615           0 :                                 vm += v;
    2616             :                         }
    2617           0 :                         MT_rwlock_rdunlock(&b->thashlock);
    2618             :                 }
    2619           0 :                 printf(" role: %s\n",
    2620           0 :                        b->batRole == PERSISTENT ? "persistent" : "transient");
    2621             :         }
    2622           0 :         printf("# %d bats: mem=%zu, vm=%zu\n", n, mem, vm);
    2623           0 :         fflush(stdout);
    2624           0 : }
    2625             : 
    2626             : /*
    2627             :  * @+ BBP Readonly Interface
    2628             :  *
    2629             :  * These interface functions do not change the BBP tables. If they
    2630             :  * only access one specific BAT, the caller must have ensured that no
    2631             :  * other thread is modifying that BAT, therefore such functions do not
    2632             :  * need locking.
    2633             :  *
    2634             :  * BBP index lookup by BAT name: BBP_find returns the id of the bat whose
    2635             :  * logical name equals nme, or 0 if none is found; if lock is true, BBPnameLock is held during the hash-chain walk. */
    2636             : static inline bat
    2637       34814 : BBP_find(const char *nme, bool lock)
    2638             : {
    2639       34814 :         bat i = BBPnamecheck(nme);
    2640             :         /* a non-zero result is the bat id that BBPnamecheck extracted from the name itself */
    2641        9590 :         if (i != 0) {
    2642             :                 /* for tmp_X BATs, we already know X */
    2643        9590 :                 const char *s;
    2644             :                 /* accept X only if it is below BBPsize and slot X currently carries exactly this logical name */
    2645        9590 :                 if (i >= (bat) ATOMIC_GET(&BBPsize) || (s = BBP_logical(i)) == NULL || strcmp(s, nme)) {
    2646        9590 :                         i = 0;
    2647             :                 }
    2648       25224 :         } else if (*nme != '.') { /* names starting with '.' are not looked up: the result stays 0 */
    2649             :                 /* must lock since hash-lookup traverses other BATs */
    2650       25224 :                 if (lock)
    2651        1542 :                         MT_lock_set(&BBPnameLock);
    2652       25585 :                 for (i = BBP_hash[strHash(nme) & BBP_mask]; i; i = BBP_next(i)) {
    2653        1198 :                         if (strcmp(BBP_logical(i), nme) == 0)
    2654             :                                 break;
    2655             :                 }
    2656       25224 :                 if (lock)
    2657        1542 :                         MT_lock_unset(&BBPnameLock);
    2658             :         }
    2659       34814 :         return i;
    2660             : }
    2661             : /* Look up a bat id by logical name: calls BBP_find with locking enabled; returns 0 if the name is not found. */
    2662             : bat
    2663        1542 : BBPindex(const char *nme)
    2664             : {
    2665        1542 :         return BBP_find(nme, true);
    2666             : }
    2667             : 
    2668             : /*
    2669             :  * @+ BBP Update Interface
    2670             :  * Operations to insert, delete, clear, and modify BBP entries.
    2671             :  * Our policy for the BBP is to provide unlocked BBP access for
    2672             :  * speed, but still write operations have to be locked.
    2673             :  * #ifdef DEBUG_THREADLOCAL_BATS
    2674             :  * Create the shadow version (reversed) of a bat.
    2675             :  *
    2676             :  * An existing BAT is inserted into the BBP
    2677             :  */
    2678             : 
    2679             : /* The free list is empty.  We create a new entry by either just
    2680             :  * increasing BBPsize (up to BBPlimit) or extending the BBP (which
    2681             :  * increases BBPlimit).
    2682             :  *
    2683             :  * Note that this is the only place in normal, multi-threaded operation
    2684             :  * where BBPsize is assigned a value (never decreasing) and that the
    2685             :  * assignment happens after any necessary memory was allocated and
    2686             :  * initialized.  Returns GDK_FAIL if BBPextend fails; otherwise GDK_SUCCEED with BBP_FREE_LOWATER new ids chained on BBP_free. */
    2687             : static gdk_return
    2688       55325 : maybeextend(void)
    2689             : {
    2690       55325 :         bat size = (bat) ATOMIC_GET(&BBPsize);
    2691       55326 :         if (size + BBP_FREE_LOWATER > BBPlimit && /* only extend when the new size would exceed BBPlimit */
    2692           1 :             BBPextend(size + BBP_FREE_LOWATER) != GDK_SUCCEED) {
    2693             :                 /* nothing available */
    2694             :                 return GDK_FAIL;
    2695             :         }
    2696       55325 :         ATOMIC_SET(&BBPsize, size + BBP_FREE_LOWATER);
    2697       55325 :         assert(BBP_free == 0); /* this function is only meant to run when the global free list is empty */
    2698       55325 :         BBP_free = size; /* the old BBPsize is the first new free id */
    2699      553250 :         for (int i = 1; i < BBP_FREE_LOWATER; i++) { /* link each new id to its successor, in ascending order */
    2700      497925 :                 bat sz = size;
    2701      497925 :                 BBP_next(sz) = ++size;
    2702             :         }
    2703       55325 :         BBP_next(size) = 0; /* terminate the chain at the last new id */
    2704       55325 :         BBP_nfree += BBP_FREE_LOWATER;
    2705       55325 :         return GDK_SUCCEED;
    2706             : }
    2707             : /* Allocate a BBP slot for a new bat; tt (the tail type) is only used in the trace message below.
    2708             :  * return new BAT id (> 0); return 0 on failure */
    2709             : bat
    2710    21872702 : BBPallocbat(int tt)
    2711             : {
    2712    21872702 :         MT_Id pid = MT_getpid();
    2713    21900696 :         bool lock = locked_by == 0 || locked_by != pid;
    2714    21900696 :         bat i;
    2715    21900696 :         int len = 0;
    2716    21900696 :         struct freebats *t = MT_thread_getfreebats();
    2717             :         /* t is this thread's private free list; refill it from the global list when it is empty */
    2718    21937335 :         if (t->freebats == 0) {
    2719             :                 /* critical section: get a new BBP entry */
    2720      229494 :                 assert(t->nfreebats == 0);
    2721      229494 :                 if (lock) {
    2722      229494 :                         MT_lock_set(&GDKcacheLock);
    2723             :                 }
    2724             : 
    2725             :                 /* get a global bat, perhaps extend */
    2726      229615 :                 if (BBP_free <= 0) {
    2727             :                         /* we need to extend the BBP */
    2728       55325 :                         gdk_return r;
    2729       55325 :                         r = maybeextend();
    2730       55325 :                         if (r != GDK_SUCCEED) {
    2731           0 :                                 if (lock) {
    2732           0 :                                         MT_lock_unset(&GDKcacheLock);
    2733             :                                 }
    2734             :                                 /* failed */
    2735           0 :                                 return 0;
    2736             :                         }
    2737             :                 }
    2738      229615 :                 t->freebats = i = BBP_free;
    2739      229615 :                 bat l = 0;
    2740     2492231 :                 for (int x = 0; x < BBP_FREE_LOWATER && i; x++) { /* claim up to BBP_FREE_LOWATER ids from the head of the global list */
    2741     2262616 :                         assert(BBP_next(i) == 0 || BBP_next(i) > i);
    2742     2262616 :                         t->nfreebats++;
    2743     2262616 :                         BBP_nfree--;
    2744     2262616 :                         l = i;
    2745     2262616 :                         i = BBP_next(i);
    2746             :                 }
    2747      229615 :                 BBP_next(l) = 0; /* cut the claimed prefix off the global list ... */
    2748      229615 :                 BBP_free = i; /* ... which now starts at the first unclaimed id */
    2749             : 
    2750      229615 :                 if (lock) {
    2751      229615 :                         MT_lock_unset(&GDKcacheLock);
    2752             :                 }
    2753             :                 /* rest of the work outside the lock */
    2754             :         }
    2755    21937456 :         if (t->nfreebats > 0) { /* pop the first id off the private list */
    2756    21937456 :                 assert(t->freebats > 0);
    2757    21937456 :                 i = t->freebats;
    2758    21937456 :                 t->freebats = BBP_next(i);
    2759    21937456 :                 assert(t->freebats == 0 || t->freebats > i);
    2760    21937456 :                 BBP_next(i) = 0;
    2761    21937456 :                 t->nfreebats--;
    2762             :         } else {
    2763           0 :                 assert(t->nfreebats == 0);
    2764           0 :                 assert(t->freebats == 0);
    2765             :                 return 0;
    2766             :         }
    2767             : 
    2768             :         /* fill in basic BBP fields for the new bat */
    2769             : 
    2770    21937456 :         MT_lock_set(&GDKswapLock(i));
    2771    21973933 :         BBP_status_set(i, BBPDELETING|BBPHOT);
    2772    21973933 :         BBP_refs(i) = 1;        /* new bats have 1 pin */
    2773    21973933 :         BBP_lrefs(i) = 0;       /* ie. no logical refs */
    2774    21973933 :         BBP_pid(i) = pid;
    2775    21973933 :         MT_lock_unset(&GDKswapLock(i));
    2776             :         /* the default name is "tmp_" followed by the id in octal; it is only generated while the bak buffer is still empty */
    2777    21997297 :         if (*BBP_bak(i) == 0)
    2778      580772 :                 len = snprintf(BBP_bak(i), sizeof(BBP_bak(i)), "tmp_%o", (unsigned) i);
    2779    21997297 :         if (len < 0 || (size_t) len >= sizeof(BBP_bak(i))) { /* error or truncation, judged against the size given to snprintf (as BBPrename does) */
    2780           0 :                 GDKerror("impossible error\n");
    2781           0 :                 return 0;
    2782             :         }
    2783    21997297 :         BBP_logical(i) = BBP_bak(i);
    2784             : 
    2785             :         /* Keep the physical location around forever */
    2786    21997297 :         if (!GDKinmemory(0) && *BBP_physical(i) == 0) {
    2787      582157 :                 BBPgetfilename(BBP_physical(i), sizeof(BBP_physical(i)), i);
    2788      582229 :                 TRC_DEBUG(BAT_, "%d = new %s(%s)\n", (int) i, BBP_logical(i), ATOMname(tt));
    2789             :         }
    2790             : 
    2791             :         return i;
    2792             : }
    2793             : /* Flag bat bn as loaded in the BBP: sets BBPLOADED and clears BBPLOADING, BBPDELETING and BBPSWAPPED in its status; always returns GDK_SUCCEED. */
    2794             : gdk_return
    2795    21935205 : BBPcacheit(BAT *bn, bool lock)
    2796             : {
    2797    21935205 :         bat i = bn->batCacheid;
    2798    21935205 :         unsigned mode;
    2799             :         /* even when the caller asks for locking, skip it if this thread is the one recorded in locked_by */
    2800    21935205 :         if (lock)
    2801    43834632 :                 lock = locked_by == 0 || locked_by != MT_getpid();
    2802             : 
    2803    21935205 :         assert(i > 0);
    2804             : 
    2805    21935205 :         if (lock)
    2806    21929968 :                 MT_lock_set(&GDKswapLock(i));
    2807    21960673 :         mode = (BBP_status(i) | BBPLOADED) & ~(BBPLOADING | BBPDELETING | BBPSWAPPED);
    2808             : 
    2809             :         /* cache it! */
    2810    21960673 :         BBP_status_set(i, mode);
    2811             : 
    2812    21960673 :         if (lock)
    2813    21997605 :                 MT_lock_unset(&GDKswapLock(i));
    2814    21961286 :         return GDK_SUCCEED;
    2815             : }
    2816             : 
    2817             : /*
    2818             :  * BBPuncacheit changes the BBP status to swapped out.  Currently only
    2819             :  * used in BBPfree (bat swapped out) and BBPclear (bat destroyed
    2820             :  * forever).
    2821             :  */
    2822             : /* i: bat id (a negative id is negated first); unloaddesc: if true, the descriptor is also passed to BATdestroy */
    2823             : static void
    2824    21893872 : BBPuncacheit(bat i, bool unloaddesc)
    2825             : {
    2826    21893872 :         if (i < 0)
    2827             :                 i = -i;
    2828    21893872 :         if (BBPcheck(i)) {
    2829    21910832 :                 BAT *b = BBP_desc(i);
    2830             :                 /* keeping the descriptor is only expected when nobody holds a physical reference any more */
    2831    21910832 :                 assert(unloaddesc || BBP_refs(i) == 0);
    2832             : 
    2833    21910832 :                 if (BBP_status(i) & BBPLOADED) {
    2834    21907449 :                         TRC_DEBUG(BAT_, "uncache %d (%s)\n", (int) i, BBP_logical(i));
    2835             : 
    2836             :                         /* clearing bits can be done without the lock */
    2837    21907449 :                         BBP_status_off(i, BBPLOADED);
    2838             :                 }
    2839    21910832 :                 if (unloaddesc) {
    2840    22039447 :                         BATdestroy(b);
    2841             :                 }
    2842             :         }
    2843    21795035 : }
    2844             : 
    2845             : /*
    2846             :  * @- BBPclear
    2847             :  * BBPclear removes a BAT from the BBP directory forever.
    2848             :  */
    2849             : static inline void
    2850       73907 : BBPhandover(struct freebats *t, uint32_t n)
    2851             : {
    2852       73907 :         bat *p, bid;
    2853             :         /* move the last n bats (or all of them if n >= t->nfreebats) from our private free list to
    2854             :          * the global free list BBP_free; both callers in this file take GDKcacheLock around the call (bbpclear only if its lock flag is set) */
    2855       73907 :         if (n >= t->nfreebats) {
    2856       42676 :                 bid = t->freebats;
    2857       42676 :                 t->freebats = 0;
    2858       42676 :                 BBP_nfree += t->nfreebats;
    2859       42676 :                 t->nfreebats = 0;
    2860             :         } else {
    2861       31231 :                 p = &t->freebats;
    2862      343541 :                 for (uint32_t i = n; i < t->nfreebats; i++) /* skip the (nfreebats - n) entries that stay private */
    2863      312310 :                         p = &BBP_next(*p);
    2864       31231 :                 bid = *p; /* head of the tail chain that is handed over */
    2865       31231 :                 *p = 0; /* truncate the private list just before it */
    2866       31231 :                 BBP_nfree += n;
    2867       31231 :                 t->nfreebats -= n;
    2868             :         }
    2869             :         p = &BBP_free;
    2870     1847381 :         while (bid != 0) { /* merge the chain into BBP_free, keeping ascending id order */
    2871    14536676 :                 while (*p && *p < bid) /* p is not reset between insertions, so the chain itself must be ascending */
    2872    12763202 :                         p = &BBP_next(*p);
    2873     1773474 :                 bat i = BBP_next(bid); /* remember the rest of the chain before relinking bid */
    2874     1773474 :                 BBP_next(bid) = *p;
    2875     1773474 :                 *p = bid;
    2876     1773474 :                 bid = i;
    2877             :         }
    2878       73907 : }
    2879             : 
    2880             : #ifndef NDEBUG
    2881             : extern void printlist(bat bid) __attribute__((__cold__));
    2882             : /* print the ids on a bat free list followed by their count in parentheses; pass start of free list as argument
    2883             :  * to be used from the debugger */
    2884             : void
    2885           0 : printlist(bat bid)
    2886             : {
    2887           0 :         int n = 0;
    2888           0 :         while (bid) { /* follow the BBP_next links until the 0 terminator */
    2889           0 :                 printf("%d ", bid);
    2890           0 :                 bid = BBP_next(bid);
    2891           0 :                 n++;
    2892             :         }
    2893           0 :         printf("(%d)\n", n);
    2894           0 : }
    2895             : #endif
    2896             : /* Wipe BBP slot i and put the id on the calling thread's free list; lock says whether GDKswapLock/GDKcacheLock are taken here. */
    2897             : static inline void
    2898    21371565 : bbpclear(bat i, bool lock)
    2899             : {
    2900    21371565 :         struct freebats *t = MT_thread_getfreebats();
    2901             : 
    2902    21373029 :         TRC_DEBUG(BAT_, "clear %d (%s)\n", (int) i, BBP_logical(i));
    2903    21373029 :         BBPuncacheit(i, true); /* true: the descriptor is destroyed as well */
    2904    21404538 :         TRC_DEBUG(BAT_, "set to unloading %d\n", i);
    2905    21404538 :         if (lock) {
    2906    21442482 :                 MT_lock_set(&GDKswapLock(i));
    2907             :         }
    2908             :         /* the slot is marked BBPUNLOADING and its reference counts zeroed while it is being dismantled */
    2909    21426656 :         BBP_status_set(i, BBPUNLOADING);
    2910    21426656 :         BBP_refs(i) = 0;
    2911    21426656 :         BBP_lrefs(i) = 0;
    2912    21426656 :         if (lock)
    2913    21508211 :                 MT_lock_unset(&GDKswapLock(i));
    2914    21432298 :         if (!BBPtmpcheck(BBP_logical(i))) { /* a name that fails BBPtmpcheck is removed via BBP_delete under BBPnameLock */
    2915        2722 :                 MT_lock_set(&BBPnameLock);
    2916        2722 :                 BBP_delete(i);
    2917        2722 :                 MT_lock_unset(&BBPnameLock);
    2918             :         }
    2919    21432298 :         if (BBP_logical(i) != BBP_bak(i)) /* a name that is not the built-in bak buffer is freed */
    2920        2722 :                 GDKfree(BBP_logical(i));
    2921    21512517 :         BBP_status_set(i, 0);
    2922    21512517 :         BBP_logical(i) = NULL;
    2923    21512517 :         bat *p;
    2924    64212562 :         for (p = &t->freebats; *p && *p < i; p = &BBP_next(*p)) /* find the insertion point that keeps the private list ascending */
    2925             :                 ;
    2926    21512517 :         BBP_next(i) = *p;
    2927    21512517 :         *p = i;
    2928    21512517 :         t->nfreebats++;
    2929    21512517 :         BBP_pid(i) = ~(MT_Id)0; /* not zero, not a valid thread id */
    2930    21512517 :         if (t->nfreebats > BBP_FREE_HIWATER) { /* too many private free ids: hand over all but BBP_FREE_LOWATER of them */
    2931       31229 :                 if (lock)
    2932       31228 :                         MT_lock_set(&GDKcacheLock);
    2933       31232 :                 BBPhandover(t, t->nfreebats - BBP_FREE_LOWATER);
    2934       31231 :                 if (lock)
    2935       31231 :                         MT_lock_unset(&GDKcacheLock);
    2936             :         }
    2937    21512519 : }
    2938             : /* Public entry point for bbpclear: does nothing if BBPcheck(i) fails; locking is skipped when this thread is the one recorded in locked_by. */
    2939             : void
    2940    21374638 : BBPclear(bat i)
    2941             : {
    2942    21374638 :         if (BBPcheck(i)) {
    2943    21371779 :                 bool lock = locked_by == 0 || locked_by != MT_getpid();
    2944    21371779 :                 bbpclear(i, lock);
    2945             :         }
    2946    21497087 : }
    2947             : /* Return every id on the calling thread's private free list to the global free list (under GDKcacheLock); no-op if the thread has none. */
    2948             : void
    2949       46855 : BBPrelinquishbats(void)
    2950             : {
    2951       46855 :         struct freebats *t = MT_thread_getfreebats();
    2952       46910 :         if (t == NULL || t->nfreebats == 0)
    2953             :                 return;
    2954       42629 :         MT_lock_set(&GDKcacheLock);
    2955       85352 :         while (t->nfreebats > 0) { /* with n == nfreebats, BBPhandover moves the whole list and zeroes the count */
    2956       42676 :                 BBPhandover(t, t->nfreebats);
    2957             :         }
    2958       42676 :         MT_lock_unset(&GDKcacheLock);
    2959             : }
    2960             : 
    2961             : /*
    2962             :  * @- BBP rename
    2963             :  *
    2964             :  * Each BAT has a logical name that is globally unique.
    2965             :  * The batId is the same as the logical BAT name.
    2966             :  *
    2967             :  * The default logical name of a BAT is tmp_X, where X is the
    2968             :  * batCacheid.  Apart from being globally unique, new logical bat
    2969             :  * names cannot be of the form tmp_X, unless X is the batCacheid.
    2970             :  *
    2971             :  * Physical names consist of a directory name followed by a logical
    2972             :  * name suffix.  The directory name is derived from the batCacheid,
    2973             :  * and is currently organized in a hierarchy that puts max 64 bats in
    2974             :  * each directory (see BBPgetsubdir).
    2975             :  *
    2976             :  * Concerning the physical suffix: it is almost always bat_X. This
    2977             :  * saves us a whole lot of trouble, as bat_X is always unique and no
    2978             :  * conflicts can occur.  Other suffixes are supported only
    2979             :  * for backward compatibility with old repositories (you won't
    2980             :  * see them anymore in new repositories).
    2981             :  * BBPrename returns 0 on success (also when b is NULL or the name is unchanged), else BBPRENAME_LONG, BBPRENAME_ILLEGAL, BBPRENAME_ALREADY or BBPRENAME_MEMORY. */
    2982             : int
    2983       33272 : BBPrename(BAT *b, const char *nme)
    2984             : {
    2985       33272 :         if (b == NULL)
    2986             :                 return 0;
    2987             : 
    2988       33272 :         bat bid = b->batCacheid;
    2989       33272 :         bat tmpid = 0, i;
    2990             :         /* a NULL name means: go back to the default tmp_<octal id> name kept in the bak buffer */
    2991       33272 :         if (nme == NULL) {
    2992        9590 :                 if (BBP_bak(bid)[0] == 0 &&
    2993           0 :                     snprintf(BBP_bak(bid), sizeof(BBP_bak(bid)), "tmp_%o", (unsigned) bid) >= (int) sizeof(BBP_bak(bid))) {
    2994             :                         /* cannot happen */
    2995           0 :                         TRC_CRITICAL(GDK, "BBP default filename too long\n");
    2996           0 :                         return BBPRENAME_LONG;
    2997             :                 }
    2998        9590 :                 nme = BBP_bak(bid);
    2999             :         }
    3000             : 
    3001             :         /* If name stays same, do nothing */
    3002       33272 :         if (BBP_logical(bid) && strcmp(BBP_logical(bid), nme) == 0)
    3003             :                 return 0;
    3004             :         /* a tmp_X style name is only acceptable when X is this bat's own id */
    3005       33272 :         if ((tmpid = BBPnamecheck(nme)) && tmpid != bid) {
    3006           0 :                 GDKerror("illegal temporary name: '%s'\n", nme);
    3007           0 :                 return BBPRENAME_ILLEGAL;
    3008             :         }
    3009       33272 :         if (strLen(nme) >= IDLENGTH) {
    3010           0 :                 GDKerror("name too long: '%s'\n", nme);
    3011           0 :                 return BBPRENAME_LONG;
    3012             :         }
    3013             :         /* BBPnameLock is held from the uniqueness check until the rename is complete */
    3014       33272 :         MT_lock_set(&BBPnameLock);
    3015       33272 :         i = BBP_find(nme, false);
    3016       33272 :         if (i != 0) {
    3017           1 :                 MT_lock_unset(&BBPnameLock);
    3018           1 :                 GDKerror("name is in use: '%s'.\n", nme);
    3019           1 :                 return BBPRENAME_ALREADY;
    3020             :         }
    3021             :         /* from here on i is 0, so it must not be used to address this bat's slot */
    3022       33271 :         char *nnme;
    3023       33271 :         if (nme == BBP_bak(bid) || strcmp(nme, BBP_bak(bid)) == 0) {
    3024       33271 :                 nnme = BBP_bak(bid);
    3025             :         } else {
    3026       23681 :                 nnme = GDKstrdup(nme);
    3027       23681 :                 if (nnme == NULL) {
    3028           0 :                         MT_lock_unset(&BBPnameLock);
    3029           0 :                         return BBPRENAME_MEMORY;
    3030             :                 }
    3031             :         }
    3032             : 
    3033             :         /* carry through the name change */
    3034       33271 :         if (BBP_logical(bid) && !BBPtmpcheck(BBP_logical(bid))) {
    3035        9590 :                 BBP_delete(bid);
    3036             :         }
    3037       33271 :         if (BBP_logical(bid) != BBP_bak(bid))
    3038        9590 :                 GDKfree(BBP_logical(bid));
    3039       33271 :         BBP_logical(bid) = nnme;
    3040       33271 :         if (tmpid == 0) { /* BBP_insert is only called for names that BBPnamecheck did not recognize */
    3041       23681 :                 BBP_insert(bid);
    3042             :         }
    3043       33271 :         MT_lock_set(&b->theaplock);
    3044       33271 :         bool transient = b->batTransient;
    3045       33271 :         MT_lock_unset(&b->theaplock);
    3046       33271 :         if (!transient) {
    3047        7338 :                 bool lock = locked_by == 0 || locked_by != MT_getpid();
    3048             :                 /* take the swap lock of the bat whose status is changed (bid), not of slot 0 */
    3049        7338 :                 if (lock)
    3050        7338 :                         MT_lock_set(&GDKswapLock(bid));
    3051        7338 :                 BBP_status_on(bid, BBPRENAMED);
    3052        7338 :                 if (lock)
    3053        7338 :                         MT_lock_unset(&GDKswapLock(bid));
    3054             :         }
    3055       33271 :         MT_lock_unset(&BBPnameLock);
    3056       33271 :         return 0;
    3057             : }
    3058             : 
    3059             : /*
    3060             :  * @+ BBP swapping Policy
    3061             :  * The BAT can be moved back to disk using the routine BBPfree.  It
    3062             :  * frees the storage for other BATs. After this call BAT* references
    3063             :  * maintained for the BAT are wrong.  We should keep track of dirty
    3064             :  * unloaded BATs. They may have to be committed later on, which may
    3065             :  * include reading them in again.
    3066             :  *
    3067             :  * BBPswappable: may this bat be unloaded?  Only real bats without
    3068             :  * memory references can be unloaded.
    3069             :  */
    3070             : static inline void /* wait until none of the status bits in event is set for bat i; s is only used in the trace message */
    3071     3358090 : BBPspin(bat i, const char *s, unsigned event)
    3072             : {
    3073     3358090 :         if (BBPcheck(i) && (BBP_status(i) & event)) {
    3074             :                 lng spin = LL_CONSTANT(0);
    3075             :                 /* poll: sleep KITTENNAP ms per round and count the rounds for the trace message */
    3076          89 :                 do {
    3077          89 :                         MT_sleep_ms(KITTENNAP);
    3078          89 :                         spin++;
    3079          89 :                 } while (BBP_status(i) & event);
    3080           4 :                 TRC_DEBUG(BAT_, "%d,%s,%u: " LLFMT " loops\n", (int) i, s, event, spin);
    3081             :         }
    3082     3358003 : }
    3083             : /* Clear the BBPHOT status bit of bat i, but only if its descriptor's role is PERSISTENT; a nil id is ignored. */
    3084             : void
    3085    10162374 : BBPcold(bat i)
    3086             : {
    3087    10162374 :         if (!is_bat_nil(i)) {
    3088    10174250 :                 BAT *b = BBP_desc(i);
    3089    10174250 :                 if (b->batRole == PERSISTENT)
    3090         692 :                         BBP_status_off(i, BBPHOT);
    3091             :         }
    3092    10162374 : }
    3093             : 
    3094             : /* This function can fail if the input parameter (i) is incorrect
    3095             :  * (unlikely).  Returns the new logical (logical == true) or physical reference count of bat i, or 0 on failure; lock says whether GDKswapLock(i) is taken here. */
    3096             : static inline int
    3097   145593099 : incref(bat i, bool logical, bool lock)
    3098             : {
    3099   145593099 :         int refs;
    3100   145593099 :         BAT *b;
    3101             : 
    3102   145593099 :         if (!BBPcheck(i))
    3103             :                 return 0;
    3104             : 
    3105   145601709 :         if (lock) {
    3106    40810895 :                 for (;;) { /* retry until the lock is held while the bat is neither unstable nor loading */
    3107    40810895 :                         MT_lock_set(&GDKswapLock(i));
    3108    41914252 :                         if (!(BBP_status(i) & (BBPUNSTABLE|BBPLOADING)))
    3109             :                                 break;
    3110             :                         /* the BATs is "unstable", try again */
    3111           0 :                         MT_lock_unset(&GDKswapLock(i));
    3112           0 :                         BBPspin(i, __func__, BBPUNSTABLE|BBPLOADING);
    3113             :                 }
    3114             :         }
    3115             :         /* we have the lock */
    3116             : 
    3117   146705066 :         b = BBP_desc(i);
    3118   146705066 :         if (b->batCacheid == 0) {
    3119             :                 /* should not have happened */
    3120           0 :                 if (lock)
    3121           0 :                         MT_lock_unset(&GDKswapLock(i));
    3122           0 :                 return 0;
    3123             :         }
    3124             :         /* a bat without any reference is only expected here when it is marked deleted or swapped */
    3125   146705066 :         assert(BBP_refs(i) + BBP_lrefs(i) ||
    3126             :                BBP_status(i) & (BBPDELETED | BBPSWAPPED));
    3127   146705066 :         if (logical) {
    3128    41004665 :                 refs = ++BBP_lrefs(i);
    3129    41004665 :                 BBP_pid(i) = 0; /* taking a logical reference resets the recorded thread id to 0 */
    3130             :         } else {
    3131   105700401 :                 refs = ++BBP_refs(i);
    3132   105700401 :                 BBP_status_on(i, BBPHOT); /* a physical reference also sets the BBPHOT bit */
    3133             :         }
    3134   146705066 :         if (lock)
    3135    40994198 :                 MT_lock_unset(&GDKswapLock(i));
    3136             : 
    3137             :         return refs;
    3138             : }
    3139             : 
    3140             : /* increment the physical reference counter for the given bat
    3141             :  * (done by calling BATdescriptor); returns 1 if BATdescriptor(i) yields a descriptor and 0 otherwise, not the actual reference count
    3142             :  * also increments the physical reference count of the parent bat(s) (if
    3143             :  * any) */
    3144             : int
    3145       74208 : BBPfix(bat i)
    3146             : {
    3147       74208 :         return BATdescriptor(i) ? 1 : 0;
    3148             : }
    3149             : 
    3150             : /* increment the logical reference count for the given bat
    3151             :  * returns the new reference count (0 if incref fails); locking is skipped when this thread is the one recorded in locked_by */
    3152             : int
    3153    33732776 : BBPretain(bat i)
    3154             : {
    3155    33732776 :         bool lock = locked_by == 0 || locked_by != MT_getpid();
    3156             : 
    3157    33732776 :         return incref(i, true, lock);
    3158             : }
    3159             : /* decref: drop one reference to bat i and, when that leaves the bat
                     :  * unused, destroy it or unload it.
                     :  *
                     :  * i       - bat id; a nil id or a failing BBPcheck() returns -1 at once
                     :  * logical - true: decrement BBP_lrefs(i); false: decrement BBP_refs(i)
                     :  * lock    - true: take GDKswapLock(i) here; false: never touch it
                     :  *           (presumably the caller already holds it -- confirm)
                     :  * func    - caller name, used in error/debug messages and for BBPspin()
                     :  *
                     :  * returns the decremented counter, 0 if the counter was already 0
                     :  * (after GDKerror), or -1 for an invalid bat or when BBPfree() fails
                     :  */
    3160             : static inline int
    3161   177184759 : decref(bat i, bool logical, bool lock, const char *func)
    3162             : {
    3163   177184759 :         int refs = 0, lrefs;
    3164   177184759 :         bool swap = false;
    3165   177184759 :         bool locked = false;
    3166   177184759 :         int farmid = 0;
    3167   177184759 :         BAT *b;
    3168             : 
    3169   177184759 :         if (is_bat_nil(i))
    3170             :                 return -1;
    3171   166075968 :         assert(i > 0);
    3172   166075968 :         if (BBPcheck(i) == 0)
    3173             :                 return -1;
    3174             : 
    3175   165841935 :         if (lock)
    3176   165769173 :                 MT_lock_set(&GDKswapLock(i));
    3177             : 
    3178   166639991 :         while (BBP_status(i) & BBPUNLOADING) { /* an unload is in progress: BBPspin on it without holding the swap lock, then recheck */
    3179           0 :                 if (lock)
    3180           0 :                         MT_lock_unset(&GDKswapLock(i));
    3181           0 :                 BBPspin(i, func, BBPUNLOADING);
    3182           0 :                 if (lock)
    3183   166639991 :                         MT_lock_set(&GDKswapLock(i));
    3184             :         }
    3185             : 
    3186   167196043 :         b = (BBP_status(i) & BBPLOADED) ? BBP_desc(i) : NULL; /* NULL when the bat is not loaded */
    3187             : 
    3188             :         /* decrement references by one */
    3189   167196043 :         if (logical) {
    3190    40962544 :                 if (BBP_lrefs(i) == 0) {
    3191           0 :                         GDKerror("%s: %s does not have logical references.\n", func, BBP_logical(i));
    3192           0 :                         assert(0);
    3193             :                 } else {
    3194    40962544 :                         refs = --BBP_lrefs(i);
    3195             :                 }
    3196             :                 /* cannot release last logical ref if still shared */
    3197             :                 // but we could still have a bat iterator on it
    3198             :                 //assert(!BATshared(BBP_desc(i)) || refs > 0);
    3199             :         } else {
    3200   126233499 :                 if (BBP_refs(i) == 0) {
    3201           0 :                         GDKerror("%s: %s does not have pointer fixes.\n", func, BBP_logical(i));
    3202           0 :                         assert(0);
    3203             :                 } else {
    3204   126233499 :                         refs = --BBP_refs(i);
    3205   126233499 :                         if (b && refs == 0) {
    3206   104807141 :                                 MT_lock_set(&b->theaplock);
    3207   105268839 :                                 locked = true;
    3208   105268839 :                                 if (VIEWtparent(b) || VIEWvtparent(b))
    3209    17909503 :                                         BBP_status_on(i, BBPHOT);
    3210             :                         }
    3211             :                 }
    3212             :         }
    3213   167657741 :         if (b) {
    3214   166965233 :                 if (!locked) {
    3215    62460150 :                         MT_lock_set(&b->theaplock);
    3216    62697557 :                         locked = true;
    3217             :                 }
    3218             : #if 0
    3219             :                 if (b->batCount > b->batInserted && !isVIEW(b)) {
    3220             :                         /* if batCount is larger than batInserted and
    3221             :                          * the dirty bits are off, it may be that a
    3222             :                          * (sub)commit happened in parallel to an
    3223             :                          * update; we must undo the turning off of the
    3224             :                          * dirty bits */
    3225             :                         if (b->theap && b->theap->parentid == i)
    3226             :                                 b->theap->dirty = true;
    3227             :                         if (b->tvheap && b->tvheap->parentid == i)
    3228             :                                 b->tvheap->dirty = true;
    3229             :                 }
    3230             : #endif
    3231   167112838 :                 if (b->theap)
    3232   167112838 :                         farmid = b->theap->farmid;
    3233             :         }
    3234             : 
    3235             :         /* we destroy transients asap and unload persistent bats only
                     :          * if they have been made cold or are not dirty.
                     :          * Memory pressure tunes this: while GDKvm_cursize() is below
                     :          * 75% of GDK_vm_maxsize, a bat whose heaps are small (less
                     :          * than 1/32 of the remaining room) is also kept as long as
                     :          * it is BBPHOT; above 85% even dirty bats may be unloaded
    3236             :          * (swapdirty) */
    3237   167805346 :         unsigned chkflag = BBPSYNCING;
    3238   167805346 :         bool swapdirty = false;
    3239   167805346 :         if (b) {
    3240   167197061 :                 size_t cursize;
    3241   167197061 :                 if ((cursize = GDKvm_cursize()) < (size_t) (GDK_vm_maxsize * 0.75)) {
    3242   167124965 :                         if (!locked) {
    3243           0 :                                 MT_lock_set(&b->theaplock);
    3244           0 :                                 locked = true;
    3245             :                         }
    3246   167124965 :                         if (((b->theap ? b->theap->size : 0) + (b->tvheap ? b->tvheap->size : 0)) < (GDK_vm_maxsize - cursize) / 32)
    3247   167197682 :                                 chkflag |= BBPHOT;
    3248           0 :                 } else if (cursize > (size_t) (GDK_vm_maxsize * 0.85))
    3249   167733250 :                         swapdirty = true;
    3250             :         }
    3251             :         /* only consider unloading if refs is 0; if, in addition, lrefs
    3252             :          * is 0, we can definitely unload, else only if some more
    3253             :          * conditions are met */
    3254   289889362 :         if (BBP_refs(i) == 0 &&
    3255   143656526 :             (BBP_lrefs(i) == 0 ||
    3256   122127714 :              (b != NULL && b->theap != NULL
    3257   122132538 :               ? ((swapdirty || !BATdirty(b)) &&
    3258    12390380 :                  !(BBP_status(i) & chkflag) &&
    3259        9333 :                  (BBP_status(i) & BBPPERSISTENT) &&
    3260             :                  /* cannot unload in-memory data */
    3261        4604 :                  !GDKinmemory(farmid) &&
    3262             :                  /* do not unload views or parents of views */
    3263        4604 :                  !BATshared(b) &&
    3264   122132198 :                  b->batCacheid == b->theap->parentid &&
    3265        4484 :                  (b->tvheap == NULL || b->batCacheid == b->tvheap->parentid))
    3266       28398 :               : (BBP_status(i) & BBPTMP)))) {
    3267             :                 /* bat will be unloaded now. set the UNLOADING bit
    3268             :                  * while locked so no other thread thinks it's
    3269             :                  * available anymore */
    3270    21508341 :                 assert((BBP_status(i) & BBPUNLOADING) == 0);
    3271    21508341 :                 TRC_DEBUG(BAT_, "%s set to unloading BAT %d (status %u, lrefs %d)\n", func, i, BBP_status(i), BBP_lrefs(i));
    3272    21508341 :                 BBP_status_on(i, BBPUNLOADING);
    3273    21508341 :                 swap = true;
    3274             :         } /* else: bat cannot be swapped out */
    3275   167733249 :         lrefs = BBP_lrefs(i); /* read before unlocking; the value is used below without the lock */
    3276   167733249 :         if (locked)
    3277   167319116 :                 MT_lock_unset(&b->theaplock);
    3278             : 
    3279             :         /* unlock before re-locking in unload; as saving a dirty
    3280             :          * persistent bat may take a long time */
    3281   168416534 :         if (lock)
    3282   168048773 :                 MT_lock_unset(&GDKswapLock(i));
    3283             : 
    3284   168230677 :         if (swap) {
    3285    21531322 :                 if (b != NULL) {
    3286    21524228 :                         if (lrefs == 0 && (BBP_status(i) & BBPDELETED) == 0) {
    3287             :                                 /* free memory (if loaded) and delete from
    3288             :                                  * disk (if transient but saved) */
    3289    21518157 :                                 BBPdestroy(b);
    3290             :                         } else {
    3291        6071 :                                 TRC_DEBUG(BAT_, "%s unload and free bat %d\n", func, i);
    3292             :                                 /* free memory of transient */
    3293        6071 :                                 if (BBPfree(b) != GDK_SUCCEED)
    3294             :                                         return -1;      /* indicate failure */
    3295             :                         }
    3296        7094 :                 } else if (lrefs == 0 && (BBP_status(i) & BBPDELETED) == 0) {
    3297        3651 :                         BATdelete(BBP_desc(i));
    3298        3651 :                         BBPclear(i);
    3299             :                 } else {
    3300        3443 :                         BBP_status_off(i, BBPUNLOADING); /* nothing to free or delete: just undo the UNLOADING mark set above */
    3301             :                 }
    3302             :         }
    3303             :         return refs;
    3304             : }
    3305             : /* BBPunfix: drop one physical reference (fix) on bat i; a thin wrapper
                     :  * around decref(i, false, true, ...), so GDKswapLock(i) is always taken.
                     :  * returns decref()'s result: the new count, or -1 on failure */
    3306             : int
    3307   120865207 : BBPunfix(bat i)
    3308             : {
    3309   120865207 :         return decref(i, false, true, __func__);
    3310             : }
    3311             : /* BBPrelease: drop one logical reference on bat i; a thin wrapper
                     :  * around decref(i, true, true, ...), so GDKswapLock(i) is always taken.
                     :  * returns decref()'s result: the new count, or -1 on failure */
    3312             : int
    3313    51940915 : BBPrelease(bat i)
    3314             : {
    3315    51940915 :         return decref(i, true, true, __func__);
    3316             : }
    3317             : /* BBPkeepref: trade a physical reference on b for a logical one.
                     :  * Takes a logical reference first, calls BATsetaccess(b, BAT_READ),
                     :  * and then drops one physical reference.  When the logical count
                     :  * just became 1, BATsettrivprop() is run under b->theaplock; when
                     :  * CHECKMASK is set in GDKdebug, BATassertProps() is run as well.
                     :  * b must not be NULL (asserted). */
    3318             : void
    3319     7221906 : BBPkeepref(BAT *b)
    3320             : {
    3321     7221906 :         assert(b != NULL);
    3322     7221906 :         bool lock = locked_by == 0 || locked_by != MT_getpid(); /* false only when locked_by is set and equals MT_getpid() */
    3323     7221906 :         int i = b->batCacheid;
    3324     7221906 :         int refs = incref(i, true, lock);
    3325     7300748 :         if (refs == 1) {
    3326     7032446 :                 MT_lock_set(&b->theaplock);
    3327     7020256 :                 BATsettrivprop(b);
    3328     7008994 :                 MT_lock_unset(&b->theaplock);
    3329             :         }
    3330     7295556 :         if (ATOMIC_GET(&GDKdebug) & CHECKMASK)
    3331     7231169 :                 BATassertProps(b);
    3332     7293591 :         if (BATsetaccess(b, BAT_READ) == NULL)
    3333             :                 return;         /* already decreffed */
    3334             : 
    3335     5584433 :         refs = decref(i, false, lock, __func__); /* give up the physical reference, using the same locking choice as the incref above */
    3336     5571157 :         (void) refs;
    3337     5571157 :         assert(refs >= 0);
    3338             : }
    3339             : /* BATdescriptor: return the descriptor of bat i with one physical
                     :  * reference taken, loading the bat via getBBPdescriptor() when it is
                     :  * not BBPLOADED.
                     :  * returns NULL when BBPcheck(i) fails, when incref() does not return
                     :  * a positive count, or when loading fails (in which case the
                     :  * reference taken here is dropped again). */
    3340             : BAT *
    3341   105438795 : BATdescriptor(bat i)
    3342             : {
    3343   105438795 :         BAT *b = NULL;
    3344             : 
    3345   105438795 :         if (BBPcheck(i)) {
    3346   105471507 :                 bool lock = locked_by == 0 || locked_by != MT_getpid();
    3347             :                 if (lock) {
    3348   105471507 :                         for (;;) {
    3349   105471507 :                                 MT_lock_set(&GDKswapLock(i));
    3350   105795682 :                                 if (!(BBP_status(i) & (BBPUNSTABLE|BBPLOADING)))
    3351             :                                         break;
    3352             :                                 /* the BAT is "unstable", try again */
    3353           0 :                                 MT_lock_unset(&GDKswapLock(i));
    3354           0 :                                 BBPspin(i, __func__, BBPUNSTABLE|BBPLOADING);
    3355             :                         }
    3356             :                 }
    3357   105795682 :                 if (incref(i, false, false) > 0) { /* incref must not lock: when lock is true the swap lock is already held here */
    3358   106303639 :                         if ((BBP_status(i) & BBPLOADED) == 0) {
    3359       20697 :                                 b = getBBPdescriptor(i);
    3360       20697 :                                 if (b == NULL) {
    3361             :                                         /* if loading failed, we need to
    3362             :                                          * compensate for the incref */
    3363           0 :                                         decref(i, false, false, __func__);
    3364             :                                 }
    3365             :                         } else {
    3366   106282942 :                                 b = BBP_desc(i);
    3367             :                         }
    3368             :                 }
    3369   106303639 :                 if (lock)
    3370   106352753 :                         MT_lock_unset(&GDKswapLock(i));
    3371             :         }
    3372   106384088 :         return b;
    3373             : }
    3374             : 
    3375             : /*
    3376             :  * getBBPdescriptor checks whether the BAT needs loading and does so
    3377             :  * if necessary. You must have at least one fix on the BAT before
    3378             :  * calling this (asserted through BBP_refs(i)).
                     :  *
                     :  * Returns NULL when BBPcheck() fails or when BATload_intern() fails;
                     :  * otherwise the loaded descriptor, or BBP_desc(i) when no load was
                     :  * started here.
                     :  *
                     :  * NOTE(review): the BBPWAITING loop releases and retakes
                     :  * GDKswapLock(i) unconditionally, so it looks like the caller is
                     :  * expected to hold that lock; BATdescriptor() calls this without
                     :  * the lock when its own `lock' flag is false -- confirm that this
                     :  * case cannot reach the loop.
    3379             :  */
    3380             : static BAT *
    3381       20697 : getBBPdescriptor(bat i)
    3382             : {
    3383       20697 :         bool load = false;
    3384       20697 :         BAT *b = NULL;
    3385             : 
    3386       20697 :         assert(i > 0);
    3387       20697 :         if (!BBPcheck(i)) {
    3388           0 :                 GDKerror("BBPcheck failed for bat id %d\n", i);
    3389           0 :                 return NULL;
    3390             :         }
    3391       20696 :         assert(BBP_refs(i));
    3392       20696 :         unsigned status = BBP_status(i);
    3393       20696 :         b = BBP_desc(i);
    3394       20696 :         if ((status & BBPLOADED) == 0 || status & BBPWAITING) {
    3395       20696 :                 while (BBP_status(i) & BBPWAITING) {        /* wait for bat to be loaded by other thread */
    3396           0 :                         MT_lock_unset(&GDKswapLock(i));
    3397           0 :                         BBPspin(i, __func__, BBPWAITING);
    3398       20696 :                         MT_lock_set(&GDKswapLock(i));
    3399             :                 }
    3400       20696 :                 if (BBPvalid(i)) {
    3401       20696 :                         if ((BBP_status(i) & BBPLOADED) == 0) { /* still not loaded after waiting: this thread does the load */
    3402       20696 :                                 load = true;
    3403       20696 :                                 TRC_DEBUG(BAT_, "set to loading BAT %d\n", i);
    3404       20696 :                                 BBP_status_on(i, BBPLOADING);
    3405             :                         }
    3406             :                 }
    3407             :         }
    3408       20696 :         if (load) {
    3409       20696 :                 TRC_DEBUG(IO_, "load %s\n", BBP_logical(i));
    3410             : 
    3411       20696 :                 b = BATload_intern(i, false);
    3412             : 
    3413       20697 :                 BBP_status_off(i, BBPLOADING); /* cleared whether or not the load succeeded */
    3414       20697 :                 CHECKDEBUG if (b != NULL)
    3415       17738 :                         BATassertProps(b);
    3416             :         }
    3417             :         return b;
    3418             : }
    3419             : 
    3420             : /*
    3421             :  * BBPsave does its I/O unlocked; it just sets BBPSAVING in the
    3422             :  * BBP_status of the BAT, so others that want to save or unload this
    3423             :  * BAT must spin on the BBP_status field.
                     :  *
                     :  * Returns GDK_SUCCEED without writing the BAT when it has no logical
                     :  * references, is a view, or is not dirty (only a dirty hash is saved
                     :  * in that case), and also after merely waiting for a save that is
                     :  * running in another thread.  Otherwise returns the result of
                     :  * BBPbackup()/BATsave().
    3424             :  */
    3425             : gdk_return
    3426        6705 : BBPsave(BAT *b)
    3427             : {
    3428        6705 :         bool lock = locked_by == 0 || locked_by != MT_getpid();
    3429        6705 :         bat bid = b->batCacheid;
    3430        6705 :         gdk_return ret = GDK_SUCCEED;
    3431             : 
    3432        6705 :         MT_lock_set(&b->theaplock);
    3433        6705 :         if (BBP_lrefs(bid) == 0 || isVIEW(b) || !BATdirty(b)) {
    3434             :                 /* do nothing */
    3435        5918 :                 MT_lock_unset(&b->theaplock);
    3436        5918 :                 MT_rwlock_rdlock(&b->thashlock);
    3437        5918 :                 if (b->thash && b->thash != (Hash *) 1 &&
    3438         126 :                     (b->thash->heaplink.dirty || b->thash->heapbckt.dirty))
    3439          77 :                         BAThashsave(b, (BBP_status(bid) & BBPPERSISTENT) != 0);
    3440        5918 :                 MT_rwlock_rdunlock(&b->thashlock);
    3441        5918 :                 return GDK_SUCCEED;
    3442             :         }
    3443         787 :         MT_lock_unset(&b->theaplock);
    3444         787 :         if (lock)
    3445         787 :                 MT_lock_set(&GDKswapLock(bid));
    3446             : 
    3447         787 :         if (BBP_status(bid) & BBPSAVING) {
    3448             :                 /* wait until save in other thread completes */
    3449           0 :                 if (lock)
    3450           0 :                         MT_lock_unset(&GDKswapLock(bid));
    3451           0 :                 BBPspin(bid, __func__, BBPSAVING);
    3452             :         } else {
    3453             :                 /* save it */
    3454         787 :                 unsigned flags = BBPSAVING;
    3455             : 
    3456         787 :                 MT_lock_set(&b->theaplock);
    3457         787 :                 if (DELTAdirty(b)) {
    3458         462 :                         flags |= BBPSWAPPED;
    3459             :                 }
    3460         787 :                 if (b->batTransient) {
    3461         787 :                         flags |= BBPTMP;
    3462             :                 }
    3463         787 :                 MT_lock_unset(&b->theaplock);
    3464         787 :                 BBP_status_on(bid, flags);
    3465         787 :                 if (lock)
    3466         787 :                         MT_lock_unset(&GDKswapLock(bid));
    3467             : 
    3468         787 :                 TRC_DEBUG(IO_, "save " ALGOBATFMT "\n", ALGOBATPAR(b));
    3469             : 
    3470             :                 /* do the time-consuming work unlocked */
    3471         787 :                 if (BBP_status(bid) & BBPEXISTING && b->batInserted > 0)
    3472           0 :                         ret = BBPbackup(b, false);
    3473           0 :                 if (ret == GDK_SUCCEED) { /* only save when no backup was needed or the backup succeeded */
    3474         787 :                         ret = BATsave(b);
    3475             :                 }
    3476             :                 /* clearing bits can be done without the lock */
    3477         787 :                 BBP_status_off(bid, BBPSAVING);
    3478             :         }
    3479             :         return ret;
    3480             : }
    3481             : 
    3482             : /*
    3483             :  * TODO merge BBPfree with BATfree? Its function is to prepare a BAT
    3484             :  * for being unloaded (or even destroyed, if the BAT is not
    3485             :  * persistent).
                     :  *
                     :  * BBPdestroy (directly below) gets rid of a BAT altogether: it
                     :  * drops the references b holds on its tail heap, its vheap and
                     :  * any oldtail heap, releases the logical reference on each view
                     :  * parent (tp / vtp non-zero), calls BATdelete(b) and finally
                     :  * BBPclear() on b's bat id.
    3486             :  */
    3487             : static void
    3488    21484383 : BBPdestroy(BAT *b)
    3489             : {
    3490    21484383 :         bat tp = VIEWtparent(b);
    3491    21484383 :         bat vtp = VIEWvtparent(b);
    3492             : 
    3493    21484383 :         if (b->theap) {
    3494    21505063 :                 HEAPdecref(b->theap, tp == 0); /* second argument is true only when b has no parent for this heap */
    3495    21485235 :                 b->theap = NULL;
    3496    21485235 :                 if (tp != 0)
    3497    11378572 :                         BBPrelease(tp);
    3498             :         }
    3499    21461464 :         if (b->tvheap) {
    3500     3357593 :                 HEAPdecref(b->tvheap, vtp == 0);
    3501     3357556 :                 b->tvheap = NULL;
    3502     3357556 :                 if (vtp != 0)
    3503     2466106 :                         BBPrelease(vtp);
    3504             :         }
    3505    21461103 :         if (b->oldtail) {
    3506           2 :                 ATOMIC_AND(&b->oldtail->refs, ~DELAYEDREMOVE); /* clear the DELAYEDREMOVE bit before the final decref */
    3507           2 :                 HEAPdecref(b->oldtail, true);
    3508           2 :                 b->oldtail = NULL;
    3509             :         }
    3510    21461103 :         BATdelete(b);
    3511             : 
    3512    21399462 :         BBPclear(b->batCacheid);     /* if destroyed; de-register from BBP */
    3513    21495428 : }
    3514             : /* BBPfree: unload b.  Saves it first through BBPsave(); only when that
                     :  * succeeds is the memory freed (BATfree, if BBPLOADED) and
                     :  * BBPuncacheit() called.  BBPUNLOADING is switched off in every
                     :  * case, and the work is bracketed by BBP_unload_inc()/_dec().
                     :  * b must be swappable and must not be a view (asserted).
                     :  * returns the result of BBPsave(). */
    3515             : static gdk_return
    3516        6705 : BBPfree(BAT *b)
    3517             : {
    3518        6705 :         bat bid = b->batCacheid;
    3519        6705 :         gdk_return ret;
    3520             : 
    3521        6705 :         assert(bid > 0);
    3522        6705 :         assert(BBPswappable(b));
    3523        6705 :         assert(!isVIEW(b));
    3524             : 
    3525        6705 :         BBP_unload_inc();
    3526             :         /* write dirty BATs before unloading */
    3527        6705 :         ret = BBPsave(b);
    3528        6705 :         if (ret == GDK_SUCCEED) {
    3529        6705 :                 if (BBP_status(bid) & BBPLOADED)
    3530        6705 :                         BATfree(b);     /* free memory */
    3531        6705 :                 BBPuncacheit(bid, false);
    3532             :         }
    3533        6705 :         TRC_DEBUG(BAT_, "turn off unloading %d\n", bid);
    3534        6705 :         BBP_status_off(bid, BBPUNLOADING); /* also on failure, so the bat does not stay marked as unloading */
    3535        6705 :         BBP_unload_dec();
    3536        6705 :         return ret;
    3537             : }
    3538             : 
    3539             : /*
    3540             :  * BBPquickdesc returns a BAT descriptor without loading the entire
    3541             :  * BAT; the result may be used only for a *limited* number of
    3542             :  * purposes. Specifically, during the global sync/commit, we do not
    3543             :  * want to load any BATs that are not already loaded, both because
    3544             :  * this costs performance, and because getting into memory shortage
    3545             :  * during a commit is extremely dangerous. Loading a BAT tends not to
    3546             :  * be required, since the commit actions mostly involve moving some
    3547             :  * pointers in the BAT descriptor.
                     :  *
                     :  * Returns NULL when BBPcheck(bid) fails; a failing id that is not
                     :  * nil is additionally reported with GDKerror.  If the descriptor
                     :  * still carries a negative (unknown) ttype and ATOMindex() now
                     :  * finds the atom by name, ttype is updated in place; otherwise a
                     :  * warning is issued and the descriptor is returned unchanged.
    3548             :  */
    3549             : BAT *
    3550     1651466 : BBPquickdesc(bat bid)
    3551             : {
    3552     1651466 :         BAT *b;
    3553             : 
    3554     1651466 :         if (!BBPcheck(bid)) {
    3555         186 :                 if (!is_bat_nil(bid)) {
    3556           0 :                         GDKerror("called with invalid batid.\n");
    3557           0 :                         assert(0);
    3558             :                 }
    3559             :                 return NULL;
    3560             :         }
    3561     1651142 :         BBPspin(bid, __func__, BBPWAITING);
    3562     1651644 :         b = BBP_desc(bid);
    3563     1651644 :         if (b->ttype < 0) {
    3564         241 :                 const char *aname = ATOMunknown_name(b->ttype);
    3565         241 :                 int tt = ATOMindex(aname);
    3566         241 :                 if (tt < 0) {
    3567           0 :                         GDKwarning("atom '%s' unknown in bat '%s'.\n",
    3568             :                                    aname, BBP_physical(bid));
    3569             :                 } else {
    3570         241 :                         b->ttype = tt;
    3571             :                 }
    3572             :         }
    3573             :         return b;
    3574             : }
    3575             : 
    3576             : /*
    3577             :  * @+ Global Commit
                     :  *
                     :  * dirty_bat decides whether bat *i takes part in a (sub)commit.
                     :  * It returns the descriptor when the bat is valid and either
                     :  *  - loaded and BBPPERSISTENT, and (subcommit or BATdirty), or
                     :  *  - not loaded, and subcommit is true;
                     :  * in every other case it returns NULL.  If a loaded BBPNEW bat
                     :  * fails BATcheckmodes(), *i is negated to flag the error to the
                     :  * caller and NULL is returned.
    3578             :  */
    3579             : static BAT *
    3580     1723109 : dirty_bat(bat *i, bool subcommit)
    3581             : {
    3582     1723109 :         if (BBPvalid(*i)) {
    3583     1706929 :                 BAT *b;
    3584     1706929 :                 BBPspin(*i, __func__, BBPSAVING);
    3585     1706929 :                 if (BBP_status(*i) & BBPLOADED) {
    3586     1640081 :                         b = BBP_desc(*i);
    3587     1640081 :                         MT_lock_set(&b->theaplock);
    3588     1736806 :                         if ((BBP_status(*i) & BBPNEW) &&
    3589       96725 :                             BATcheckmodes(b, false) != GDK_SUCCEED) /* check mmap modes */
    3590           0 :                                 *i = -*i;       /* error */
    3591     1640081 :                         else if ((BBP_status(*i) & BBPPERSISTENT) &&
    3592           0 :                                  (subcommit || BATdirty(b))) {
    3593     1462035 :                                 MT_lock_unset(&b->theaplock);
    3594     1462035 :                                 return b;       /* the bat is loaded, persistent and dirty */
    3595             :                         }
    3596      178046 :                         MT_lock_unset(&b->theaplock);
    3597       66848 :                 } else if (subcommit)
    3598       61788 :                         return BBP_desc(*i); /* not loaded: in a subcommit the descriptor is returned anyway */
    3599             :         }
    3600             :         return NULL;
    3601             : }
    3602             : 
    3603             : /*
    3604             :  * @- backup-bat
    3605             :  * Backup-bat moves all files of a BAT to a backup directory. Only
    3606             :  * after this succeeds, it may be saved. If some failure occurs
    3607             :  * halfway saving, we can thus always roll back.
                     :  *
                     :  * file_move moves name.ext from srcdir to dstdir within the given
                     :  * farm using GDKmove().  It returns GDK_SUCCEED only when GDKmove()
                     :  * succeeds; every failure path returns GDK_FAIL.  The MT_stat()
                     :  * after a failed move only decides whether an additional "cannot
                     :  * stat" error is reported for a missing source file.
    3608             :  */
    3609             : static gdk_return
    3610      199601 : file_move(int farmid, const char *srcdir, const char *dstdir, const char *name, const char *ext)
    3611             : {
    3612      199601 :         if (GDKmove(farmid, srcdir, name, ext, dstdir, name, ext, false) == GDK_SUCCEED) {
    3613             :                 return GDK_SUCCEED;
    3614             :         } else {
    3615           0 :                 char *path;
    3616           0 :                 struct stat st;
    3617             : 
    3618           0 :                 path = GDKfilepath(farmid, srcdir, name, ext);
    3619           0 :                 if (path == NULL)
    3620           0 :                         return GDK_FAIL;
    3621           0 :                 if (MT_stat(path, &st)) {
    3622             :                         /* source file does not exist; the best
    3623             :                          * recovery is to give an error but continue
    3624             :                          * by considering the BAT as not saved; making
    3625             :                          * sure that this time it does get saved.
    3626             :                          */
    3627           0 :                         GDKsyserror("file_move: cannot stat %s\n", path);
    3628           0 :                         GDKfree(path);
    3629           0 :                         return GDK_FAIL;        /* fishy, but not fatal */
    3630             :                 }
    3631           0 :                 GDKfree(path);
    3632             :         }
    3633           0 :         return GDK_FAIL; /* the source exists but the move failed all the same */
    3634             : }
    3635             : 
    3636             : /* returns true if the file exists, i.e. if MT_stat() succeeds on the
                     :  * path that GDKfilepath() builds from farmid, dir, name and ext;
                     :  * returns false as well when that path cannot be built */
    3637             : static bool
    3638     2669021 : file_exists(int farmid, const char *dir, const char *name, const char *ext)
    3639             : {
    3640     2669021 :         char *path;
    3641     2669021 :         struct stat st;
    3642     2669021 :         int ret = -1; /* stays -1 ("does not exist") when no path could be built */
    3643             : 
    3644     2669021 :         path = GDKfilepath(farmid, dir, name, ext);
    3645     2669021 :         if (path) {
    3646     2669021 :                 ret = MT_stat(path, &st);
    3647     2669021 :                 TRC_DEBUG(IO_, "stat(%s) = %d\n", path, ret);
    3648     2669021 :                 GDKfree(path);
    3649             :         }
    3650     2669021 :         return (ret == 0);
    3651             : }
    3652             : /* heap_move: bring the file nme.ext of heap hp from srcdir to dstdir.
                     :  * Three cases, tried in this order:
                     :  *  1. the file already exists in dstdir: nothing is done;
                     :  *  2. hp->newstorage is STORE_PRIV and the file is absent from
                     :  *     srcdir: an empty "<ext>.kill" marker file is created in dstdir;
                     :  *  3. otherwise the file is moved with file_move().
                     :  * returns GDK_SUCCEED or GDK_FAIL accordingly. */
    3653             : static gdk_return
    3654      199597 : heap_move(Heap *hp, const char *srcdir, const char *dstdir, const char *nme, const char *ext)
    3655             : {
    3656             :         /* see doc at BATsetaccess()/gdk_bat.c for an expose on mmap
    3657             :          * heap modes */
    3658      199597 :         if (file_exists(hp->farmid, dstdir, nme, ext)) {
    3659             :                 /* don't overwrite heap with the committed state
    3660             :                  * already in dstdir */
    3661             :                 return GDK_SUCCEED;
    3662      199597 :         } else if (hp->newstorage == STORE_PRIV &&
    3663           0 :                    !file_exists(hp->farmid, srcdir, nme, ext)) {
    3664             : 
    3665             :                 /* In order to prevent half-saved X.new files
    3666             :                  * surviving a recover we create a dummy file in the
    3667             :                  * BACKUP(dstdir) whose presence will trigger
    3668             :                  * BBPrecover to remove them.  Thus, X will prevail
    3669             :                  * where it otherwise wouldn't have.  If X already has
    3670             :                  * a saved X.new, that one is backed up as normal.
    3671             :                  */
    3672             : 
    3673           0 :                 FILE *fp;
    3674           0 :                 long_str kill_ext;
    3675           0 :                 char *path;
    3676             : 
    3677           0 :                 strconcat_len(kill_ext, sizeof(kill_ext), ext, ".kill", NULL);
    3678           0 :                 path = GDKfilepath(hp->farmid, dstdir, nme, kill_ext);
    3679           0 :                 if (path == NULL)
    3680             :                         return GDK_FAIL;
    3681           0 :                 fp = MT_fopen(path, "w");
    3682           0 :                 if (fp == NULL)
    3683           0 :                         GDKsyserror("heap_move: cannot open file %s\n", path);
    3684           0 :                 TRC_DEBUG(IO_, "open %s = %d\n", path, fp ? 0 : -1);
    3685           0 :                 GDKfree(path);
    3686             : 
    3687           0 :                 if (fp != NULL) {
    3688           0 :                         fclose(fp); /* the marker stays empty: it is closed right after creation */
    3689           0 :                         return GDK_SUCCEED;
    3690             :                 } else {
    3691             :                         return GDK_FAIL;
    3692             :                 }
    3693             :         }
    3694      199597 :         return file_move(hp->farmid, srcdir, dstdir, nme, ext);
    3695             : }
    3696             : 
    3697             : /*
    3698             :  * @- BBPprepare
    3699             :  *
    3700             :  * this routine makes sure there is a BAKDIR/, and initiates one if
    3701             :  * not.  For subcommits, it does the same with SUBDIR.
    3702             :  *
    3703             :  * It is now locked, to get proper file counters, and also to prevent
    3704             :  * concurrent BBPrecovers, etc.
    3705             :  *
    3706             :  * backup_dir == 0 => no backup BBP.dir
    3707             :  * backup_dir == 1 => BBP.dir saved in BACKUP/
    3708             :  * backup_dir == 2 => BBP.dir saved in SUBCOMMIT/
    3709             :  */
    3710             : 
    3711             : static gdk_return
    3712       21858 : BBPprepare(bool subcommit)
    3713             : {
    3714       21858 :         bool start_subcommit;
    3715       21858 :         int set = 1 + subcommit;
    3716       21858 :         gdk_return ret = GDK_SUCCEED;
    3717             : 
    3718       21858 :         start_subcommit = (subcommit && backup_subdir == 0);
    3719       10743 :         if (start_subcommit) {
    3720             :                 /* starting a subcommit. Make sure SUBDIR and DELDIR
    3721             :                  * are clean */
    3722       10743 :                 ret = BBPrecover_subdir();
    3723       10743 :                 if (ret != GDK_SUCCEED)
    3724             :                         return ret;
    3725             :         }
    3726       21858 :         if (backup_files == 0) {
    3727         356 :                 backup_dir = 0;
    3728         356 :                 ret = BBPrecover(0);
    3729         356 :                 if (ret != GDK_SUCCEED)
    3730             :                         return ret;
    3731         356 :                 str bakdirpath = GDKfilepath(0, NULL, BAKDIR, NULL);
    3732         356 :                 if (bakdirpath == NULL) {
    3733             :                         return GDK_FAIL;
    3734             :                 }
    3735             : 
    3736         356 :                 if (MT_mkdir(bakdirpath) < 0 && errno != EEXIST) {
    3737           0 :                         GDKsyserror("cannot create directory %s\n", bakdirpath);
    3738           0 :                         GDKfree(bakdirpath);
    3739           0 :                         return GDK_FAIL;
    3740             :                 }
    3741             :                 /* if BAKDIR already exists, don't signal error */
    3742         356 :                 TRC_DEBUG(IO_, "mkdir %s = %d\n", bakdirpath, (int) ret);
    3743         356 :                 GDKfree(bakdirpath);
    3744             :         }
    3745       21858 :         if (start_subcommit) {
    3746             :                 /* make a new SUBDIR (subdir of BAKDIR) */
    3747       10743 :                 str subdirpath = GDKfilepath(0, NULL, SUBDIR, NULL);
    3748       10743 :                 if (subdirpath == NULL) {
    3749             :                         return GDK_FAIL;
    3750             :                 }
    3751             : 
    3752       10743 :                 if (MT_mkdir(subdirpath) < 0) {
    3753           0 :                         GDKsyserror("cannot create directory %s\n", subdirpath);
    3754           0 :                         GDKfree(subdirpath);
    3755           0 :                         return GDK_FAIL;
    3756             :                 }
    3757       10743 :                 TRC_DEBUG(IO_, "mkdir %s\n", subdirpath);
    3758       10743 :                 GDKfree(subdirpath);
    3759             :         }
    3760       21858 :         if (backup_dir != set) {
    3761             :                 /* a valid backup dir *must* at least contain BBP.dir */
    3762       44040 :                 if ((ret = GDKmove(0, backup_dir ? BAKDIR : BATDIR, "BBP", "dir", subcommit ? SUBDIR : BAKDIR, "BBP", "dir", true)) != GDK_SUCCEED)
    3763             :                         return ret;
    3764       21842 :                 backup_dir = set;
    3765             :         }
    3766             :         /* increase counters */
    3767       21858 :         backup_subdir += subcommit;
    3768       21858 :         backup_files++;
    3769             : 
    3770       21858 :         return ret;
    3771             : }
    3772             : 
    3773             : static gdk_return
    3774     1043307 : do_backup(Heap *h, bool dirty, bool subcommit)
    3775             : {
    3776     1043307 :         gdk_return ret = GDK_SUCCEED;
    3777     1043307 :         char extnew[16];
    3778             : 
    3779     1043307 :         if (h->wasempty) {
    3780             :                 return GDK_SUCCEED;
    3781             :         }
    3782             : 
    3783             :         /* direct mmap is unprotected (readonly usage, or has WAL
    3784             :          * protection) */
    3785     1043307 :         if (h->storage != STORE_MMAP) {
    3786             :                 /* STORE_PRIV saves into X.new files. Two cases could
    3787             :                  * happen. The first is when a valid X.new exists
    3788             :                  * because of an access change or a previous
    3789             :                  * commit. This X.new should be backed up as
    3790             :                  * usual. The second case is when X.new doesn't
    3791             :                  * exist. In that case we could have half written
    3792             :                  * X.new files (after a crash). To protect against
    3793             :                  * these we write X.new.kill files in the backup
    3794             :                  * directory (see heap_move). */
    3795     1035115 :                 gdk_return mvret = GDK_SUCCEED;
    3796             : 
    3797     1035115 :                 char *srcdir = GDKfilepath(NOFARM, BATDIR, h->filename, NULL);
    3798     1035115 :                 if (srcdir == NULL)
    3799             :                         return GDK_FAIL;
    3800     1035115 :                 char *nme = strrchr(srcdir, DIR_SEP);
    3801     1035115 :                 assert(nme != NULL);
    3802     1035115 :                 *nme++ = '\0';
    3803     1035115 :                 char *ext = strchr(nme, '.');
    3804     1035115 :                 assert(ext != NULL);
    3805     1035115 :                 *ext++ = '\0';
    3806             : 
    3807     1035115 :                 strconcat_len(extnew, sizeof(extnew), ext, ".new", NULL);
    3808     1234712 :                 if (dirty &&
    3809      399194 :                     !file_exists(h->farmid, BAKDIR, nme, extnew) &&
    3810      199597 :                     !file_exists(h->farmid, BAKDIR, nme, ext)) {
    3811             :                         /* if the heap is dirty and there is no heap
    3812             :                          * file (with or without .new extension) in
    3813             :                          * the BAKDIR, move the heap (preferably with
    3814             :                          * .new extension) to the correct backup
    3815             :                          * directory */
    3816      199597 :                         if (file_exists(h->farmid, srcdir, nme, extnew)) {
    3817           0 :                                 mvret = heap_move(h, srcdir,
    3818             :                                                   subcommit ? SUBDIR : BAKDIR,
    3819             :                                                   nme, extnew);
    3820      199597 :                         } else if (file_exists(h->farmid, srcdir, nme, ext)) {
    3821      199597 :                                 mvret = heap_move(h, srcdir,
    3822             :                                                   subcommit ? SUBDIR : BAKDIR,
    3823             :                                                   nme, ext);
    3824      199597 :                                 if (mvret == GDK_SUCCEED) {
    3825             :                                         /* file no longer in "standard"
    3826             :                                          * location */
    3827      199597 :                                         h->hasfile = false;
    3828             :                                 }
    3829             :                         }
    3830      835518 :                 } else if (subcommit) {
    3831             :                         /* if subcommit, we may need to move an
    3832             :                          * already made backup from BAKDIR to
    3833             :                          * SUBDIR */
    3834      835518 :                         if (file_exists(h->farmid, BAKDIR, nme, extnew))
    3835           0 :                                 mvret = file_move(h->farmid, BAKDIR, SUBDIR, nme, extnew);
    3836      835518 :                         else if (file_exists(h->farmid, BAKDIR, nme, ext))
    3837           4 :                                 mvret = file_move(h->farmid, BAKDIR, SUBDIR, nme, ext);
    3838             :                 }
    3839             :                 /* there is a situation where the move may fail,
    3840             :                  * namely if this heap was not supposed to be existing
    3841             :                  * before, i.e. after a BATmaterialize on a persistent
    3842             :                  * bat; as a workaround, do not complain about move
    3843             :                  * failure if the source file is nonexistent
    3844             :                  */
    3845      199601 :                 if (mvret != GDK_SUCCEED && file_exists(h->farmid, srcdir, nme, ext)) {
    3846     1035115 :                         ret = GDK_FAIL;
    3847             :                 }
    3848     1035115 :                 if (subcommit &&
    3849     1035115 :                     (h->storage == STORE_PRIV || h->newstorage == STORE_PRIV)) {
    3850           0 :                         long_str kill_ext;
    3851             : 
    3852           0 :                         strconcat_len(kill_ext, sizeof(kill_ext),
    3853             :                                       ext, ".new.kill", NULL);
    3854           0 :                         if (file_exists(h->farmid, BAKDIR, nme, kill_ext) &&
    3855           0 :                             file_move(h->farmid, BAKDIR, SUBDIR, nme, kill_ext) != GDK_SUCCEED) {
    3856           0 :                                 ret = GDK_FAIL;
    3857             :                         }
    3858             :                 }
    3859     1035115 :                 GDKfree(srcdir);
    3860             :         }
    3861             :         return ret;
    3862             : }
    3863             : 
    3864             : static gdk_return
    3865      827514 : BBPbackup(BAT *b, bool subcommit)
    3866             : {
    3867      827514 :         gdk_return rc = GDK_SUCCEED;
    3868             : 
    3869      827514 :         MT_lock_set(&b->theaplock);
    3870      827514 :         BATiter bi = bat_iterator_nolock(b);
    3871      827514 :         if (!bi.copiedtodisk || bi.transient) {
    3872           1 :                 MT_lock_unset(&b->theaplock);
    3873           1 :                 return GDK_SUCCEED;
    3874             :         }
    3875      827513 :         assert(b->theap->parentid == b->batCacheid);
    3876      827513 :         if (b->oldtail && b->oldtail != (Heap *) 1) {
    3877        1658 :                 bi.h = b->oldtail;
    3878        1658 :                 bi.hdirty = b->oldtail->dirty;
    3879             :         }
    3880      827513 :         bat_iterator_incref(&bi);
    3881      827513 :         MT_lock_unset(&b->theaplock);
    3882             : 
    3883             :         /* determine location dir and physical suffix */
    3884      827513 :         if (bi.type != TYPE_void) {
    3885      827513 :                 rc = do_backup(bi.h, bi.hdirty, subcommit);
    3886      827513 :                 if (rc == GDK_SUCCEED && bi.vh != NULL)
    3887      215794 :                         rc = do_backup(bi.vh, bi.vhdirty, subcommit);
    3888             :         }
    3889      827513 :         bat_iterator_end(&bi);
    3890      827513 :         return rc;
    3891             : }
    3892             : 
    3893             : static inline void
    3894           0 : BBPcheckHeap(Heap *h)
    3895             : {
    3896           0 :         struct stat statb;
    3897           0 :         char *path;
    3898             : 
    3899           0 :         char *s = strrchr(h->filename, DIR_SEP);
    3900           0 :         if (s)
    3901           0 :                 s++;
    3902             :         else
    3903             :                 s = h->filename;
    3904           0 :         path = GDKfilepath(0, BAKDIR, s, NULL);
    3905           0 :         if (path == NULL)
    3906           0 :                 return;
    3907           0 :         if (MT_stat(path, &statb) < 0) {
    3908           0 :                 GDKfree(path);
    3909           0 :                 path = GDKfilepath(0, BATDIR, h->filename, NULL);
    3910           0 :                 if (path == NULL)
    3911             :                         return;
    3912           0 :                 if (MT_stat(path, &statb) < 0) {
    3913           0 :                         GDKsyserror("cannot stat file %s (expected size %zu)\n",
    3914             :                                     path, h->free);
    3915           0 :                         assert(0);
    3916             :                         GDKfree(path);
    3917             :                         return;
    3918             :                 }
    3919             :         }
    3920           0 :         assert((statb.st_mode & S_IFMT) == S_IFREG);
    3921           0 :         assert((size_t) statb.st_size >= h->free);
    3922           0 :         if ((size_t) statb.st_size < h->free) {
    3923             :                 GDKerror("file %s too small (expected %zu, actual %zu)\n", path, h->free, (size_t) statb.st_size);
    3924             :                 GDKfree(path);
    3925             :                 return;
    3926             :         }
    3927           0 :         GDKfree(path);
    3928             : }
    3929             : 
    3930             : static void
    3931           0 : BBPcheckBBPdir(void)
    3932             : {
    3933           0 :         FILE *fp;
    3934           0 :         int lineno = 0;
    3935           0 :         bat bbpsize = 0;
    3936           0 :         unsigned bbpversion;
    3937           0 :         lng logno;
    3938             : 
    3939           0 :         fp = GDKfileopen(0, BAKDIR, "BBP", "dir", "r");
    3940           0 :         assert(fp != NULL);
    3941           0 :         if (fp == NULL) {
    3942             :                 fp = GDKfileopen(0, BATDIR, "BBP", "dir", "r");
    3943             :                 assert(fp != NULL);
    3944             :                 if (fp == NULL)
    3945             :                         return;
    3946             :         }
    3947           0 :         bbpversion = BBPheader(fp, &lineno, &bbpsize, &logno, false);
    3948           0 :         if (bbpversion == 0) {
    3949           0 :                 fclose(fp);
    3950           0 :                 return;         /* error reading file */
    3951             :         }
    3952           0 :         assert(bbpversion == GDKLIBRARY);
    3953             : 
    3954           0 :         for (;;) {
    3955           0 :                 BAT b;
    3956           0 :                 Heap h;
    3957           0 :                 Heap vh;
    3958           0 :                 vh = h = (Heap) {
    3959             :                         .free = 0,
    3960             :                 };
    3961           0 :                 b = (BAT) {
    3962             :                         .theap = &h,
    3963             :                         .tvheap = &vh,
    3964             :                 };
    3965           0 :                 char filename[sizeof(BBP_physical(0))];
    3966           0 :                 char batname[129];
    3967             : #ifdef GDKLIBRARY_HASHASH
    3968           0 :                 int hashash;
    3969             : #endif
    3970             : 
    3971           0 :                 switch (BBPreadBBPline(fp, bbpversion, &lineno, &b,
    3972             : #ifdef GDKLIBRARY_HASHASH
    3973             :                                        &hashash,
    3974             : #endif
    3975             :                                        batname, filename, NULL)) {
    3976           0 :                 case 0:
    3977             :                         /* end of file */
    3978           0 :                         fclose(fp);
    3979             :                         /* don't leak errors, this is just debug code */
    3980           0 :                         GDKclrerr();
    3981           0 :                         return;
    3982             :                 case 1:
    3983             :                         /* successfully read an entry */
    3984           0 :                         break;
    3985           0 :                 default:
    3986             :                         /* error */
    3987           0 :                         fclose(fp);
    3988           0 :                         return;
    3989             :                 }
    3990             : #ifdef GDKLIBRARY_HASHASH
    3991           0 :                 assert(hashash == 0);
    3992             : #endif
    3993           0 :                 assert(b.batCacheid < (bat) ATOMIC_GET(&BBPsize));
    3994           0 :                 assert(b.hseqbase <= GDK_oid_max);
    3995           0 :                 if (b.ttype == TYPE_void) {
    3996             :                         /* no files needed */
    3997           0 :                         continue;
    3998             :                 }
    3999           0 :                 if (b.theap->free > 0)
    4000           0 :                         BBPcheckHeap(b.theap);
    4001           0 :                 if (b.tvheap != NULL && b.tvheap->free > 0)
    4002           0 :                         BBPcheckHeap(b.tvheap);
    4003             :         }
    4004             : }
    4005             : 
    4006             : /*
    4007             :  * @+ Atomic Write
    4008             :  * The atomic BBPsync() function first safeguards the old images of
    4009             :  * all files to be written in BAKDIR. It then saves all files. If that
    4010             :  * succeeds fully, BAKDIR is renamed to DELDIR. The rename is
    4011             :  * considered an atomic action. If it succeeds, the DELDIR is removed.
    4012             :  * If something fails, the pre-sync status can be obtained by moving
    4013             :  * back all backed up files; this is done by BBPrecover().
    4014             :  *
    4015             :  * The BBP.dir is also moved into the BAKDIR.
    4016             :  */
    4017             : gdk_return
    4018       10759 : BBPsync(int cnt, bat *restrict subcommit, BUN *restrict sizes, lng logno)
    4019             : {
    4020       10759 :         gdk_return ret = GDK_SUCCEED;
    4021       10759 :         lng t0 = 0, t1 = 0;
    4022       10759 :         str bakdir, deldir;
    4023       10759 :         const bool lock = locked_by == 0 || locked_by != MT_getpid();
    4024       10759 :         char buf[3000];
    4025       10759 :         int n = subcommit ? 0 : -1;
    4026          16 :         FILE *obbpf, *nbbpf;
    4027             : 
    4028       10759 :         if ((bakdir = GDKfilepath(0, NULL, subcommit ? SUBDIR : BAKDIR, NULL)) == NULL)
    4029             :                 return GDK_FAIL;
    4030       10759 :         if ((deldir = GDKfilepath(0, NULL, DELDIR, NULL)) == NULL) {
    4031           0 :                 GDKfree(bakdir);
    4032           0 :                 return GDK_FAIL;
    4033             :         }
    4034             : 
    4035       10759 :         TRC_DEBUG_IF(PERF) t0 = t1 = GDKusec();
    4036             : 
    4037       10759 :         if ((ATOMIC_GET(&GDKdebug) & TAILCHKMASK) && !GDKinmemory(0))
    4038           0 :                 BBPcheckBBPdir();
    4039             : 
    4040       10759 :         ret = BBPprepare(subcommit != NULL);
    4041             : 
    4042       10759 :         if (ret == GDK_SUCCEED) {
    4043       10759 :                 ret = BBPdir_first(subcommit != NULL, logno, &obbpf, &nbbpf);
    4044             :         }
    4045             : 
    4046     1733868 :         for (int idx = 1; ret == GDK_SUCCEED && idx < cnt; idx++) {
    4047     1723109 :                 bat i = subcommit ? subcommit[idx] : idx;
    4048     1723109 :                 BUN size = sizes ? sizes[idx] : BUN_NONE;
    4049     1723109 :                 BATiter bi, *bip;
    4050             : 
    4051     1723109 :                 const bat bid = i;
    4052     1723109 :                 if (lock)
    4053     1701869 :                         MT_lock_set(&GDKswapLock(bid));
    4054             :                 /* set flag that we're syncing, i.e. that we'll
    4055             :                  * be between moving heap to backup dir and
    4056             :                  * saving the new version, in other words, the
    4057             :                  * heap may not exist in the usual location */
    4058     1723109 :                 BBP_status_on(bid, BBPSYNCING);
    4059             :                 /* wait until unloading is finished before
    4060             :                  * attempting to make a backup */
    4061     1723109 :                 while (BBP_status(bid) & BBPUNLOADING) {
    4062           0 :                         if (lock)
    4063           0 :                                 MT_lock_unset(&GDKswapLock(bid));
    4064           0 :                         BBPspin(bid, __func__, BBPUNLOADING);
    4065           0 :                         if (lock)
    4066     1723109 :                                 MT_lock_set(&GDKswapLock(bid));
    4067             :                 }
    4068     1723109 :                 BAT *b = dirty_bat(&i, subcommit != NULL);
    4069     1723109 :                 if (i <= 0)
    4070             :                         ret = GDK_FAIL;
    4071     1723109 :                 else if (BBP_status(bid) & BBPEXISTING &&
    4072     1423115 :                          b != NULL &&
    4073     1423115 :                          b->batInserted > 0)
    4074      827514 :                         ret = BBPbackup(b, subcommit != NULL);
    4075             : 
    4076     1723109 :                 if (lock)
    4077     1701869 :                         MT_lock_unset(&GDKswapLock(bid));
    4078             : 
    4079     1723109 :                 if (ret != GDK_SUCCEED)
    4080             :                         break;
    4081             : 
    4082     1723109 :                 if (BBP_status(i) & BBPPERSISTENT) {
    4083     1525362 :                         MT_lock_set(&BBP_desc(i)->theaplock);
    4084     1525362 :                         bi = bat_iterator_nolock(BBP_desc(i));
    4085     1525362 :                         bat_iterator_incref(&bi);
    4086     1525362 :                         assert(sizes == NULL || size <= bi.count);
    4087     1519280 :                         assert(sizes == NULL || bi.width == 0 || (bi.type == TYPE_msk ? ((size + 31) / 32) * 4 : size << bi.shift) <= bi.hfree);
    4088     1525362 :                         if (size > bi.count) /* includes sizes==NULL */
    4089             :                                 size = bi.count;
    4090     1525362 :                         bi.b->batInserted = size;
    4091     1525362 :                         if (bi.b->ttype >= 0 && ATOMvarsized(bi.b->ttype)) {
    4092             :                                 /* see epilogue() for other part of this */
    4093             :                                 /* remember the tail we're saving */
    4094      345714 :                                 if (BATsetprop_nolock(bi.b, (enum prop_t) 20, TYPE_ptr, &bi.h) == NULL) {
    4095           0 :                                         GDKerror("setprop failed\n");
    4096           0 :                                         ret = GDK_FAIL;
    4097             :                                 } else {
    4098      345714 :                                         if (bi.b->oldtail == NULL)
    4099      343745 :                                                 bi.b->oldtail = (Heap *) 1;
    4100      345714 :                                         HEAPincref(bi.h);
    4101             :                                 }
    4102             :                         }
    4103     1525362 :                         MT_lock_unset(&bi.b->theaplock);
    4104     1525362 :                         if (ret == GDK_SUCCEED && b && size != 0) {
    4105             :                                 /* wait for BBPSAVING so that we
    4106             :                                  * can set it, wait for
    4107             :                                  * BBPUNLOADING before
    4108             :                                  * attempting to save */
    4109      892744 :                                 for (;;) {
    4110      892744 :                                         if (lock)
    4111      892744 :                                                 MT_lock_set(&GDKswapLock(i));
    4112      892744 :                                         if (!(BBP_status(i) & (BBPSAVING|BBPUNLOADING)))
    4113             :                                                 break;
    4114           0 :                                         if (lock)
    4115           0 :                                                 MT_lock_unset(&GDKswapLock(i));
    4116           0 :                                         BBPspin(i, __func__, BBPSAVING|BBPUNLOADING);
    4117             :                                 }
    4118      892744 :                                 BBP_status_on(i, BBPSAVING);
    4119      892744 :                                 if (lock)
    4120      892744 :                                         MT_lock_unset(&GDKswapLock(i));
    4121      892744 :                                 ret = BATsave_iter(b, &bi, size);
    4122      892744 :                                 BBP_status_off(i, BBPSAVING);
    4123             :                         }
    4124             :                         bip = &bi;
    4125             :                 } else {
    4126             :                         bip = NULL;
    4127             :                 }
    4128     1723109 :                 if (ret == GDK_SUCCEED) {
    4129     1723109 :                         n = BBPdir_step(i, size, n, buf, sizeof(buf), &obbpf, nbbpf, bip);
    4130     1723109 :                         if (n < -1)
    4131           0 :                                 ret = GDK_FAIL;
    4132             :                 }
    4133     1723109 :                 if (bip)
    4134     1525362 :                         bat_iterator_end(bip);
    4135             :                 /* we once again have a saved heap */
    4136             :         }
    4137             : 
    4138       10759 :         TRC_DEBUG(PERF, "write time "LLFMT" usec\n", (t0 = GDKusec()) - t1);
    4139             : 
    4140       10759 :         if (ret == GDK_SUCCEED) {
    4141       10759 :                 ret = BBPdir_last(n, buf, sizeof(buf), obbpf, nbbpf);
    4142             :         }
    4143             : 
    4144       10759 :         TRC_DEBUG(PERF, "dir time "LLFMT" usec, %d bats\n", (t1 = GDKusec()) - t0, (bat) ATOMIC_GET(&BBPsize));
    4145             : 
    4146       10759 :         if (ret == GDK_SUCCEED) {
    4147             :                 /* atomic switchover */
    4148             :                 /* this is the big one: this call determines
    4149             :                  * whether the operation of this function
    4150             :                  * succeeded, so no changing of ret after this
    4151             :                  * call anymore */
    4152             : 
    4153       10759 :                 if (MT_rename(bakdir, deldir) < 0 &&
    4154             :                     /* maybe there was an old deldir, so remove and try again */
    4155           0 :                     (GDKremovedir(0, DELDIR) != GDK_SUCCEED ||
    4156           0 :                      MT_rename(bakdir, deldir) < 0))
    4157           0 :                         ret = GDK_FAIL;
    4158           0 :                 if (ret != GDK_SUCCEED)
    4159           0 :                         GDKsyserror("rename(%s,%s) failed\n", bakdir, deldir);
    4160       10759 :                 TRC_DEBUG(IO_, "rename %s %s = %d\n", bakdir, deldir, (int) ret);
    4161             :         }
    4162             : 
    4163             :         /* AFTERMATH */
    4164       10759 :         if (ret == GDK_SUCCEED) {
    4165       10759 :                 ATOMIC_SET(&BBPlogno, logno);       /* the new value */
    4166       10759 :                 backup_files = subcommit ? (backup_files - backup_subdir) : 0;
    4167       10759 :                 backup_dir = backup_subdir = 0;
    4168       10759 :                 if (GDKremovedir(0, DELDIR) != GDK_SUCCEED)
    4169           0 :                         fprintf(stderr, "#BBPsync: cannot remove directory %s\n", DELDIR);
    4170       10759 :                 (void) BBPprepare(false); /* (try to) remove DELDIR and set up new BAKDIR */
    4171       10759 :                 if (backup_files > 1) {
    4172       10743 :                         TRC_DEBUG(PERF, "backup_files %d > 1\n", backup_files);
    4173       10743 :                         backup_files = 1;
    4174             :                 }
    4175             :         }
    4176       10759 :         TRC_DEBUG(PERF, "%s (ready time "LLFMT" usec)\n",
    4177             :                   ret == GDK_SUCCEED ? "" : " failed",
    4178             :                   (t0 = GDKusec()) - t1);
    4179             : 
    4180       10759 :         if (ret != GDK_SUCCEED) {
    4181             :                 /* clean up extra refs we created */
    4182           0 :                 for (int idx = 1; idx < cnt; idx++) {
    4183           0 :                         bat i = subcommit ? subcommit[idx] : idx;
    4184           0 :                         BAT *b = BBP_desc(i);
    4185           0 :                         if (ATOMvarsized(b->ttype)) {
    4186           0 :                                 MT_lock_set(&b->theaplock);
    4187           0 :                                 ValPtr p = BATgetprop_nolock(b, (enum prop_t) 20);
    4188           0 :                                 if (p != NULL) {
    4189           0 :                                         HEAPdecref(p->val.pval, false);
    4190           0 :                                         BATrmprop_nolock(b, (enum prop_t) 20);
    4191             :                                 }
    4192           0 :                                 MT_lock_unset(&b->theaplock);
    4193             :                         }
    4194             :                 }
    4195             :         }
    4196             : 
    4197             :         /* turn off the BBPSYNCING bits for all bats, even when things
    4198             :          * didn't go according to plan (i.e., don't check for ret ==
    4199             :          * GDK_SUCCEED) */
    4200     1733868 :         for (int idx = 1; idx < cnt; idx++) {
    4201     1723109 :                 bat i = subcommit ? subcommit[idx] : idx;
    4202     1723109 :                 BBP_status_off(i, BBPSYNCING);
    4203             :         }
    4204             : 
    4205       10759 :         GDKfree(bakdir);
    4206       10759 :         GDKfree(deldir);
    4207       10759 :         return ret;
    4208             : }
    4209             : 
    4210             : /*
    4211             :  * Recovery just moves all files back to their original location. this
    4212             :  * is an incremental process: if something fails, just stop with still
    4213             :  * files left for moving in BACKUP/.  The recovery process can resume
    4214             :  * later with the left over files.
    4215             :  */
    4216             : static gdk_return
    4217           0 : force_move(int farmid, const char *srcdir, const char *dstdir, const char *name)
    4218             : {
    4219           0 :         const char *p;
    4220           0 :         char *dstpath, *killfile;
    4221           0 :         gdk_return ret = GDK_SUCCEED;
    4222             : 
    4223           0 :         if ((p = strrchr(name, '.')) != NULL && strcmp(p, ".kill") == 0) {
    4224             :                 /* Found a X.new.kill file, ie remove the X.new file */
    4225           0 :                 ptrdiff_t len = p - name;
    4226           0 :                 long_str srcpath;
    4227             : 
    4228           0 :                 strncpy(srcpath, name, len);
    4229           0 :                 srcpath[len] = '\0';
    4230           0 :                 if ((dstpath = GDKfilepath(farmid, dstdir, srcpath, NULL)) == NULL) {
    4231             :                         return GDK_FAIL;
    4232             :                 }
    4233             : 
    4234             :                 /* step 1: remove the X.new file that is going to be
    4235             :                  * overridden by X */
    4236           0 :                 if (MT_remove(dstpath) != 0 && errno != ENOENT) {
    4237             :                         /* if it exists and cannot be removed, all
    4238             :                          * this is going to fail */
    4239           0 :                         GDKsyserror("force_move: remove(%s)\n", dstpath);
    4240           0 :                         GDKfree(dstpath);
    4241           0 :                         return GDK_FAIL;
    4242             :                 }
    4243           0 :                 GDKfree(dstpath);
    4244             : 
    4245             :                 /* step 2: now remove the .kill file. This one is
    4246             :                  * crucial, otherwise we'll never finish recovering */
    4247           0 :                 if ((killfile = GDKfilepath(farmid, srcdir, name, NULL)) == NULL) {
    4248             :                         return GDK_FAIL;
    4249             :                 }
    4250           0 :                 if (MT_remove(killfile) != 0) {
    4251           0 :                         ret = GDK_FAIL;
    4252           0 :                         GDKsyserror("force_move: remove(%s)\n", killfile);
    4253             :                 }
    4254           0 :                 GDKfree(killfile);
    4255           0 :                 return ret;
    4256             :         }
    4257             :         /* try to rename it */
    4258           0 :         ret = GDKmove(farmid, srcdir, name, NULL, dstdir, name, NULL, false);
    4259             : 
    4260           0 :         if (ret != GDK_SUCCEED) {
    4261           0 :                 char *srcpath;
    4262             : 
    4263           0 :                 GDKclrerr();
    4264             :                 /* two legal possible causes: file exists or dir
    4265             :                  * doesn't exist */
    4266           0 :                 if ((dstpath = GDKfilepath(farmid, dstdir, name, NULL)) == NULL)
    4267             :                         return GDK_FAIL;
    4268           0 :                 if ((srcpath = GDKfilepath(farmid, srcdir, name, NULL)) == NULL) {
    4269           0 :                         GDKfree(dstpath);
    4270           0 :                         return GDK_FAIL;
    4271             :                 }
    4272           0 :                 if (MT_remove(dstpath) != 0)    /* clear destination */
    4273           0 :                         ret = GDK_FAIL;
    4274           0 :                 TRC_DEBUG(IO_, "remove %s = %d\n", dstpath, (int) ret);
    4275             : 
    4276           0 :                 (void) GDKcreatedir(dstdir); /* if fails, move will fail */
    4277           0 :                 ret = GDKmove(farmid, srcdir, name, NULL, dstdir, name, NULL, true);
    4278           0 :                 TRC_DEBUG(IO_, "link %s %s = %d\n", srcpath, dstpath, (int) ret);
    4279           0 :                 GDKfree(dstpath);
    4280           0 :                 GDKfree(srcpath);
    4281             :         }
    4282             :         return ret;
    4283             : }
    4284             : 
    4285             : gdk_return
    4286         356 : BBPrecover(int farmid)
    4287             : {
    4288         356 :         str bakdirpath;
    4289         356 :         str leftdirpath;
    4290         356 :         DIR *dirp;
    4291         356 :         struct dirent *dent;
    4292         356 :         long_str path, dstpath;
    4293         356 :         bat i;
    4294         356 :         size_t j = strlen(BATDIR);
    4295         356 :         gdk_return ret = GDK_SUCCEED;
    4296         356 :         bool dirseen = false;
    4297         356 :         str dstdir;
    4298             : 
    4299         356 :         bakdirpath = GDKfilepath(farmid, NULL, BAKDIR, NULL);
    4300         356 :         leftdirpath = GDKfilepath(farmid, NULL, LEFTDIR, NULL);
    4301         356 :         if (bakdirpath == NULL || leftdirpath == NULL) {
    4302           0 :                 GDKfree(bakdirpath);
    4303           0 :                 GDKfree(leftdirpath);
    4304           0 :                 return GDK_FAIL;
    4305             :         }
    4306         356 :         dirp = opendir(bakdirpath);
    4307         356 :         if (dirp == NULL) {
    4308         237 :                 if (errno != ENOENT)
    4309           0 :                         GDKsyserror("cannot open directory %s\n", bakdirpath);
    4310         237 :                 GDKfree(bakdirpath);
    4311         237 :                 GDKfree(leftdirpath);
    4312         237 :                 return GDK_SUCCEED;     /* nothing to do */
    4313             :         }
    4314         119 :         memcpy(dstpath, BATDIR, j);
    4315         119 :         dstpath[j] = DIR_SEP;
    4316         119 :         dstpath[++j] = 0;
    4317         119 :         dstdir = dstpath + j;
    4318         119 :         TRC_DEBUG(IO_, "start\n");
    4319             : 
    4320         119 :         if (MT_mkdir(leftdirpath) < 0 && errno != EEXIST) {
    4321           0 :                 GDKsyserror("cannot create directory %s\n", leftdirpath);
    4322           0 :                 closedir(dirp);
    4323           0 :                 GDKfree(bakdirpath);
    4324           0 :                 GDKfree(leftdirpath);
    4325           0 :                 return GDK_FAIL;
    4326             :         }
    4327             : 
    4328             :         /* move back all files */
    4329         357 :         while ((dent = readdir(dirp)) != NULL) {
    4330         238 :                 const char *q = strchr(dent->d_name, '.');
    4331             : 
    4332         238 :                 if (q == dent->d_name) {
    4333         238 :                         char *fn;
    4334             : 
    4335         238 :                         if (strcmp(dent->d_name, ".") == 0 ||
    4336         119 :                             strcmp(dent->d_name, "..") == 0)
    4337         238 :                                 continue;
    4338           0 :                         fn = GDKfilepath(farmid, BAKDIR, dent->d_name, NULL);
    4339           0 :                         if (fn) {
    4340           0 :                                 int uret = MT_remove(fn);
    4341           0 :                                 TRC_DEBUG(IO_, "remove %s = %d\n",
    4342             :                                           fn, uret);
    4343           0 :                                 GDKfree(fn);
    4344             :                         }
    4345           0 :                         continue;
    4346           0 :                 } else if (strcmp(dent->d_name, "BBP.dir") == 0) {
    4347           0 :                         dirseen = true;
    4348           0 :                         continue;
    4349             :                 }
    4350           0 :                 if (q == NULL)
    4351           0 :                         q = dent->d_name + strlen(dent->d_name);
    4352           0 :                 if ((j = q - dent->d_name) + 1 > sizeof(path)) {
    4353             :                         /* name too long: ignore */
    4354           0 :                         continue;
    4355             :                 }
    4356           0 :                 strncpy(path, dent->d_name, j);
    4357           0 :                 path[j] = 0;
    4358           0 :                 if (GDKisdigit(*path)) {
    4359           0 :                         i = strtol(path, NULL, 8);
    4360             :                 } else {
    4361           0 :                         i = BBP_find(path, false);
    4362           0 :                         if (i < 0)
    4363           0 :                                 i = -i;
    4364             :                 }
    4365           0 :                 if (i == 0 || i >= (bat) ATOMIC_GET(&BBPsize) || !BBPvalid(i)) {
    4366           0 :                         force_move(farmid, BAKDIR, LEFTDIR, dent->d_name);
    4367             :                 } else {
    4368           0 :                         BBPgetsubdir(dstdir, i);
    4369           0 :                         if (force_move(farmid, BAKDIR, dstpath, dent->d_name) != GDK_SUCCEED) {
    4370             :                                 ret = GDK_FAIL;
    4371             :                                 break;
    4372             :                         }
    4373             :                         /* don't trust index files after recovery */
    4374           0 :                         GDKunlink(farmid, dstpath, path, "thashl");
    4375           0 :                         GDKunlink(farmid, dstpath, path, "thashb");
    4376           0 :                         GDKunlink(farmid, dstpath, path, "timprints");
    4377           0 :                         GDKunlink(farmid, dstpath, path, "torderidx");
    4378           0 :                         GDKunlink(farmid, dstpath, path, "tstrimps");
    4379             :                 }
    4380             :         }
    4381         119 :         closedir(dirp);
    4382         119 :         if (dirseen && ret == GDK_SUCCEED) {    /* we have a saved BBP.dir; it should be moved back!! */
    4383           0 :                 struct stat st;
    4384           0 :                 char *fn;
    4385             : 
    4386           0 :                 fn = GDKfilepath(farmid, BATDIR, "BBP", "dir");
    4387           0 :                 if (fn == NULL) {
    4388             :                         ret = GDK_FAIL;
    4389             :                 } else {
    4390           0 :                         ret = recover_dir(farmid, MT_stat(fn, &st) == 0);
    4391           0 :                         GDKfree(fn);
    4392             :                 }
    4393             :         }
    4394             : 
    4395         119 :         if (ret == GDK_SUCCEED) {
    4396         119 :                 if (MT_rmdir(bakdirpath) < 0) {
    4397           0 :                         GDKsyserror("cannot remove directory %s\n", bakdirpath);
    4398           0 :                         ret = GDK_FAIL;
    4399             :                 }
    4400         119 :                 TRC_DEBUG(IO_, "rmdir %s = %d\n", bakdirpath, (int) ret);
    4401             :         }
    4402         119 :         if (ret != GDK_SUCCEED)
    4403           0 :                 GDKerror("recovery failed.\n");
    4404             : 
    4405         119 :         TRC_DEBUG(IO_, "end\n");
    4406         119 :         GDKfree(bakdirpath);
    4407         119 :         GDKfree(leftdirpath);
    4408         119 :         return ret;
    4409             : }
    4410             : 
    4411             : /*
    4412             :  * SUBDIR recovery is quite mindlessly moving all files back to the
    4413             :  * parent (BAKDIR).  We do recognize moving back BBP.dir and set
    4414             :  * backed_up_subdir accordingly.
    4415             :  */
    4416             : gdk_return
    4417       11083 : BBPrecover_subdir(void)
    4418             : {
    4419       11083 :         str subdirpath;
    4420       11083 :         DIR *dirp;
    4421       11083 :         struct dirent *dent;
    4422       11083 :         gdk_return ret = GDK_SUCCEED;
    4423             : 
    4424       11083 :         subdirpath = GDKfilepath(0, NULL, SUBDIR, NULL);
    4425       11083 :         if (subdirpath == NULL)
    4426             :                 return GDK_FAIL;
    4427       11083 :         dirp = opendir(subdirpath);
    4428       11083 :         if (dirp == NULL && errno != ENOENT)
    4429           0 :                 GDKsyserror("cannot open directory %s\n", subdirpath);
    4430       11083 :         GDKfree(subdirpath);
    4431       11083 :         if (dirp == NULL) {
    4432             :                 return GDK_SUCCEED;     /* nothing to do */
    4433             :         }
    4434           0 :         TRC_DEBUG(IO_, "start\n");
    4435             : 
    4436             :         /* move back all files */
    4437           0 :         while ((dent = readdir(dirp)) != NULL) {
    4438           0 :                 if (dent->d_name[0] == '.')
    4439           0 :                         continue;
    4440           0 :                 ret = GDKmove(0, SUBDIR, dent->d_name, NULL, BAKDIR, dent->d_name, NULL, true);
    4441           0 :                 if (ret != GDK_SUCCEED)
    4442             :                         break;
    4443           0 :                 if (strcmp(dent->d_name, "BBP.dir") == 0)
    4444           0 :                         backup_dir = 1;
    4445             :         }
    4446           0 :         closedir(dirp);
    4447             : 
    4448             :         /* delete the directory */
    4449           0 :         if (ret == GDK_SUCCEED) {
    4450           0 :                 ret = GDKremovedir(0, SUBDIR);
    4451           0 :                 if (backup_dir == 2) {
    4452           0 :                         TRC_DEBUG(IO_, "%s%cBBP.dir had disappeared!\n", SUBDIR, DIR_SEP);
    4453           0 :                         backup_dir = 0;
    4454             :                 }
    4455             :         }
    4456           0 :         TRC_DEBUG(IO_, "end = %d\n", (int) ret);
    4457             : 
    4458           0 :         if (ret != GDK_SUCCEED)
    4459           0 :                 GDKerror("recovery failed.\n");
    4460             :         return ret;
    4461             : }
    4462             : 
/*
 * @- The diskscan
 * The BBPdiskscan routine walks through the BAT dir and cleans up
 * leftovers.  Leftovers are files that cannot belong to a BAT.  In
 * order to establish this for tail and theap files (and for the
 * various index files), the BAT descriptor is consulted in order to
 * determine whether these files are still required.
 *
 * NOTE: older versions of this text said that the routine measures
 * disk occupancy, gathers all bat sizes in a bat that contains
 * bat-ids and bytesizes, and returns the number of bytes of space
 * freed.  The current BBPdiskscan does none of that: it returns a
 * bool that is true when the given path could not be opened as a
 * directory and false when it was scanned as a directory.
 */
    4475             : static bool
    4476       27578 : persistent_bat(bat bid)
    4477             : {
    4478       27578 :         if (bid >= 0 && bid < (bat) ATOMIC_GET(&BBPsize) && BBPvalid(bid)) {
    4479       27578 :                 BAT *b = BBP_desc(bid);
    4480       27578 :                 if ((BBP_status(bid) & BBPLOADED) == 0 || b->batCopiedtodisk) {
    4481             :                         return true;
    4482             :                 }
    4483             :         }
    4484             :         return false;
    4485             : }
    4486             : 
    4487             : static BAT *
    4488       27578 : getdesc(bat bid)
    4489             : {
    4490       27578 :         BAT *b = NULL;
    4491             : 
    4492       27578 :         if (is_bat_nil(bid))
    4493             :                 return NULL;
    4494       27578 :         assert(bid > 0);
    4495       27578 :         if (bid < (bat) ATOMIC_GET(&BBPsize) && BBP_logical(bid))
    4496       27578 :                 b = BBP_desc(bid);
    4497       27578 :         if (b == NULL)
    4498           0 :                 BBPclear(bid);
    4499             :         return b;
    4500             : }
    4501             : 
    4502             : static bool
    4503        1958 : BBPdiskscan(const char *parent, size_t baseoff)
    4504             : {
    4505        1958 :         DIR *dirp = opendir(parent);
    4506        1958 :         struct dirent *dent;
    4507        1958 :         char fullname[FILENAME_MAX];
    4508        1958 :         str dst;
    4509        1958 :         size_t dstlen;
    4510        1958 :         const char *src = parent;
    4511             : 
    4512        1958 :         if (dirp == NULL) {
    4513         176 :                 if (errno != ENOENT)
    4514           0 :                         GDKsyserror("cannot open directory %s\n", parent);
    4515         176 :                 return true;    /* nothing to do */
    4516             :         }
    4517             : 
    4518        1782 :         dst = stpcpy(fullname, src);
    4519        1782 :         if (dst > fullname && dst[-1] != DIR_SEP)
    4520        1782 :                 *dst++ = DIR_SEP;
    4521        1782 :         dstlen = sizeof(fullname) - (dst - fullname);
    4522             : 
    4523       36496 :         while ((dent = readdir(dirp)) != NULL) {
    4524       32932 :                 const char *p;
    4525       32932 :                 bat bid;
    4526       32932 :                 bool ok, delete;
    4527             : 
    4528       32932 :                 if (dent->d_name[0] == '.')
    4529        3564 :                         continue;       /* ignore .dot files and directories (. ..) */
    4530             : 
    4531             : #ifdef GDKLIBRARY_JSON
    4532       29368 :                 if (strcmp(dent->d_name, "jsonupgradeneeded") == 0) {
    4533           8 :                         continue; /* ignore json upgrade signal file  */
    4534             :                 }
    4535             : #endif
    4536             : 
    4537       29360 :                 if (strncmp(dent->d_name, "BBP.", 4) == 0 &&
    4538         340 :                     (strcmp(parent + baseoff, BATDIR) == 0 ||
    4539         340 :                      strncmp(parent + baseoff, BAKDIR, strlen(BAKDIR)) == 0 ||
    4540           0 :                      strncmp(parent + baseoff, SUBDIR, strlen(SUBDIR)) == 0))
    4541         340 :                         continue;
    4542             : 
    4543       29020 :                 p = strchr(dent->d_name, '.');
    4544             : 
    4545       29020 :                 if (strlen(dent->d_name) >= dstlen) {
    4546             :                         /* found a file with too long a name
    4547             :                            (i.e. unknown); stop pruning in this
    4548             :                            subdir */
    4549           0 :                         fprintf(stderr, "unexpected file %s, leaving %s.\n", dent->d_name, parent);
    4550           0 :                         break;
    4551             :                 }
    4552       29020 :                 strncpy(dst, dent->d_name, dstlen);
    4553       29020 :                 fullname[sizeof(fullname) - 1] = 0;
    4554             : 
    4555       29020 :                 if (p == NULL && !BBPdiskscan(fullname, baseoff)) {
    4556             :                         /* it was a directory */
    4557        1442 :                         continue;
    4558             :                 }
    4559             : 
    4560       27578 :                 if (p && strcmp(p + 1, "tmp") == 0) {
    4561             :                         delete = true;
    4562             :                         ok = true;
    4563       27578 :                         bid = 0;
    4564             :                 } else {
    4565       27578 :                         bid = strtol(dent->d_name, NULL, 8);
    4566       27578 :                         ok = p && bid;
    4567       27578 :                         delete = false;
    4568             : 
    4569       27578 :                         if (!ok || !persistent_bat(bid)) {
    4570             :                                 delete = true;
    4571       27578 :                         } else if (strncmp(p + 1, "tail", 4) == 0) {
    4572       20771 :                                 BAT *b = getdesc(bid);
    4573       20771 :                                 delete = (b == NULL || !b->ttype || !b->batCopiedtodisk || b->batCount == 0);
    4574       20771 :                                 assert(b == NULL || b->batCount > 0 || b->theap->free == 0);
    4575       20771 :                                 if (!delete) {
    4576       20768 :                                         if (b->ttype == TYPE_str) {
    4577        5581 :                                                 switch (b->twidth) {
    4578        3247 :                                                 case 1:
    4579        3247 :                                                         delete = strcmp(p + 1, "tail1") != 0;
    4580        3247 :                                                         break;
    4581        1935 :                                                 case 2:
    4582        1935 :                                                         delete = strcmp(p + 1, "tail2") != 0;
    4583        1935 :                                                         break;
    4584             : #if SIZEOF_VAR_T == 8
    4585         399 :                                                 case 4:
    4586         399 :                                                         delete = strcmp(p + 1, "tail4") != 0;
    4587         399 :                                                         break;
    4588             : #endif
    4589           0 :                                                 default:
    4590           0 :                                                         delete = strcmp(p + 1, "tail") != 0;
    4591           0 :                                                         break;
    4592             :                                                 }
    4593             :                                         } else {
    4594       15187 :                                                 delete = strcmp(p + 1, "tail") != 0;
    4595             :                                         }
    4596             :                                 }
    4597        6807 :                         } else if (strncmp(p + 1, "theap", 5) == 0) {
    4598        5849 :                                 BAT *b = getdesc(bid);
    4599        5849 :                                 delete = (b == NULL || !b->tvheap || !b->batCopiedtodisk || b->tvheap->free == 0);
    4600         958 :                         } else if (strncmp(p + 1, "thashl", 6) == 0 ||
    4601         480 :                                    strncmp(p + 1, "thashb", 6) == 0) {
    4602             : #ifdef PERSISTENTHASH
    4603         956 :                                 BAT *b = getdesc(bid);
    4604         956 :                                 delete = b == NULL;
    4605         956 :                                 if (!delete)
    4606         956 :                                         b->thash = (Hash *) 1;
    4607             : #else
    4608             :                                 delete = true;
    4609             : #endif
    4610           2 :                         } else if (strncmp(p + 1, "thash", 5) == 0) {
    4611             :                                 /* older versions used .thash which we
    4612             :                                  * can simply ignore */
    4613             :                                 delete = true;
    4614           2 :                         } else if (strncmp(p + 1, "thsh", 4) == 0) {
    4615             :                                 /* temporary hash files which we can
    4616             :                                  * simply ignore */
    4617             :                                 delete = true;
    4618           2 :                         } else if (strncmp(p + 1, "timprints", 9) == 0) {
    4619           0 :                                 BAT *b = getdesc(bid);
    4620           0 :                                 delete = b == NULL;
    4621           0 :                                 if (!delete)
    4622           0 :                                         b->timprints = (Imprints *) 1;
    4623           2 :                         } else if (strncmp(p + 1, "torderidx", 9) == 0) {
    4624             : #ifdef PERSISTENTIDX
    4625           0 :                                 BAT *b = getdesc(bid);
    4626           0 :                                 delete = b == NULL;
    4627           0 :                                 if (!delete)
    4628           0 :                                         b->torderidx = (Heap *) 1;
    4629             : #else
    4630             :                                 delete = true;
    4631             : #endif
    4632           2 :                         } else if (strncmp(p + 1, "tstrimps", 8) == 0) {
    4633           2 :                                 BAT *b = getdesc(bid);
    4634           2 :                                 delete = b == NULL;
    4635           2 :                                 if (!delete)
    4636           2 :                                         b->tstrimps = (Strimps *)1;
    4637           0 :                         } else if (strncmp(p + 1, "new", 3) != 0) {
    4638       27578 :                                 ok = false;
    4639             :                         }
    4640             :                 }
    4641       27578 :                 if (!ok) {
    4642             :                         /* found an unknown file; stop pruning in this
    4643             :                          * subdir */
    4644           0 :                         fprintf(stderr, "unexpected file %s, leaving %s.\n", dent->d_name, parent);
    4645           0 :                         break;
    4646             :                 }
    4647       27578 :                 if (delete) {
    4648           3 :                         if (MT_remove(fullname) != 0 && errno != ENOENT) {
    4649           0 :                                 GDKsyserror("remove(%s)", fullname);
    4650           0 :                                 continue;
    4651             :                         }
    4652       34717 :                         TRC_DEBUG(IO_, "remove(%s) = 0\n", fullname);
    4653             :                 }
    4654             :         }
    4655        1782 :         closedir(dirp);
    4656        1782 :         return false;
    4657             : }
    4658             : 
    4659             : void
    4660         339 : gdk_bbp_reset(void)
    4661             : {
    4662         339 :         int i;
    4663             : 
    4664         339 :         BBP_free = 0;
    4665         339 :         BBP_nfree = 0;
    4666         340 :         while (BBPlimit > BBPINIT) {
    4667           1 :                 BBPlimit -= BBPINIT;
    4668           1 :                 assert(BBPlimit >= 0);
    4669           1 :                 GDKfree(BBP[BBPlimit >> BBPINITLOG]);
    4670           1 :                 BBP[BBPlimit >> BBPINITLOG] = NULL;
    4671             :         }
    4672         339 :         ATOMIC_SET(&BBPsize, 0);
    4673         339 :         memset(BBP0, 0, sizeof(BBP0));
    4674       11187 :         for (i = 0; i < MAXFARMS; i++)
    4675       10848 :                 GDKfree((void *) BBPfarms[i].dirname); /* loose "const" */
    4676         339 :         memset(BBPfarms, 0, sizeof(BBPfarms));
    4677         339 :         memset(BBP_hash, 0, sizeof(BBP_hash));
    4678             : 
    4679         339 :         locked_by = 0;
    4680         339 :         BBPunloadCnt = 0;
    4681         339 :         backup_files = 0;
    4682         339 :         backup_dir = 0;
    4683         339 :         backup_subdir = 0;
    4684         339 : }
    4685             : 
    4686             : static MT_Lock GDKCallbackListLock = MT_LOCK_INITIALIZER(GDKCallbackListLock);
    4687             : 
    4688             : static struct {
    4689             :         int cnt;
    4690             :         gdk_callback *head;
    4691             : } callback_list = {
    4692             :         .cnt = 0,
    4693             :         .head = NULL,
    4694             : };
    4695             : 
    4696             : /*
    4697             :  * @- Add a callback
    4698             :  * Adds new callback to the callback list.
    4699             :  */
    4700             : gdk_return
    4701           0 : gdk_add_callback(char *name, gdk_callback_func *f, int argc, void *argv[], int
    4702             :                 interval)
    4703             : {
    4704             : 
    4705           0 :         gdk_callback *callback = NULL;
    4706             : 
    4707           0 :         if (!(callback = GDKmalloc(sizeof(gdk_callback) + sizeof(void *) * argc))) {
    4708           0 :                 TRC_CRITICAL(GDK, "Failed to allocate memory!");
    4709           0 :                 return GDK_FAIL;
    4710             :         }
    4711             : 
    4712           0 :         *callback = (gdk_callback) {
    4713             :                 .name = name,
    4714             :                 .argc = argc,
    4715             :                 .interval = interval,
    4716             :                 .func = f,
    4717             :         };
    4718             : 
    4719           0 :         for (int i=0; i < argc; i++) {
    4720           0 :                 callback->argv[i] = argv[i];
    4721             :         }
    4722             : 
    4723           0 :         MT_lock_set(&GDKCallbackListLock);
    4724           0 :         gdk_callback *p = callback_list.head;
    4725           0 :         if (p) {
    4726             :                 int cnt = 1;
    4727           0 :                 do {
    4728             :                         /* check if already added */
    4729           0 :                         if (strcmp(callback->name, p->name) == 0) {
    4730           0 :                                 MT_lock_unset(&GDKCallbackListLock);
    4731           0 :                                 GDKfree(callback);
    4732           0 :                                 return GDK_FAIL;
    4733             :                         }
    4734           0 :                         if (p->next == NULL) {
    4735           0 :                                 p->next = callback;
    4736           0 :                                 p = callback->next;
    4737             :                         } else {
    4738             :                                 p = p->next;
    4739             :                         }
    4740           0 :                         cnt += 1;
    4741           0 :                 } while(p);
    4742           0 :                 callback_list.cnt = cnt;
    4743             :         } else {
    4744           0 :                 callback_list.cnt = 1;
    4745           0 :                 callback_list.head = callback;
    4746             :         }
    4747           0 :         MT_lock_unset(&GDKCallbackListLock);
    4748           0 :         return GDK_SUCCEED;
    4749             : }
    4750             : 
    4751             : /*
    4752             :  * @- Remove a callback
    4753             :  * Unlinks and frees the first entry of the callback list whose name equals cb_name. If argsfree is non-NULL it is called with the entry's argc/argv before the entry is freed. Returns GDK_SUCCEED when an entry was removed and GDK_FAIL when no entry matched. The list is only touched while GDKCallbackListLock is held.
    4754             :  */
    4755             : gdk_return
    4756           0 : gdk_remove_callback(char *cb_name, gdk_callback_func *argsfree)
    4757             : {
    4758           0 :         gdk_callback *prev = NULL; /* entry preceding curr; NULL while curr is the head */
    4759           0 :         gdk_return res = GDK_FAIL; /* stays GDK_FAIL unless a matching entry is removed */
    4760             : 
    4761           0 :         MT_lock_set(&GDKCallbackListLock);
    4762           0 :         gdk_callback *curr = callback_list.head;
    4763           0 :         while(curr) {
    4764           0 :                 if (strcmp(cb_name, curr->name) == 0) {
    4765           0 :                         if (curr == callback_list.head && prev == NULL) {
    4766           0 :                                 callback_list.head = curr->next; /* removing the head: the list now starts at its successor */
    4767             :                         } else {
    4768           0 :                                 prev->next = curr->next; /* bypass curr in the chain */
    4769             :                         }
    4770           0 :                         if (argsfree)
    4771           0 :                                 argsfree(curr->argc, curr->argv); /* optional hook, invoked before the entry itself is freed; its result is not used */
    4772           0 :                         GDKfree(curr);
    4773           0 :                         curr = NULL; /* ends the loop, so at most one entry is removed per call */
    4774           0 :                         callback_list.cnt -=1;
    4775           0 :                         res = GDK_SUCCEED;
    4776             :                 } else {
    4777           0 :                         prev = curr;
    4778           0 :                         curr = curr->next;
    4779             :                 }
    4780             :         }
    4781           0 :         MT_lock_unset(&GDKCallbackListLock);
    4782           0 :         return res;
    4783             : }
    4784             : 
    4785             : static gdk_return
    4786           0 : do_callback(gdk_callback *cb) /* stamp cb with the current GDKusec() value, invoke cb->func and return its result */
    4787             : {
    4788           0 :         cb->last_called = GDKusec(); /* taken before the call, so the stamp marks the start of the run */
    4789           0 :         return cb->func(cb->argc, cb->argv);
    4790             : }
    4791             : 
    4792             : static bool
    4793           0 : should_call(gdk_callback *cb) /* true when cb is due: no previous call recorded, zero interval, or the interval has elapsed */
    4794             : {
    4795           0 :         if (cb->last_called && cb->interval) {
    4796           0 :                 return (cb->last_called + cb->interval * 1000 * 1000) < /* NOTE(review): interval is scaled by 10^6 before comparing with GDKusec() - presumably seconds to microseconds; if the field is a plain int the product can overflow for large intervals - confirm the field type */
    4797           0 :                         GDKusec();
    4798             :         }
    4799             :         return true; /* last_called or interval is zero: always call */
    4800             : }
    4801             : 
    4802             : static void
    4803          34 : BBPcallbacks(void) /* walk the callback list once and run every entry for which should_call() is true */
    4804             : {
    4805          34 :         MT_lock_set(&GDKCallbackListLock); /* held for the whole walk, including while the callbacks execute */
    4806          34 :         gdk_callback *next = callback_list.head;
    4807             : 
    4808          34 :         while (next) {
    4809           0 :                 if(should_call(next))
    4810           0 :                         do_callback(next); /* the callback's gdk_return result is discarded here */
    4811           0 :                 next = next->next; /* NOTE(review): a callback that calls gdk_remove_callback() would take GDKCallbackListLock again - confirm callbacks never do that */
    4812             :         }
    4813          34 :         MT_lock_unset(&GDKCallbackListLock);
    4814          34 : }
    4815             : 
    4816             : /* GDKtmLock protects all accesses and changes to BAKDIR and SUBDIR.
    4817             :  * MUST use BBPtmlock()/BBPtmunlock() to set/unset the lock.
    4818             :  *
    4819             :  * This is at the end of the file on purpose: we don't want people to
    4820             :  * accidentally use GDKtmLock directly. */
    4821             : static MT_Lock GDKtmLock = MT_LOCK_INITIALIZER(GDKtmLock);
    4822             : static int lockfd; /* value returned by MT_lockf(lockfile, F_LOCK) in BBPtmlockFinish(); BBPtmunlock() closes it and resets it to -1 */
    4823             : 
    4824             : static void
    4825       44630 : BBPtmlockFinish(void) /* second step of BBPtmlock(): take the ".tm_lock" file lock unless GDKinmemory(0) is true */
    4826             : {
    4827       44630 :         if (!GDKinmemory(0) &&
    4828             :             /* also use an external lock file to synchronize with
    4829             :              * external programs */
    4830       44630 :             (lockfile != NULL ||
    4831         340 :              (lockfile = GDKfilepath(0, NULL, ".tm_lock", NULL)) != NULL)) { /* path is built on first use and kept in lockfile for later calls; if it cannot be built the file lock is skipped */
    4832       44630 :                     lockfd = MT_lockf(lockfile, F_LOCK); /* result is not checked here; BBPtmunlock() only unlocks when lockfd >= 0 */
    4833             :         }
    4834       44630 : }
    4835             : 
    4836             : void
    4837       44630 : BBPtmlock(void) /* acquire GDKtmLock, then the external lock file via BBPtmlockFinish(); pair with BBPtmunlock() */
    4838             : {
    4839       44630 :         MT_lock_set(&GDKtmLock); /* in-process lock first */
    4840       44630 :         BBPtmlockFinish(); /* then the lock file shared with external programs */
    4841       44630 : }
    4842             : 
    4843             : void
    4844       44630 : BBPtmunlock(void) /* undo BBPtmlock() in reverse order: release the lock file (if held), then GDKtmLock */
    4845             : {
    4846       44630 :         if (lockfile && lockfd >= 0) { /* only when a lock file path exists and lockfd is non-negative */
    4847       44630 :                 assert(!GDKinmemory(0));
    4848       44630 :                 MT_lockf(lockfile, F_ULOCK);
    4849       44630 :                 close(lockfd);
    4850       44630 :                 lockfd = -1; /* mark as released so a later call does not close it again */
    4851             :         }
    4852       44630 :         MT_lock_unset(&GDKtmLock);
    4853       44630 : }
    4854             : 
    4855             : void
    4856         114 : BBPprintinfo(void) /* print to stdout, per category, the number of in-use bats and the sizes of the heaps they own, followed by overall totals */
    4857             : {
    4858             :         /* 32 categories for the bats, not all are expected to be filled; the indices are [refs > 0 ("fix")][dirty][persistent][loaded][hot], and the labels printed below must follow that order */
    4859         114 :         struct counters {
    4860             :                 size_t sz;
    4861             :                 size_t vmsz;
    4862             :                 int nr;
    4863         114 :         } bats[2][2][2][2][2] = {0};
    4864         114 :         int nbats = 0;
    4865             : 
    4866         114 :         BBPtmlock();
    4867         114 :         bat sz = (bat) ATOMIC_GET(&BBPsize);
    4868      345820 :         for (bat i = 1; i < sz; i++) {
    4869      345706 :                 MT_lock_set(&GDKswapLock(i));
    4870      345706 :                 int r;
    4871      345706 :                 if ((r = BBP_refs(i)) > 0 || BBP_lrefs(i) > 0) { /* only bats with BBP_refs or BBP_lrefs above zero are counted as in use */
    4872      293585 :                         BAT *b = BBP_desc(i);
    4873      293585 :                         nbats++;
    4874      293585 :                         MT_lock_set(&b->theaplock);
    4875      293585 :                         ATOMIC_BASE_TYPE status = BBP_status(i);
    4876      293585 :                         struct counters *bt = &bats[r > 0][BATdirty(b)][(status & BBPPERSISTENT) != 0][(status & BBPLOADED) != 0][(status & BBPHOT) != 0];
    4877      293585 :                         bt->nr++;
    4878      293585 :                         if (b->theap && b->batCacheid == b->theap->parentid) { /* a heap is counted only for the bat whose id equals the heap's parentid */
    4879      293585 :                                 bt->sz += HEAPmemsize(b->theap);
    4880      293585 :                                 bt->vmsz += HEAPvmsize(b->theap);
    4881             :                         }
    4882      293585 :                         if (b->tvheap && b->batCacheid == b->tvheap->parentid) {
    4883       10838 :                                 bt->sz += HEAPmemsize(b->tvheap);
    4884       10838 :                                 bt->vmsz += HEAPvmsize(b->tvheap);
    4885             :                         }
    4886      293585 :                         MT_lock_unset(&b->theaplock);
    4887             :                 }
    4888      345706 :                 MT_lock_unset(&GDKswapLock(i));
    4889             :         }
    4890         114 :         uint32_t nfree = BBP_nfree;
    4891         114 :         BBPtmunlock();
    4892         114 :         if (bats[1][1][1][1][1].nr > 0)
    4893         114 :                 printf("fix, dirty, persistent, loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[1][1][1][1][1].nr, bats[1][1][1][1][1].vmsz, bats[1][1][1][1][1].sz);
    4894         114 :         if (bats[1][1][1][1][0].nr > 0)
    4895           0 :                 printf("fix, dirty, persistent, loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[1][1][1][1][0].nr, bats[1][1][1][1][0].vmsz, bats[1][1][1][1][0].sz);
    4896         114 :         if (bats[1][1][1][0][1].nr > 0)
    4897           0 :                 printf("fix, dirty, persistent, not loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[1][1][1][0][1].nr, bats[1][1][1][0][1].vmsz, bats[1][1][1][0][1].sz);
    4898         114 :         if (bats[1][1][1][0][0].nr > 0)
    4899           0 :                 printf("fix, dirty, persistent, not loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[1][1][1][0][0].nr, bats[1][1][1][0][0].vmsz, bats[1][1][1][0][0].sz);
    4900         114 :         if (bats[1][1][0][1][1].nr > 0)
    4901         114 :                 printf("fix, dirty, transient, loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[1][1][0][1][1].nr, bats[1][1][0][1][1].vmsz, bats[1][1][0][1][1].sz);
    4902         114 :         if (bats[1][1][0][1][0].nr > 0)
    4903           1 :                 printf("fix, dirty, transient, loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[1][1][0][1][0].nr, bats[1][1][0][1][0].vmsz, bats[1][1][0][1][0].sz);
    4904         114 :         if (bats[1][1][0][0][1].nr > 0)
    4905           0 :                 printf("fix, dirty, transient, not loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[1][1][0][0][1].nr, bats[1][1][0][0][1].vmsz, bats[1][1][0][0][1].sz);
    4906         114 :         if (bats[1][1][0][0][0].nr > 0)
    4907           0 :                 printf("fix, dirty, transient, not loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[1][1][0][0][0].nr, bats[1][1][0][0][0].vmsz, bats[1][1][0][0][0].sz);
    4908         114 :         if (bats[1][0][1][1][1].nr > 0)
    4909         105 :                 printf("fix, clean, persistent, loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[1][0][1][1][1].nr, bats[1][0][1][1][1].vmsz, bats[1][0][1][1][1].sz);
    4910         114 :         if (bats[1][0][1][1][0].nr > 0)
    4911           0 :                 printf("fix, clean, persistent, loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[1][0][1][1][0].nr, bats[1][0][1][1][0].vmsz, bats[1][0][1][1][0].sz);
    4912         114 :         if (bats[1][0][1][0][1].nr > 0)
    4913           0 :                 printf("fix, clean, persistent, not loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[1][0][1][0][1].nr, bats[1][0][1][0][1].vmsz, bats[1][0][1][0][1].sz);
    4914         114 :         if (bats[1][0][1][0][0].nr > 0)
    4915           0 :                 printf("fix, clean, persistent, not loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[1][0][1][0][0].nr, bats[1][0][1][0][0].vmsz, bats[1][0][1][0][0].sz);
    4916         114 :         if (bats[1][0][0][1][1].nr > 0)
    4917           0 :                 printf("fix, clean, transient, loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[1][0][0][1][1].nr, bats[1][0][0][1][1].vmsz, bats[1][0][0][1][1].sz);
    4918         114 :         if (bats[1][0][0][1][0].nr > 0)
    4919           0 :                 printf("fix, clean, transient, loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[1][0][0][1][0].nr, bats[1][0][0][1][0].vmsz, bats[1][0][0][1][0].sz);
    4920         114 :         if (bats[1][0][0][0][1].nr > 0)
    4921           0 :                 printf("fix, clean, transient, not loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[1][0][0][0][1].nr, bats[1][0][0][0][1].vmsz, bats[1][0][0][0][1].sz);
    4922         114 :         if (bats[1][0][0][0][0].nr > 0)
    4923           0 :                 printf("fix, clean, transient, not loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[1][0][0][0][0].nr, bats[1][0][0][0][0].vmsz, bats[1][0][0][0][0].sz);
    4924         114 :         if (bats[0][1][1][1][1].nr > 0)
    4925         111 :                 printf("no fix, dirty, persistent, loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[0][1][1][1][1].nr, bats[0][1][1][1][1].vmsz, bats[0][1][1][1][1].sz);
    4926         114 :         if (bats[0][1][1][1][0].nr > 0)
    4927          14 :                 printf("no fix, dirty, persistent, loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[0][1][1][1][0].nr, bats[0][1][1][1][0].vmsz, bats[0][1][1][1][0].sz);
    4928         114 :         if (bats[0][1][1][0][1].nr > 0)
    4929           0 :                 printf("no fix, dirty, persistent, not loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[0][1][1][0][1].nr, bats[0][1][1][0][1].vmsz, bats[0][1][1][0][1].sz);
    4930         114 :         if (bats[0][1][1][0][0].nr > 0)
    4931           1 :                 printf("no fix, dirty, persistent, not loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[0][1][1][0][0].nr, bats[0][1][1][0][0].vmsz, bats[0][1][1][0][0].sz);
    4932         114 :         if (bats[0][1][0][1][1].nr > 0)
    4933         103 :                 printf("no fix, dirty, transient, loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[0][1][0][1][1].nr, bats[0][1][0][1][1].vmsz, bats[0][1][0][1][1].sz);
    4934         114 :         if (bats[0][1][0][1][0].nr > 0)
    4935          12 :                 printf("no fix, dirty, transient, loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[0][1][0][1][0].nr, bats[0][1][0][1][0].vmsz, bats[0][1][0][1][0].sz);
    4936         114 :         if (bats[0][1][0][0][1].nr > 0)
    4937           0 :                 printf("no fix, dirty, transient, not loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[0][1][0][0][1].nr, bats[0][1][0][0][1].vmsz, bats[0][1][0][0][1].sz);
    4938         114 :         if (bats[0][1][0][0][0].nr > 0)
    4939           4 :                 printf("no fix, dirty, transient, not loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[0][1][0][0][0].nr, bats[0][1][0][0][0].vmsz, bats[0][1][0][0][0].sz);
    4940         114 :         if (bats[0][0][1][1][1].nr > 0)
    4941         114 :                 printf("no fix, clean, persistent, loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[0][0][1][1][1].nr, bats[0][0][1][1][1].vmsz, bats[0][0][1][1][1].sz);
    4942         114 :         if (bats[0][0][1][1][0].nr > 0)
    4943          11 :                 printf("no fix, clean, persistent, loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[0][0][1][1][0].nr, bats[0][0][1][1][0].vmsz, bats[0][0][1][1][0].sz);
    4944         114 :         if (bats[0][0][1][0][1].nr > 0)
    4945           1 :                 printf("no fix, clean, persistent, not loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[0][0][1][0][1].nr, bats[0][0][1][0][1].vmsz, bats[0][0][1][0][1].sz);
    4946         114 :         if (bats[0][0][1][0][0].nr > 0)
    4947          10 :                 printf("no fix, clean, persistent, not loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[0][0][1][0][0].nr, bats[0][0][1][0][0].vmsz, bats[0][0][1][0][0].sz);
    4948         114 :         if (bats[0][0][0][1][1].nr > 0)
    4949           2 :                 printf("no fix, clean, transient, loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[0][0][0][1][1].nr, bats[0][0][0][1][1].vmsz, bats[0][0][0][1][1].sz);
    4950         114 :         if (bats[0][0][0][1][0].nr > 0)
    4951           1 :                 printf("no fix, clean, transient, loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[0][0][0][1][0].nr, bats[0][0][0][1][0].vmsz, bats[0][0][0][1][0].sz);
    4952         114 :         if (bats[0][0][0][0][1].nr > 0)
    4953           0 :                 printf("no fix, clean, transient, not loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[0][0][0][0][1].nr, bats[0][0][0][0][1].vmsz, bats[0][0][0][0][1].sz);
    4954         114 :         if (bats[0][0][0][0][0].nr > 0)
    4955           1 :                 printf("no fix, clean, transient, not loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[0][0][0][0][0].nr, bats[0][0][0][0][0].vmsz, bats[0][0][0][0][0].sz);
    4956             : 
    4957         114 :         printf("%d bats total, %d in use, %"PRIu32" free bats in common shared list\n",
    4958             :                sz - 1, nbats, nfree);
    4959         114 : }

Generated by: LCOV version 1.14