LCOV - code coverage report
Current view: top level - gdk - gdk_bbp.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 1741 2783 62.6 %
Date: 2024-04-25 20:03:45 Functions: 80 92 87.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : /*
      14             :  * @a M. L. Kersten, P. Boncz, N. J. Nes
      15             :  * @* BAT Buffer Pool (BBP)
      16             :  * The BATs created and loaded are collected in a BAT buffer pool.
      17             :  * The Bat Buffer Pool has a number of functions:
      18             :  * @table @code
      19             :  *
      20             :  * @item administration and lookup
      21             :  * The BBP is a directory which contains status information about all
      22             :  * known BATs.  This interface may be used very heavily, by
      23             :  * data-intensive applications.  To eliminate all overhead, read-only
      24             :  * access to the BBP may be done by table-lookups. The integer index
      25             :  * type for these lookups is @emph{bat}, as retrieved by
      26             :  * @emph{b->batCacheid}. The @emph{bat} zero is reserved for the nil
      27             :  * bat.
      28             :  *
      29             :  * @item persistence
      30             :  * The BBP is made persistent by saving it to the dictionary file
      31             :  * called @emph{BBP.dir} in the database.
      32             :  *
      33             :  * When the number of BATs rises, having all files in one directory
      34             :  * becomes a bottleneck.  The BBP therefore implements a scheme that
      35             :  * distributes all BATs in a growing directory tree with at most 64
      36             :  * BATs stored in one node.
      37             :  *
      38             :  * @item buffer management
      39             :  * The BBP is responsible for loading and saving of BATs to disk. It
      40             :  * also contains routines to unload BATs from memory when memory
      41             :  * resources get scarce. For this purpose, it administers BAT memory
      42             :  * reference counts (to know which BATs can be unloaded) and BAT usage
      43             :  * statistics (it unloads the least recently used BATs).
      44             :  *
      45             :  * @item recovery
      46             :  * When the database is closed or during a run-time syncpoint, the
      47             :  * system tables must be written to disk in a safe way, that is immune
      48             :  * to system failures (like disk full). To do so, the BBP implements
      49             :  * an atomic commit and recovery protocol: first all files to be
      50             :  * overwritten are moved to a BACKUP/ dir. If that succeeds, the
      51             :  * writes are done. If that also fully succeeds the BACKUP/ dir is
      52             :  * renamed to DELETE_ME/ and subsequently deleted.  If not, all files
      53             :  * in BACKUP/ are moved back to their original location.
      54             :  *
      55             :  * @item unloading
      56             :  * Bats which have a logical reference (i.e. lrefs > 0) but no memory
      57             :  * reference (refcnt == 0) can be unloaded. Unloading dirty bats
      58             :  * means, moving the original (committed version) to the BACKUP/ dir
      59             :  * and saving the bat. This complicates the commit and recovery/abort
      60             :  * issues.  The commit has to check if the bat is already moved, and
      61             :  * the recovery always has to move back the files from the BACKUP/
      62             :  * dir.
      63             :  *
      64             :  * @item reference counting
      65             :  * Bats have two kinds of references: logical and physical
      66             :  * (pointer) ones.  The logical references are administered by
      67             :  * BBPretain/BBPrelease, the physical ones by BBPfix/BBPunfix.
      68             :  */
      69             : 
      70             : #include "monetdb_config.h"
      71             : #include "gdk.h"
      72             : #include "gdk_private.h"
      73             : #include "mutils.h"
      74             : #ifdef HAVE_FCNTL_H
      75             : #include <fcntl.h>
      76             : #endif
      77             : 
      78             : #ifndef F_OK
      79             : #define F_OK 0
      80             : #endif
      81             : #ifndef S_ISDIR
      82             : #define S_ISDIR(mode)   (((mode) & _S_IFMT) == _S_IFDIR)
      83             : #endif
      84             : #ifndef O_CLOEXEC
      85             : #ifdef _O_NOINHERIT
      86             : #define O_CLOEXEC _O_NOINHERIT  /* Windows */
      87             : #else
      88             : #define O_CLOEXEC 0
      89             : #endif
      90             : #endif
      91             : #ifndef O_BINARY
      92             : #define O_BINARY 0
      93             : #endif
      94             : 
      95             : /*
      96             :  * The BBP has a fixed address, so re-allocation due to a growing BBP
      97             :  * caused by one thread does not disturb reads to the old entries by
      98             :  * another.  This is implemented using anonymous virtual memory;
      99             :  * extensions on the same address are guaranteed because a large
     100             :  * non-committed VM area is requested initially. New slots in the BBP
     101             :  * are found in O(1) by keeping a freelist that uses the 'next' field
     102             :  * in the BBPrec records.
     103             :  */
     104             : BBPrec *BBP[N_BBPINIT];         /* fixed base VM address of BBP array */
     105             : bat BBPlimit = 0;               /* current committed VM BBP array */
     106             : static ATOMIC_TYPE BBPsize = ATOMIC_VAR_INIT(0); /* current used size of BBP array */
     107             : 
     108             : struct BBPfarm_t BBPfarms[MAXFARMS]; /* farm table; BBPselectfarm() scans the .roles bitmasks */
     109             : 
     110             : #define KITTENNAP 1             /* used to suspend processing */
     111             : #define BBPNONAME "."         /* filler for no name in BBP.dir */
     112             : /*
     113             :  * The hash index uses a bucket index (int array) of size mask that is
     114             :  * tuned for perfect hashing (1 lookup). The bucket chain uses the
     115             :  * 'next' field in the BBPrec records.
     116             :  */
     117             : static MT_Lock BBPnameLock = MT_LOCK_INITIALIZER(BBPnameLock); /* NOTE(review): not used in this part of the file; presumably guards the name hash -- confirm */
     118             : #define BBP_mask        1023            /* bucket index = hash & BBP_mask, hence BBP_mask+1 buckets */
     119             : static bat BBP_hash[BBP_mask+1];        /* BBP logical name hash buckets */
     120             : static MT_Lock GDKcacheLock = MT_LOCK_INITIALIZER(GDKcacheLock); /* one of the locks taken by BBPlock() */
     121             : static bat BBP_free; /* NOTE(review): presumably the head of the free-slot list chained through 'next' -- confirm */
     122             : static uint32_t BBP_nfree; /* NOTE(review): presumably the number of entries on that free list -- confirm */
     123             : #define BBP_FREE_LOWATER        10 /* NOTE(review): low/high water marks; their use is not in this part of the file */
     124             : #define BBP_FREE_HIWATER        50
     125             : /* forward declarations of file-local functions */
     126             : static gdk_return BBPfree(BAT *b);
     127             : static void BBPdestroy(BAT *b);
     128             : static void BBPuncacheit(bat bid, bool unloaddesc);
     129             : static gdk_return BBPprepare(bool subcommit);
     130             : static BAT *getBBPdescriptor(bat i);
     131             : static gdk_return BBPbackup(BAT *b, bool subcommit);
     132             : static gdk_return BBPdir_init(void);
     133             : static void BBPcallbacks(void);
     134             : 
     135             : /* two lngs of extra info in BBP.dir */
     136             : /* these two are atomic because of their use in log_new() */
     137             : static ATOMIC_TYPE BBPlogno = ATOMIC_VAR_INIT(0);
     138             : static ATOMIC_TYPE BBPtransid = ATOMIC_VAR_INIT(0);
     139             : /* BBPtmpcheck(s): true iff s starts with the four characters "tmp_" */
     140             : #define BBPtmpcheck(s)  (strncmp(s, "tmp_", 4) == 0)
     141             : /* BBPnamecheck(s): for a "tmp_" name, the digits after the prefix parsed as an OCTAL number (strtol base 8); 0 for any other name */
     142             : #define BBPnamecheck(s) (BBPtmpcheck(s) ? strtol((s) + 4, NULL, 8) : 0)
     143             : /* BATno_shared_heap(b): VIEWtparent(b) is zero and the HEAPREFS-masked reference count of b's tail heap is exactly 1 */
     144             : #define BATno_shared_heap(b) \
     145             :         (!VIEWtparent(b) && (ATOMIC_GET(&(b)->theap->refs) & HEAPREFS) == 1)
     146             : /* BATshared(b): the tail heap, or the var heap if b has one, has VIEW*parent(b) zero and a HEAPREFS-masked reference count above 1 */
     147             : #define BATshared(b) \
     148             :         ((!VIEWtparent(b) && (ATOMIC_GET(&(b)->theap->refs) & HEAPREFS) > 1) || \
     149             :          ((b)->tvheap && !VIEWvtparent(b) && (ATOMIC_GET(&(b)->tvheap->refs) & HEAPREFS) > 1))
     150             : 
     151             : static void
     152       26916 : BBP_insert(bat i) /* add BAT i to the logical-name hash table BBP_hash */
     153             : {
     154       26916 :         bat idx = (bat) (strHash(BBP_logical(i)) & BBP_mask); /* bucket = hash of the logical name, masked */
     155             : /* push i at the head of the bucket chain; the chain is linked through BBP_next */
     156       26916 :         BBP_next(i) = BBP_hash[idx];
     157       26916 :         BBP_hash[idx] = i;
     158       26916 : } /* takes no lock itself; NOTE(review): callers presumably hold the lock that guards BBP_hash -- confirm */
     159             : 
     160             : static void
     161       14960 : BBP_delete(bat i) /* unlink the entry with i's logical name from the name hash table */
     162             : {
     163       14960 :         const char *s = BBP_logical(i); /* fetched before i is reused as the chain cursor below */
     164       14960 :         bat idx = (bat) (strHash(s) & BBP_mask);
     165             : /* h points at the link being examined (bucket head or a BBP_next field); 0 ends the chain */
     166       14960 :         for (bat *h = &BBP_hash[idx]; (i = *h) != 0; h = &BBP_next(i)) {
     167       14960 :                 if (strcmp(BBP_logical(i), s) == 0) { /* match is by name, not by bat id */
     168       14960 :                         *h = BBP_next(i); /* bypass the entry; only the first match is removed */
     169       14960 :                         break;
     170             :                 }
     171             :         }
     172       14960 : } /* if no entry matches, the table is left unchanged */
     173             : 
     174             : bat
     175   418055821 : getBBPsize(void) /* atomic read of BBPsize, the current used size of the BBP array */
     176             : {
     177   418055821 :         return (bat) ATOMIC_GET(&BBPsize);
     178             : }
     179             : 
     180             : lng
     181         368 : getBBPlogno(void) /* atomic read of BBPlogno, one of the two extra lng values kept in BBP.dir */
     182             : {
     183         368 :         return (lng) ATOMIC_GET(&BBPlogno);
     184             : }
     185             : 
     186             : lng
     187         368 : getBBPtransid(void) /* atomic read of BBPtransid, one of the two extra lng values kept in BBP.dir */
     188             : {
     189         368 :         return (lng) ATOMIC_GET(&BBPtransid);
     190             : }
     191             : 
     192             : 
     193             : /*
     194             :  * @+ BBP Consistency and Concurrency
     195             :  * While GDK provides the basic building blocks for an ACID system, in
     196             :  * itself it is not such a system, as this would entail too much
     197             :  * overhead that is often not needed. Hence, some consistency control
     198             :  * is left to the user. The first important user constraint is that if
     199             :  * a user updates a BAT, they themselves must ensure that no-one else
     200             :  * accesses this BAT.
     201             :  *
     202             :  * Concerning buffer management, the BBP carries out a swapping
     203             :  * policy.  BATs are kept in memory till the memory is full. If the
     204             :  * memory is full, the malloc functions initiate BBP trim actions,
     205             :  * that unload the coldest BATs that have a zero reference count. The
     206             :  * second important user constraint is therefore that a user may only
     207             :  * manipulate live BAT data in memory if it is sure that there is at
     208             :  * least one reference count to that BAT.
     209             :  *
     210             :  * The main BBP array is protected by two locks:
     211             :  * @table @code
     212             :  * @item GDKcacheLock
     213             :  * this lock guards the free slot management in the BBP array.  The
     214             :  * BBP operations that allocate a new slot for a new BAT
     215             :  * (@emph{BBPinit},@emph{BBPcacheit}), delete the slot of a destroyed
     216             :  * BAT (@emph{BBPreclaim}), or rename a BAT (@emph{BBPrename}), hold
     217             :  * this lock. It also protects all BAT (re)naming actions, including
     218             :  * reads and writes of the hash table with BAT names.
     219             :  * @item GDKswapLock
     220             :  * this lock guards the swap (loaded/unloaded) status of the
     221             :  * BATs. Hence, all BBP routines that influence the swapping policy,
     222             :  * or actually carry out the swapping policy itself, acquire this lock
     223             :  * (e.g. @emph{BBPfix},@emph{BBPunfix}).  Note that this also means
     224             :  * that updates to the BBP_status indicator array must be protected by
     225             :  * GDKswapLock.
     226             :  *
     227             :  * To reduce contention GDKswapLock was split into multiple locks; it
     228             :  * is now an array of lock pointers which is accessed by
     229             :  * GDKswapLock(bat)
     230             :  * @end table
     231             :  *
     232             :  * Routines that need both locks should first acquire the locks in the
     233             :  * GDKswapLock array (in ascending order) and then GDKcacheLock (and
     234             :  * release them in reverse order).
     235             :  *
     236             :  * To obtain maximum speed, read operations to existing elements in
     237             :  * the BBP are unguarded. As said, it is the user's responsibility that
     238             :  * the BAT that is being read is not being modified. BBP update
     239             :  * actions that modify the BBP data structure itself are locked by the
     240             :  * BBP functions themselves. Hence, multiple concurrent BBP read
     241             :  * operations may be ongoing while at the same time at most one BBP
     242             :  * write operation @strong{on a different BAT} is executing.  This
     243             :  * holds for accesses to the public (quasi-) arrays @emph{BBPcache},
     244             :  * @emph{BBPstatus} and @emph{BBPrefs}.
     245             :  * These arrays are called quasi as now they are
     246             :  * actually stored together in one big BBPrec array called BBP, that
     247             :  * is allocated in anonymous VM space, so we can reallocate this
     248             :  * structure without changing the base address (a crucial feature if
     249             :  * read actions are to go on unlocked while other entries in the BBP
     250             :  * may be modified).
     251             :  */
     252             : static volatile MT_Id locked_by = 0; /* set to MT_getpid() by BBPlock(), reset to 0 by BBPunlock() */
     253             : 
     254             : /* use a lock instead of atomic instructions so that we wait for
     255             :  * BBPlock/BBPunlock (BBPlock() keeps GDKunloadLock until it holds all other locks) */
     256             : #define BBP_unload_inc()                        \
     257             :         do {                                    \
     258             :                 MT_lock_set(&GDKunloadLock);        \
     259             :                 BBPunloadCnt++;                 \
     260             :                 MT_lock_unset(&GDKunloadLock);      \
     261             :         } while (0)
     262             : /* BBP_unload_dec: decrement under the same lock; the assert catches a decrement without a matching increment */
     263             : #define BBP_unload_dec()                        \
     264             :         do {                                    \
     265             :                 MT_lock_set(&GDKunloadLock);        \
     266             :                 --BBPunloadCnt;                 \
     267             :                 assert(BBPunloadCnt >= 0);   \
     268             :                 MT_lock_unset(&GDKunloadLock);      \
     269             :         } while (0)
     270             : /* BBPunloadCnt counts pending unloads; BBPlock() waits until it is 0 */
     271             : static int BBPunloadCnt = 0;
     272             : static MT_Lock GDKunloadLock = MT_LOCK_INITIALIZER(GDKunloadLock); /* guards BBPunloadCnt */
     273             : 
     274             : void
     275          16 : BBPlock(void) /* take all BBP locks; undone by BBPunlock() */
     276             : {
     277          16 :         int i;
     278             : /* order: GDKunloadLock (kept until the end), BBPtmlock(), GDKcacheLock, then GDKswapLock(0..BBP_BATMASK) */
     279             :         /* wait for all pending unloads to finish */
     280          16 :         MT_lock_set(&GDKunloadLock);
     281          16 :         while (BBPunloadCnt > 0) {
     282           0 :                 MT_lock_unset(&GDKunloadLock); /* release while sleeping so BBP_unload_dec() can run */
     283           0 :                 MT_sleep_ms(1);
     284          16 :                 MT_lock_set(&GDKunloadLock);
     285             :         }
     286             : /* NOTE(review): the locking notes in this file say the swap locks come before GDKcacheLock; here GDKcacheLock is taken first -- confirm this is intended */
     287          16 :         BBPtmlock();
     288          16 :         MT_lock_set(&GDKcacheLock);
     289      131104 :         for (i = 0; i <= BBP_BATMASK; i++) /* ascending index order */
     290      131072 :                 MT_lock_set(&GDKswapLock(i));
     291          16 :         locked_by = MT_getpid(); /* record the caller as the holder */
     292             : /* GDKunloadLock was held up to here, so BBP_unload_inc() could not run while the other locks were taken */
     293          16 :         MT_lock_unset(&GDKunloadLock);
     294          16 : }
     295             : 
     296             : void
     297          16 : BBPunlock(void) /* release the locks taken by BBPlock(), in reverse order of acquisition */
     298             : {
     299          16 :         int i;
     300             : /* swap locks are released from the highest index down to 0 */
     301      131088 :         for (i = BBP_BATMASK; i >= 0; i--)
     302      131072 :                 MT_lock_unset(&GDKswapLock(i));
     303          16 :         MT_lock_unset(&GDKcacheLock);
     304          16 :         locked_by = 0; /* cleared after GDKcacheLock is released, before BBPtmunlock() */
     305          16 :         BBPtmunlock();
     306          16 : }
     307             : 
     308             : int
     309    11705017 : BBPselectfarm(role_t role, int type, enum heaptype hptype) /* pick the farm index for a heap with the given role */
     310             : {
     311    11705017 :         int i;
     312             : /* returns 0 when GDKinmemory(0) holds, else the index of the first farm whose roles bitmask contains role, else -1 */
     313    11705017 :         (void) type;            /* may use in future */
     314    11705017 :         (void) hptype;          /* only used when PERSISTENTHASH or PERSISTENTIDX is not defined */
     315             : 
     316    11705017 :         if (GDKinmemory(0))
     317             :                 return 0;
     318             : /* hash and order-index heaps are forced to the TRANSIENT role unless the matching PERSISTENT* macro is defined */
     319             : #ifndef PERSISTENTHASH
     320             :         if (hptype == hashheap)
     321             :                 role = TRANSIENT;
     322             : #endif
     323             : #ifndef PERSISTENTIDX
     324             :         if (hptype == orderidxheap)
     325             :                 role = TRANSIENT;
     326             : #endif
     327    23044660 :         for (i = 0; i < MAXFARMS; i++)
     328    23044660 :                 if (BBPfarms[i].roles & (1U << (int) role)) /* roles is a bitmask indexed by role value */
     329    11684846 :                         return i;
     330             :         /* must be able to find farms for TRANSIENT and PERSISTENT */
     331           0 :         assert(role != TRANSIENT && role != PERSISTENT);
     332             :         return -1; /* no farm serves this role */
     333             : }
     334             : 
     335             : static gdk_return
     336         343 : BBPextend(bat newsize) /* grow the BBP so that BBPlimit >= newsize; GDK_FAIL on limit overflow or allocation failure */
     337             : {
     338         343 :         if (newsize > N_BBPINIT * BBPINIT) { /* BBP[] has N_BBPINIT slots of BBPINIT records each */
     339           0 :                 GDKerror("trying to extend BAT pool beyond the "
     340             :                          "limit (%d)\n", N_BBPINIT * BBPINIT);
     341           0 :                 return GDK_FAIL;
     342             :         }
     343             : 
     344             :         /* add zero-filled chunks of BBPINIT records until BBPlimit is at least newsize */
     345         686 :         while (BBPlimit < newsize) {
     346         343 :                 BUN limit = BBPlimit >> BBPINITLOG; /* index of the next chunk in BBP[]; NOTE(review): assumes BBPINIT == 1 << BBPINITLOG -- confirm */
     347         343 :                 assert(BBP[limit] == NULL);
     348         343 :                 BBP[limit] = GDKzalloc(BBPINIT * sizeof(BBPrec));
     349         343 :                 if (BBP[limit] == NULL) {
     350           0 :                         GDKerror("failed to extend BAT pool\n");
     351           0 :                         return GDK_FAIL; /* chunks added in earlier iterations stay in place */
     352             :                 }
     353     5620055 :                 for (BUN i = 0; i < BBPINIT; i++) {
     354     5619712 :                         ATOMIC_INIT(&BBP[limit][i].status, 0);
     355     5619712 :                         BBP[limit][i].pid = ~(MT_Id)0; /* all-ones value; NOTE(review): presumably means "no thread" -- confirm */
     356             :                 }
     357         343 :                 BBPlimit += BBPINIT;
     358             :         }
     359             : 
     360             :         return GDK_SUCCEED;
     361             : }
     362             : 
     363             : static gdk_return
     364         112 : recover_dir(int farmid, bool direxists) /* move BBP.dir from BAKDIR to BATDIR; returns the result of that move */
     365             : {
     366         112 :         if (direxists) { /* first try to keep the current BATDIR BBP.dir as BBP.bak */
     367             :                 /* just try; don't care about these non-vital files */
     368           0 :                 if (GDKunlink(farmid, BATDIR, "BBP", "bak") != GDK_SUCCEED)
     369           0 :                         GDKwarning("unlink of BBP.bak failed\n");
     370           0 :                 if (GDKmove(farmid, BATDIR, "BBP", "dir", BATDIR, "BBP", "bak", false) != GDK_SUCCEED)
     371           0 :                         GDKwarning("rename of BBP.dir to BBP.bak failed\n");
     372             :         }
     373         112 :         return GDKmove(farmid, BAKDIR, "BBP", "dir", BATDIR, "BBP", "dir", true); /* failures above only warn; this call alone decides the result */
     374             : }
     375             : 
     376             : static gdk_return BBPrecover(int farmid);
     377             : static gdk_return BBPrecover_subdir(void);
     378             : static bool BBPdiskscan(const char *, size_t);
     379             : 
     380             : static int
     381        8078 : vheapinit(BAT *b, const char *buf, unsigned bbpversion, const char *filename, int lineno) /* parse the var-heap fields of a BBP.dir line into *b->tvheap */
     382             : {
     383        8078 :         int n = 0;
     384        8078 :         uint64_t free, size;
     385        8078 :         uint16_t storage;
     386             : /* returns the number of characters of buf consumed (from %n), or -1 after logging when the fields cannot be parsed */
     387        8078 :         (void) bbpversion;      /* redundant: bbpversion is used in the condition below */
     388             : /* versions <= GDKLIBRARY_HSIZE store free, size and storage; later versions store only free */
     389        8078 :         size = 0;                             /* only read from buf in the GDKLIBRARY_HSIZE case */
     390        8078 :         storage = STORE_INVALID;              /* only read from buf in the GDKLIBRARY_HSIZE case */
     391       16156 :         if (bbpversion <= GDKLIBRARY_HSIZE ?
     392           0 :             sscanf(buf,
     393             :                    " %" SCNu64 " %" SCNu64 " %" SCNu16
     394             :                    "%n",
     395             :                    &free, &size, &storage, &n) < 3 :
     396        8078 :             sscanf(buf,
     397             :                    " %" SCNu64
     398             :                    "%n",
     399             :                    &free, &n) < 1) {
     400           0 :                 TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d", lineno);
     401           0 :                 return -1;
     402             :         }
     403        8078 :         if (b->batCount == 0) /* empty BAT: treat the var heap as empty whatever was read */
     404        2321 :                 free = 0;
     405        8078 :         if (b->ttype >= 0 &&
     406        7864 :             ATOMstorage(b->ttype) == TYPE_str &&
     407        7810 :             free < GDK_STRHASHTABLE * sizeof(stridx_t) + BATTINY * GDK_VARALIGN)
     408        6664 :                 size = GDK_STRHASHTABLE * sizeof(stridx_t) + BATTINY * GDK_VARALIGN; /* minimum size for string-storage heaps */
     409        1414 :         else if (free < 512)
     410         132 :                 size = 512; /* minimum size for all other cases */
     411             :         else
     412        1282 :                 size = free; /* every branch assigns size, so a size read from buf is never used */
     413       16156 :         *b->tvheap = (Heap) {
     414        8078 :                 .free = (size_t) free,
     415        8078 :                 .size = (size_t) size,
     416             :                 .base = NULL,
     417             :                 .storage = STORE_INVALID, /* the storage value read from buf is not used either */
     418             :                 .cleanhash = true,
     419             :                 .newstorage = STORE_INVALID,
     420             :                 .dirty = false,
     421        8078 :                 .parentid = b->batCacheid,
     422        8078 :                 .farmid = BBPselectfarm(PERSISTENT, b->ttype, varheap),
     423        8078 :                 .hasfile = free > 0,
     424             :         };
     425        8078 :         strconcat_len(b->tvheap->filename, sizeof(b->tvheap->filename),
     426             :                       filename, ".theap", NULL); /* heap file name is filename followed by ".theap" */
     427        8078 :         return n;
     428             : }
     429             : 
     430             : static int
     431       30944 : heapinit(BAT *b, const char *buf,
     432             : #ifdef GDKLIBRARY_HASHASH
     433             :          int *hashash,
     434             : #endif
     435             :          unsigned bbpversion, const char *filename, int lineno)
     436             : {
     437       30944 :         int t;
     438       30944 :         char type[33];
     439       30944 :         uint16_t width;
     440       30944 :         uint16_t var;
     441       30944 :         uint16_t properties;
     442       30944 :         uint64_t nokey0;
     443       30944 :         uint64_t nokey1;
     444       30944 :         uint64_t nosorted;
     445       30944 :         uint64_t norevsorted;
     446       30944 :         uint64_t base;
     447       30944 :         uint64_t free;
     448       30944 :         uint64_t size;
     449       30944 :         uint16_t storage;
     450       30944 :         uint64_t minpos, maxpos;
     451       30944 :         int n;
     452             : 
     453       30944 :         (void) bbpversion;      /* could be used to implement compatibility */
     454             : 
     455       30944 :         minpos = maxpos = (uint64_t) oid_nil; /* for GDKLIBRARY_MINMAX_POS case */
     456       30944 :         size = 0;                             /* for GDKLIBRARY_HSIZE case */
     457       30944 :         storage = STORE_INVALID;              /* for GDKLIBRARY_HSIZE case */
     458       61888 :         if (bbpversion <= GDKLIBRARY_MINMAX_POS ?
     459           0 :             sscanf(buf,
     460             :                    " %10s %" SCNu16 " %" SCNu16 " %" SCNu16 " %" SCNu64
     461             :                    " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64
     462             :                    " %" SCNu64 " %" SCNu64 " %" SCNu16
     463             :                    "%n",
     464             :                    type, &width, &var, &properties, &nokey0,
     465             :                    &nokey1, &nosorted, &norevsorted, &base,
     466             :                    &free, &size, &storage,
     467             :                    &n) < 12 :
     468             :             bbpversion <= GDKLIBRARY_HSIZE ?
     469           0 :             sscanf(buf,
     470             :                    " %10s %" SCNu16 " %" SCNu16 " %" SCNu16 " %" SCNu64
     471             :                    " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64
     472             :                    " %" SCNu64 " %" SCNu64 " %" SCNu16 " %" SCNu64 " %" SCNu64
     473             :                    "%n",
     474             :                    type, &width, &var, &properties, &nokey0,
     475             :                    &nokey1, &nosorted, &norevsorted, &base,
     476             :                    &free, &size, &storage, &minpos, &maxpos,
     477             :                    &n) < 14 :
     478       30944 :             sscanf(buf,
     479             :                    " %10s %" SCNu16 " %" SCNu16 " %" SCNu16 " %" SCNu64
     480             :                    " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64
     481             :                    " %" SCNu64 " %" SCNu64 " %" SCNu64
     482             :                    "%n",
     483             :                    type, &width, &var, &properties, &nokey0,
     484             :                    &nokey1, &nosorted, &norevsorted, &base,
     485             :                    &free, &minpos, &maxpos,
     486             :                    &n) < 12) {
     487           0 :                 TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d", lineno);
     488           0 :                 return -1;
     489             :         }
     490             : 
     491       30944 :         if (strcmp(type, "wkba") == 0)
     492           0 :                 GDKwarning("type wkba (SQL name: GeometryA) is deprecated\n");
     493             : 
     494       30944 :         if (properties & ~0x0F81) {
     495           0 :                 TRC_CRITICAL(GDK, "unknown properties are set: incompatible database on line %d of BBP.dir\n", lineno);
     496           0 :                 return -1;
     497             :         }
     498             : #ifdef GDKLIBRARY_HASHASH
     499       30944 :         *hashash = var & 2;
     500             : #endif
     501       30944 :         var &= ~2;
     502       30944 :         if ((t = ATOMindex(type)) < 0) {
     503         241 :                 if ((t = ATOMunknown_find(type)) == 0) {
     504           0 :                         TRC_CRITICAL(GDK, "no space for atom %s", type);
     505           0 :                         return -1;
     506             :                 }
     507       38567 :         } else if (var != (t == TYPE_void || BATatoms[t].atomPut != NULL)) {
     508           0 :                 TRC_CRITICAL(GDK, "inconsistent entry in BBP.dir: tvarsized mismatch for BAT %d on line %d\n", (int) b->batCacheid, lineno);
     509           0 :                 return -1;
     510       30703 :         } else if (var && t != 0 ?
     511        7864 :                    ATOMsize(t) < width ||
     512        7864 :                    (width != 1 && width != 2 && width != 4
     513             : #if SIZEOF_VAR_T == 8
     514          54 :                     && width != 8
     515             : #endif
     516             :                            ) :
     517       22839 :                    ATOMsize(t) != width) {
     518           0 :                 TRC_CRITICAL(GDK, "inconsistent entry in BBP.dir: tsize mismatch for BAT %d on line %d\n", (int) b->batCacheid, lineno);
     519           0 :                 return -1;
     520             :         }
     521       30944 :         b->ttype = t;
     522       30944 :         b->twidth = width;
     523       30944 :         b->tshift = ATOMelmshift(width);
     524       30944 :         assert_shift_width(b->tshift,b->twidth);
     525       30944 :         b->tnokey[0] = (BUN) nokey0;
     526       30944 :         b->tnokey[1] = (BUN) nokey1;
     527       30944 :         b->tsorted = (bit) ((properties & 0x0001) != 0);
     528       30944 :         b->trevsorted = (bit) ((properties & 0x0080) != 0);
     529       30944 :         b->tkey = (properties & 0x0100) != 0;
     530       30944 :         b->tnonil = (properties & 0x0400) != 0;
     531       30944 :         b->tnil = (properties & 0x0800) != 0;
     532       30944 :         b->tnosorted = (BUN) nosorted;
     533       30944 :         b->tnorevsorted = (BUN) norevsorted;
     534       30944 :         b->tunique_est = 0.0;
     535             :         /* (properties & 0x0200) is the old tdense flag */
     536       30944 :         b->tseqbase = (properties & 0x0200) == 0 || base >= (uint64_t) oid_nil ? oid_nil : (oid) base;
     537       30944 :         b->theap->free = (size_t) free;
     538       30944 :         b->theap->hasfile = free > 0;
     539             :         /* set heap size to match capacity */
     540       30944 :         if (b->ttype == TYPE_msk) {
     541             :                 /* round up capacity to multiple of 32 */
     542        4716 :                 b->batCapacity = (b->batCapacity + 31) & ~((BUN) 31);
     543        4716 :                 b->theap->size = b->batCapacity / 8;
     544             :         } else {
     545       26228 :                 b->theap->size = (size_t) b->batCapacity << b->tshift;
     546             :         }
     547       30944 :         b->theap->base = NULL;
     548       30944 :         settailname(b->theap, filename, t, width);
     549       30944 :         b->theap->storage = STORE_INVALID;
     550       30944 :         b->theap->newstorage = STORE_INVALID;
     551       30944 :         b->theap->farmid = BBPselectfarm(PERSISTENT, b->ttype, offheap);
     552       30944 :         b->theap->dirty = false;
     553       30944 :         b->theap->parentid = b->batCacheid;
     554       30944 :         if (minpos < b->batCount)
     555       11738 :                 b->tminpos = (BUN) minpos;
     556             :         else
     557       19206 :                 b->tminpos = BUN_NONE;
     558       30944 :         if (maxpos < b->batCount)
     559       11773 :                 b->tmaxpos = (BUN) maxpos;
     560             :         else
     561       19171 :                 b->tmaxpos = BUN_NONE;
     562       30944 :         if (t && var) {
     563        8078 :                 t = vheapinit(b, buf + n, bbpversion, filename, lineno);
     564        8078 :                 if (t < 0)
     565             :                         return t;
     566        8078 :                 n += t;
     567             :         } else {
     568       22866 :                 b->tvheap = NULL;
     569             :         }
     570       30944 :         return n;
     571             : }
     572             : 
     573             : /* BBPreadBBPline (defined after return_options below): read a single
     574             :  * line from the BBP.dir file (file pointer fp) and fill in the
     575             :  * structure pointed to by bn and extra information through the other
     576             :  * pointers; return 0 on end of file, 1 on success, and -1 on failure
                        *
                        * The only memory this function can allocate is the copy of the
                        * options string handed back through *options; everything else is
                        * written into storage supplied by the caller.
                        *
                        * fp         - stream to read the entry from
                        * bbpversion - version of the BBP.dir being read; versions up to and
                        *              including GDKLIBRARY_HSIZE carry an extra capacity
                        *              field between the count and the seqbase
                        * lineno     - line counter, incremented once a line has been read
                        *              and quoted in the error messages
                        * bn         - descriptor to fill in; the part of the line after the
                        *              fixed fields is handed to heapinit together with bn
                        * hashash    - (only with GDKLIBRARY_HASHASH) passed on to heapinit
                        * batname    - receives the name field, at most 128 characters plus
                        *              the terminating NUL
                        * filename   - receives the file name field, at most 23 characters
                        *              plus the terminating NUL, with both kinds of path
                        *              separator replaced by DIR_SEP
                        * options    - may be NULL; otherwise *options is set on success to
                        *              NULL or to a GDKstrdup copy of the rest of the line
                        *              (see return_options) */
     577             : /* set to true during initialization, else always false; if false, do
     578             :  * not return any options (set pointer to NULL as if there aren't any);
     579             :  * if true and there are options, return them in freshly allocated
     580             :  * memory through *options */
     581             : static bool return_options = false;
     582             : int
     583       31284 : BBPreadBBPline(FILE *fp, unsigned bbpversion, int *lineno, BAT *bn,
     584             : #ifdef GDKLIBRARY_HASHASH
     585             :                int *hashash,
     586             : #endif
     587             :                char *batname, char *filename, char **options)
     588             : {
     589       31284 :         char buf[4096];
     590       31284 :         uint64_t batid;
     591       31284 :         unsigned int status;
     592       31284 :         unsigned int properties;
     593       31284 :         int nread, n;
     594       31284 :         char *s;
     595       31284 :         uint64_t count, capacity = 0, base = 0;
     596             : 
                               /* a NULL result from fgets is either a read error or a clean
                                * end of file; ferror tells the two apart */
     597       31284 :         if (fgets(buf, sizeof(buf), fp) == NULL) {
     598         340 :                 if (ferror(fp)) {
     599           0 :                         TRC_CRITICAL(GDK, "error reading BBP.dir on line %d\n", *lineno);
     600           0 :                         return -1;
     601             :                 }
     602             :                 return 0;       /* end of file */
     603             :         }
     604       30944 :         (*lineno)++;
                               /* a complete line ends in LF or CR LF: a CR that is not followed
                                * by LF is rejected, and a line without any terminator is
                                * reported as being too long for buf */
     605       30944 :         if ((s = strpbrk(buf, "\r\n")) != NULL) {
     606       30944 :                 if (s[0] == '\r' && s[1] != '\n') {
     607           0 :                         TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d", *lineno);
     608           0 :                         return -1;
     609             :                 }
     610             :                 /* zap the newline */
     611       30944 :                 *s = '\0';
     612             :         } else {
     613           0 :                 TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d: line too long\n", *lineno);
     614           0 :                 return -1;
     615             :         }
     616             : 
                               /* parse the fixed fields; %n stores the number of characters
                                * consumed so far in nread and does not count towards the
                                * sscanf result, hence the comparison with 8 (old format, with
                                * capacity) or 7 (current format); status and capacity are
                                * parsed only to consume their fields and are not used
                                * afterwards */
                               /* NOTE(review): %23s can store up to 24 bytes in filename and
                                * %128s up to 129 bytes in batname; confirm that every caller
                                * passes buffers at least that large */
     617       61888 :         if (bbpversion <= GDKLIBRARY_HSIZE ?
     618           0 :             sscanf(buf,
     619             :                    "%" SCNu64 " %u %128s %23s %u %" SCNu64
     620             :                    " %" SCNu64 " %" SCNu64
     621             :                    "%n",
     622             :                    &batid, &status, batname, filename,
     623             :                    &properties, &count, &capacity, &base,
     624             :                    &nread) < 8 :
     625       30944 :             sscanf(buf,
     626             :                    "%" SCNu64 " %u %128s %23s %u %" SCNu64
     627             :                    " %" SCNu64
     628             :                    "%n",
     629             :                    &batid, &status, batname, filename,
     630             :                    &properties, &count, &base,
     631             :                    &nread) < 7) {
     632           0 :                 TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d", *lineno);
     633           0 :                 return -1;
     634             :         }
     635             : 
                               /* reject ids beyond N_BBPINIT * BBPINIT - 1 while batid is still
                                * a 64 bit value, i.e. before it is narrowed to bat below */
     636       30944 :         if (batid >= N_BBPINIT * BBPINIT) {
     637           0 :                 TRC_CRITICAL(GDK, "bat ID (%" PRIu64 ") too large to accomodate (max %d), on line %d.", batid, N_BBPINIT * BBPINIT - 1, *lineno);
     638           0 :                 return -1;
     639             :         }
     640             : 
     641             :         /* convert both / and \ path separators to our own DIR_SEP */
     642             : #if DIR_SEP != '/'
     643             :         s = filename;
     644             :         while ((s = strchr(s, '/')) != NULL)
     645             :                 *s++ = DIR_SEP;
     646             : #endif
     647             : #if DIR_SEP != '\\'
     648             :         s = filename;
     649       30944 :         while ((s = strchr(s, '\\')) != NULL)
     650           0 :                 *s++ = DIR_SEP;
     651             : #endif
     652             : 
                               /* every BAT described in BBP.dir is marked as not transient and
                                * as already copied to disk */
     653       30944 :         bn->batCacheid = (bat) batid;
     654       30944 :         bn->batTransient = false;
     655       30944 :         bn->batCopiedtodisk = true;
                               /* bits 1 and 2 of properties select the access mode; the value
                                * 3 is not a valid mode */
     656       30944 :         switch ((properties & 0x06) >> 1) {
     657         928 :         case 0:
     658         928 :                 bn->batRestricted = BAT_WRITE;
     659         928 :                 break;
     660       30016 :         case 1:
     661       30016 :                 bn->batRestricted = BAT_READ;
     662       30016 :                 break;
     663           0 :         case 2:
     664           0 :                 bn->batRestricted = BAT_APPEND;
     665           0 :                 break;
     666           0 :         default:
     667           0 :                 TRC_CRITICAL(GDK, "incorrect batRestricted value");
     668           0 :                 return -1;
     669             :         }
     670       30944 :         bn->batCount = (BUN) count;
     671       30944 :         bn->batInserted = bn->batCount;
     672             :         /* set capacity to at least count */
     673       30944 :         bn->batCapacity = (BUN) count <= BATTINY ? BATTINY : (BUN) count;
     674             : 
     675       30944 :         if (base > (uint64_t) GDK_oid_max) {
     676           0 :                 TRC_CRITICAL(GDK, "head seqbase out of range (ID = %" PRIu64 ", seq = %" PRIu64 ") on line %d.", batid, base, *lineno);
     677           0 :                 return -1;
     678             :         }
     679       30944 :         bn->hseqbase = (oid) base;
                               /* the rest of the line, starting at buf + nread, is parsed by
                                * heapinit, which returns the number of characters it consumed
                                * or a negative value on failure */
     680       30944 :         n = heapinit(bn, buf + nread,
     681             : #ifdef GDKLIBRARY_HASHASH
     682             :                      hashash,
     683             : #endif
     684             :                      bbpversion, filename, *lineno);
     685       30944 :         if (n < 0) {
     686             :                 return -1;
     687             :         }
     688       30944 :         nread += n;
     689             : 
                               /* whatever follows the parsed fields must be the end of the line
                                * or a space that introduces the options */
     690       30944 :         if (nread >= (int) sizeof(buf) || (buf[nread] != '\0' && buf[nread] != ' ')) {
     691           0 :                 TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d", *lineno);
     692           0 :                 return -1;
     693             :         }
                               /* options are copied only while return_options is set; in all
                                * other cases the caller is told that there are none */
     694       30944 :         if (options) {
     695       30944 :                 if (return_options && buf[nread] == ' ') {
     696           0 :                         if ((*options = GDKstrdup(buf + nread + 1)) == NULL) {
     697           0 :                                 TRC_CRITICAL(GDK, "GDKstrdup failed\n");
     698           0 :                                 return -1;
     699             :                         }
     700             :                 } else {
     701       30944 :                         *options = NULL;
     702             :                 }
     703             :         }
     704             :         return 1;
     705             : }
     706             : 
                       /* BBPreadEntries: read every remaining entry of BBP.dir from fp and
                        * install each one in the BBP.
                        *
                        * Each line is parsed by BBPreadBBPline into descriptors that live on
                        * the stack; once the entry has passed the checks below, heap copies
                        * of the descriptors are made and stored in the BBP slot of the BAT
                        * id, together with the logical name, physical name and options.
                        *
                        * BBPnameLock is held and return_options is true from before the
                        * first line is read until the function returns.
                        *
                        * fp, bbpversion - passed on to BBPreadBBPline
                        * lineno         - line counter; the local copy is advanced by
                        *                  BBPreadBBPline and quoted in the error messages
                        * hashbats, nhashbats (only with GDKLIBRARY_HASHASH) - at end of
                        *                  file these receive the array, and its length, of
                        *                  the ids of all BATs for which BBPreadBBPline
                        *                  reported a non-zero hashash flag; the array comes
                        *                  from GDKrealloc and is NULL when there is no such
                        *                  BAT
                        *
                        * Returns GDK_SUCCEED once end of file is reached and GDK_FAIL on the
                        * first error.  On failure the collected id array is freed and the
                        * output pointers are left untouched, but entries that were already
                        * installed are not removed again. */
     707             : static gdk_return
     708         335 : BBPreadEntries(FILE *fp, unsigned bbpversion, int lineno
     709             : #ifdef GDKLIBRARY_HASHASH
     710             :                , bat **hashbats, bat *nhashbats
     711             : #endif
     712             :         )
     713             : {
     714             : #ifdef GDKLIBRARY_HASHASH
     715         335 :         bat *hbats = NULL;
     716         335 :         bat nhbats = 0;
     717             : #endif
     718             : 
     719             :         /* read the BBP.dir and insert the BATs into the BBP */
     720         335 :         return_options = true;
     721         335 :         MT_lock_set(&BBPnameLock);
     722       29704 :         for (;;) {
                                       /* start every iteration from fresh stack descriptors:
                                        * members that are not named in the initializers are
                                        * zero; b points at h and vh so that the parser has heaps
                                        * to fill in */
     723       30039 :                 BAT b;
     724       30039 :                 Heap h;
     725       30039 :                 Heap vh;
     726       30039 :                 vh = h = (Heap) {
     727             :                         .free = 0,
     728             :                 };
     729       30039 :                 b = (BAT) {
     730             :                         .theap = &h,
     731             :                         .tvheap = &vh,
     732             :                 };
     733       30039 :                 char *options;
     734       30039 :                 char headname[129];
     735       30039 :                 char filename[sizeof(BBP_physical(0))];
     736       30039 :                 char logical[1024];
     737             : #ifdef GDKLIBRARY_HASHASH
     738       30039 :                 int Thashash;
     739             : #endif
     740             : 
     741       30039 :                 switch (BBPreadBBPline(fp, bbpversion, &lineno, &b,
     742             : #ifdef GDKLIBRARY_HASHASH
     743             :                                        &Thashash,
     744             : #endif
     745             :                                        headname, filename, &options)) {
     746         335 :                 case 0:
     747             :                         /* end of file */
     748             : #ifdef GDKLIBRARY_HASHASH
     749         335 :                         *hashbats = hbats;
     750         335 :                         *nhashbats = nhbats;
     751             : #endif
     752         335 :                         return_options = false;
     753         335 :                         MT_lock_unset(&BBPnameLock);
     754         335 :                         return GDK_SUCCEED;
     755             :                 case 1:
     756             :                         /* successfully read an entry */
     757       29704 :                         break;
     758           0 :                 default:
     759             :                         /* error */
     760           0 :                         goto bailout;
     761             :                 }
     762             : 
                                       /* from here on options may hold allocated memory, so every
                                        * error path frees it before bailing out; BBPreadBBPline
                                        * already rejects ids this large, so this test repeats
                                        * that check */
     763       29704 :                 if (b.batCacheid >= N_BBPINIT * BBPINIT) {
     764           0 :                         GDKfree(options);
     765           0 :                         TRC_CRITICAL(GDK, "bat ID (%d) too large to accommodate (max %d), on line %d.", b.batCacheid, N_BBPINIT * BBPINIT - 1, lineno);
     766           0 :                         goto bailout;
     767             :                 }
     768             : 
                                       /* grow BBPsize so that it covers this id */
                                       /* NOTE(review): BBPextend is called only when BBPsize + 1
                                        * has reached BBPlimit, not when b.batCacheid itself is at
                                        * or beyond BBPlimit; confirm that BBPlimit already covers
                                        * every id that can occur in the file by the time this
                                        * function runs */
     769       29704 :                 if (b.batCacheid >= (bat) ATOMIC_GET(&BBPsize)) {
     770           0 :                         if ((bat) ATOMIC_GET(&BBPsize) + 1 >= BBPlimit &&
     771           0 :                             BBPextend(b.batCacheid + 1) != GDK_SUCCEED) {
     772           0 :                                 GDKfree(options);
     773           0 :                                 goto bailout;
     774             :                         }
     775           0 :                         ATOMIC_SET(&BBPsize, b.batCacheid + 1);
     776             :                 }
                                       /* a slot that already holds a descriptor means that the
                                        * same id occurred on an earlier line */
     777       29704 :                 if (BBP_desc(b.batCacheid) != NULL) {
     778           0 :                         GDKfree(options);
     779           0 :                         TRC_CRITICAL(GDK, "duplicate entry in BBP.dir (ID = "
     780             :                                      "%d) on line %d.", b.batCacheid, lineno);
     781           0 :                         goto bailout;
     782             :                 }
     783             : 
     784             : #ifdef GDKLIBRARY_HASHASH
                                       /* remember the id of every BAT that was flagged by the
                                        * parser; the array grows by one element at a time */
     785       29704 :                 if (Thashash) {
     786           0 :                         assert(bbpversion <= GDKLIBRARY_HASHASH);
     787           0 :                         bat *sb = GDKrealloc(hbats, ++nhbats * sizeof(bat));
     788           0 :                         if (sb == NULL) {
     789           0 :                                 GDKfree(options);
     790           0 :                                 goto bailout;
     791             :                         }
     792           0 :                         hbats = sb;
     793           0 :                         hbats[nhbats - 1] = b.batCacheid;
     794             :                 }
     795             : #endif
     796             : 
                                       /* move the descriptors from the stack to the heap; when
                                        * this test fails hn needs no cleanup because it is either
                                        * NULL or was never assigned */
     797       29704 :                 BAT *bn;
     798       29704 :                 Heap *hn;
     799       29704 :                 if ((bn = GDKmalloc(sizeof(BAT))) == NULL ||
     800       29704 :                     (hn = GDKmalloc(sizeof(Heap))) == NULL) {
     801           0 :                         GDKfree(bn);
     802           0 :                         GDKfree(options);
     803           0 :                         TRC_CRITICAL(GDK, "cannot allocate memory for BAT.");
     804           0 :                         goto bailout;
     805             :                 }
     806       29704 :                 *bn = b;
     807       29704 :                 *hn = h;
     808       29704 :                 bn->theap = hn;
                                       /* b.tvheap is still set only if the parser left it pointing
                                        * at vh; in that case vh gets a heap copy of its own */
     809       29704 :                 if (b.tvheap) {
     810        7753 :                         Heap *vhn;
     811        7753 :                         assert(b.tvheap == &vh);
     812        7753 :                         if ((vhn = GDKmalloc(sizeof(Heap))) == NULL) {
     813           0 :                                 GDKfree(hn);
     814           0 :                                 GDKfree(bn);
     815           0 :                                 GDKfree(options);
     816           0 :                                 TRC_CRITICAL(GDK, "cannot allocate memory for BAT.");
     817           0 :                                 goto bailout;
     818             :                         }
     819        7753 :                         *vhn = vh;
     820        7753 :                         bn->tvheap = vhn;
     821        7753 :                         ATOMIC_INIT(&bn->tvheap->refs, 1);
     822             :                 }
     823             : 
                                       /* the locks of the new descriptor are named after the BAT
                                        * id; from here on error paths release bn through
                                        * BATdestroy */
     824       29704 :                 char name[MT_NAME_LEN];
     825       29704 :                 snprintf(name, sizeof(name), "heaplock%d", bn->batCacheid); /* fits */
     826       29704 :                 MT_lock_init(&bn->theaplock, name);
     827       29704 :                 snprintf(name, sizeof(name), "BATlock%d", bn->batCacheid); /* fits */
     828       29704 :                 MT_lock_init(&bn->batIdxLock, name);
     829       29704 :                 snprintf(name, sizeof(name), "hashlock%d", bn->batCacheid); /* fits */
     830       29704 :                 MT_rwlock_init(&bn->thashlock, name);
     831       29704 :                 ATOMIC_INIT(&bn->theap->refs, 1);
     832             : 
                                       /* the backup name is tmp_ followed by the id in octal */
     833       29704 :                 if (snprintf(BBP_bak(b.batCacheid), sizeof(BBP_bak(b.batCacheid)), "tmp_%o", (unsigned) b.batCacheid) >= (int) sizeof(BBP_bak(b.batCacheid))) {
     834           0 :                         BATdestroy(bn);
     835           0 :                         GDKfree(options);
     836           0 :                         TRC_CRITICAL(GDK, "BBP logical filename directory is too large, on line %d\n", lineno);
     837           0 :                         goto bailout;
     838             :                 }
                                       /* derive the logical name: a name field that starts with ~
                                        * is replaced by the backup name, any other name is cut
                                        * off at its first ~ (if there is one) */
     839       29704 :                 char *s;
     840       29704 :                 if ((s = strchr(headname, '~')) != NULL && s == headname) {
     841             :                         /* sizeof(logical) > sizeof(BBP_bak(b.batCacheid)), so
     842             :                          * this fits */
     843           0 :                         strcpy(logical, BBP_bak(b.batCacheid));
     844             :                 } else {
     845           0 :                         if (s)
     846           0 :                                 *s = 0;
     847       29704 :                         strcpy_len(logical, headname, sizeof(logical));
     848             :                 }
                                       /* a logical name equal to the backup name shares the bak
                                        * buffer; any other name gets an allocated copy */
     849       29704 :                 if (strcmp(logical, BBP_bak(b.batCacheid)) == 0) {
     850       28984 :                         BBP_logical(b.batCacheid) = BBP_bak(b.batCacheid);
     851             :                 } else {
     852         720 :                         BBP_logical(b.batCacheid) = GDKstrdup(logical);
     853         720 :                         if (BBP_logical(b.batCacheid) == NULL) {
     854           0 :                                 BATdestroy(bn);
     855           0 :                                 GDKfree(options);
     856           0 :                                 TRC_CRITICAL(GDK, "GDKstrdup failed\n");
     857           0 :                                 goto bailout;
     858             :                         }
     859             :                 }
     860       29704 :                 strcpy_len(BBP_physical(b.batCacheid), filename, sizeof(BBP_physical(b.batCacheid)));
     861             : #ifdef __COVERITY__
     862             :                 /* help coverity */
     863             :                 BBP_physical(b.batCacheid)[sizeof(BBP_physical(b.batCacheid)) - 1] = 0;
     864             : #endif
                                       /* the slot takes over ownership of options and bn */
     865       29704 :                 BBP_options(b.batCacheid) = options;
     866       29704 :                 BBP_refs(b.batCacheid) = 0;
     867       29704 :                 BBP_lrefs(b.batCacheid) = 1;    /* any BAT we encounter here is persistent, so has a logical reference */
     868       29704 :                 BBP_desc(b.batCacheid) = bn;
     869       29704 :                 BBP_pid(b.batCacheid) = 0;
     870       29704 :                 BBP_status_set(b.batCacheid, BBPEXISTING);      /* do we need other status bits? */
                                       /* only names for which BBPnamecheck returns 0 are entered
                                        * with BBP_insert (presumably these are the names that are
                                        * not of the tmp_ form; verify against BBPnamecheck) */
     871       29704 :                 if (BBPnamecheck(BBP_logical(b.batCacheid)) == 0)
     872         720 :                         BBP_insert(b.batCacheid);
     873             :         }
     874             : 
                         /* common error exit: release the lock, switch option copying off
                          * again and drop the id array collected so far */
     875           0 :   bailout:
     876           0 :         MT_lock_unset(&BBPnameLock);
     877           0 :         return_options = false;
     878             : #ifdef GDKLIBRARY_HASHASH
     879           0 :         GDKfree(hbats);
     880             : #endif
     881           0 :         return GDK_FAIL;
     882             : }
     883             : 
     884             : /* check that the necessary files for all BATs exist and are large
     885             :  * enough
                        *
                        * Every id from 1 up to but excluding BBPsize that has a descriptor
                        * and is not of type void is examined.  For the tail heap, and for
                        * the variable sized heap if there is one, a file is only required
                        * when the free value of the heap is larger than zero.  Such a file
                        * must exist and must be at least free bytes long, otherwise
                        * GDK_FAIL is returned; GDK_FAIL is also returned when a file path
                        * cannot be built.
                        *
                        * Side effect: a file that is longer than free rounded up to a
                        * multiple of GDK_mmap_pagesize is truncated to that rounded size.
                        * This is best effort: a file that cannot be opened is left alone
                        * and a failing ftruncate is only reported through perror.
                        *
                        * bbpversion is not used.  Returns GDK_SUCCEED when all checks
                        * passed. */
     886             : static gdk_return
     887         336 : BBPcheckbats(unsigned bbpversion)
     888             : {
     889         336 :         (void) bbpversion;
     890       68550 :         for (bat bid = 1, size = (bat) ATOMIC_GET(&BBPsize); bid < size; bid++) {
     891       68214 :                 struct stat statb;
     892       68214 :                 BAT *b;
     893       68214 :                 char *path;
     894             : 
     895       68214 :                 if ((b = BBP_desc(bid)) == NULL) {
     896             :                         /* not a valid BAT */
     897       38510 :                         continue;
     898             :                 }
     899       29704 :                 if (b->ttype == TYPE_void) {
     900             :                         /* no files needed */
     901           0 :                         continue;
     902             :                 }
                                       /* tail heap: the file name is taken from the heap itself */
     903       29704 :                 if (b->theap->free > 0) {
     904       19661 :                         path = GDKfilepath(0, BATDIR, b->theap->filename, NULL);
     905       19661 :                         if (path == NULL)
     906           0 :                                 return GDK_FAIL;
     907             :                         /* first check string offset heap with width,
     908             :                          * then without */
     909       19661 :                         if (MT_stat(path, &statb) < 0) {
     910             : #ifdef GDKLIBRARY_TAILN
                                                       /* for a string BAT with narrow offsets, retry
                                                        * with the last character of the path removed;
                                                        * when that succeeds path stays shortened, so
                                                        * the checks and the truncation below apply to
                                                        * the file that was actually found */
     911           0 :                                 if (b->ttype == TYPE_str &&
     912           0 :                                     b->twidth < SIZEOF_VAR_T) {
     913           0 :                                         size_t taillen = strlen(path) - 1;
     914           0 :                                         char tailsave = path[taillen];
     915           0 :                                         path[taillen] = 0;
     916           0 :                                         if (MT_stat(path, &statb) < 0) {
     917           0 :                                                 GDKsyserror("cannot stat file %s%c or %s (expected size %zu)\n",
     918             :                                                             path, tailsave, path, b->theap->free);
     919           0 :                                                 GDKfree(path);
     920           0 :                                                 return GDK_FAIL;
     921             :                                         }
     922             :                                 } else
     923             : #endif
     924             :                                 {
     925           0 :                                         GDKsyserror("cannot stat file %s (expected size %zu)\n",
     926             :                                                     path, b->theap->free);
     927           0 :                                         GDKfree(path);
     928           0 :                                         return GDK_FAIL;
     929             :                                 }
     930             :                         }
     931       19661 :                         if ((size_t) statb.st_size < b->theap->free) {
     932           0 :                                 GDKerror("file %s too small (expected %zu, actual %zu)\n", path, b->theap->free, (size_t) statb.st_size);
     933           0 :                                 GDKfree(path);
     934           0 :                                 return GDK_FAIL;
     935             :                         }
                                               /* round free up to a multiple of GDK_mmap_pagesize (the
                                                * mask assumes that the page size is a power of two;
                                                * TODO confirm); since free is larger than zero here,
                                                * a result of 0 can only come from the addition
                                                * wrapping around */
     936       19661 :                         size_t hfree = b->theap->free;
     937       19661 :                         hfree = (hfree + GDK_mmap_pagesize - 1) & ~(GDK_mmap_pagesize - 1);
     938       19661 :                         if (hfree == 0)
     939           0 :                                 hfree = GDK_mmap_pagesize;
                                               /* cut off whatever the file holds beyond the rounded
                                                * size; failures here do not fail the check */
     940       19661 :                         if (statb.st_size > (off_t) hfree) {
     941           9 :                                 int fd;
     942           9 :                                 if ((fd = MT_open(path, O_RDWR | O_CLOEXEC | O_BINARY)) >= 0) {
     943           9 :                                         if (ftruncate(fd, hfree) == -1)
     944           0 :                                                 perror("ftruncate");
     945           9 :                                         (void) close(fd);
     946             :                                 }
     947             :                         }
     948       19661 :                         GDKfree(path);
     949             :                 }
                                       /* variable sized heap: same checks as above, but the path
                                        * is built from BBP_physical and the string theap instead
                                        * of from the file name stored in the heap */
     950       29704 :                 if (b->tvheap != NULL && b->tvheap->free > 0) {
     951        5537 :                         path = GDKfilepath(0, BATDIR, BBP_physical(b->batCacheid), "theap");
     952        5537 :                         if (path == NULL)
     953             :                                 return GDK_FAIL;
     954        5537 :                         if (MT_stat(path, &statb) < 0) {
     955           0 :                                 GDKsyserror("cannot stat file %s\n",
     956             :                                             path);
     957           0 :                                 GDKfree(path);
     958           0 :                                 return GDK_FAIL;
     959             :                         }
     960        5537 :                         if ((size_t) statb.st_size < b->tvheap->free) {
     961           0 :                                 GDKerror("file %s too small (expected %zu, actual %zu)\n", path, b->tvheap->free, (size_t) statb.st_size);
     962           0 :                                 GDKfree(path);
     963           0 :                                 return GDK_FAIL;
     964             :                         }
     965        5537 :                         size_t hfree = b->tvheap->free;
     966        5537 :                         hfree = (hfree + GDK_mmap_pagesize - 1) & ~(GDK_mmap_pagesize - 1);
     967        5537 :                         if (hfree == 0)
     968           0 :                                 hfree = GDK_mmap_pagesize;
     969        5537 :                         if (statb.st_size > (off_t) hfree) {
     970           9 :                                 int fd;
     971           9 :                                 if ((fd = MT_open(path, O_RDWR | O_CLOEXEC | O_BINARY)) >= 0) {
     972           9 :                                         if (ftruncate(fd, hfree) == -1)
     973           0 :                                                 perror("ftruncate");
     974           9 :                                         (void) close(fd);
     975             :                                 }
     976             :                         }
     977        5537 :                         GDKfree(path);
     978             :                 }
     979             :         }
     980             :         return GDK_SUCCEED;
     981             : }
     982             : 
     983             : #ifdef HAVE_HGE
     984             : #define SIZEOF_MAX_INT SIZEOF_HGE /* with HAVE_HGE: size of hge */
     985             : #else
     986             : #define SIZEOF_MAX_INT SIZEOF_LNG /* without HAVE_HGE: size of lng */
     987             : #endif
                       /* SIZEOF_MAX_INT is compared by BBPheader with the maximum integer
                        * size that is recorded in the header of BBP.dir */
     988             : 
     989             : unsigned
     990         340 : BBPheader(FILE *fp, int *lineno, bat *bbpsize, lng *logno, lng *transid, bool allow_hge_upgrade)
     991             : {
     992         340 :         char buf[BUFSIZ];
     993         340 :         int sz, ptrsize, oidsize, intsize;
     994         340 :         unsigned bbpversion;
     995             : 
     996         340 :         if (fgets(buf, sizeof(buf), fp) == NULL) {
     997           0 :                 TRC_CRITICAL(GDK, "BBP.dir is empty");
     998           0 :                 return 0;
     999             :         }
    1000         340 :         ++*lineno;
    1001         340 :         if (sscanf(buf, "BBP.dir, GDKversion %u\n", &bbpversion) != 1) {
    1002           0 :                 GDKerror("old BBP without version number; "
    1003             :                          "dump the database using a compatible version, "
    1004             :                          "then restore into new database using this version.\n");
    1005           0 :                 return 0;
    1006             :         }
    1007         340 :         if (bbpversion != GDKLIBRARY &&
    1008             :             bbpversion != GDKLIBRARY_JSON &&
    1009             :             bbpversion != GDKLIBRARY_HSIZE &&
    1010             :             bbpversion != GDKLIBRARY_HASHASH &&
    1011         340 :             bbpversion != GDKLIBRARY_TAILN &&
    1012             :             bbpversion != GDKLIBRARY_MINMAX_POS) {
    1013           0 :                 TRC_CRITICAL(GDK, "incompatible BBP version: expected 0%o, got 0%o. "
    1014             :                              "This database was probably created by a %s version of MonetDB.",
    1015             :                              GDKLIBRARY, bbpversion,
    1016             :                              bbpversion > GDKLIBRARY ? "newer" : "too old");
    1017           0 :                 return 0;
    1018             :         }
    1019         340 :         if (fgets(buf, sizeof(buf), fp) == NULL) {
    1020           0 :                 TRC_CRITICAL(GDK, "short BBP");
    1021           0 :                 return 0;
    1022             :         }
    1023         340 :         ++*lineno;
    1024         340 :         if (sscanf(buf, "%d %d %d", &ptrsize, &oidsize, &intsize) != 3) {
    1025           0 :                 TRC_CRITICAL(GDK, "BBP.dir has incompatible format: pointer, OID, and max. integer sizes are missing on line %d", *lineno);
    1026           0 :                 return 0;
    1027             :         }
    1028         340 :         if (ptrsize != SIZEOF_SIZE_T || oidsize != SIZEOF_OID) {
    1029           0 :                 TRC_CRITICAL(GDK, "database created with incompatible server: "
    1030             :                              "expected pointer size %d, got %d, expected OID size %d, got %d.",
    1031             :                              SIZEOF_SIZE_T, ptrsize, SIZEOF_OID, oidsize);
    1032           0 :                 return 0;
    1033             :         }
    1034         340 :         if (intsize > SIZEOF_MAX_INT) {
    1035           0 :                 TRC_CRITICAL(GDK, "database created with incompatible server: "
    1036             :                              "expected max. integer size %d, got %d.",
    1037             :                              SIZEOF_MAX_INT, intsize);
    1038           0 :                 return 0;
    1039             :         }
    1040         340 :         if (intsize < SIZEOF_MAX_INT && !allow_hge_upgrade) {
    1041           0 :                 TRC_CRITICAL(GDK, "database created with incompatible server: "
    1042             :                              "expected max. integer size %d, got %d; "
    1043             :                              "use --set allow_hge_upgrade=yes to upgrade.",
    1044             :                              SIZEOF_MAX_INT, intsize);
    1045           0 :                 return 0;
    1046             :         }
    1047         340 :         if (fgets(buf, sizeof(buf), fp) == NULL) {
    1048           0 :                 TRC_CRITICAL(GDK, "short BBP");
    1049           0 :                 return 0;
    1050             :         }
    1051         340 :         ++*lineno;
    1052         340 :         if (sscanf(buf, "BBPsize=%d", &sz) != 1) {
    1053           0 :                 TRC_CRITICAL(GDK, "no BBPsize value found\n");
    1054           0 :                 return 0;
    1055             :         }
    1056         340 :         if (sz > *bbpsize)
    1057         117 :                 *bbpsize = sz;
    1058         340 :         if (bbpversion > GDKLIBRARY_MINMAX_POS) {
    1059         340 :                 if (fgets(buf, sizeof(buf), fp) == NULL) {
    1060           0 :                         TRC_CRITICAL(GDK, "short BBP");
    1061           0 :                         return 0;
    1062             :                 }
    1063         340 :                 if (sscanf(buf, "BBPinfo=" LLSCN " " LLSCN, logno, transid) != 2) {
    1064           0 :                         TRC_CRITICAL(GDK, "no info value found\n");
    1065           0 :                         return 0;
    1066             :                 }
    1067             :         } else {
    1068           0 :                 *logno = *transid = 0;
    1069             :         }
    1070         340 :         return bbpversion;
    1071             : }
    1072             : 
    1073             : bool
    1074    66994625 : GDKinmemory(int farmid)
    1075             : {
    1076    66994625 :         if (farmid == NOFARM)
    1077             :                 farmid = 0;
    1078    65891305 :         assert(farmid >= 0 && farmid < MAXFARMS);
    1079    66994625 :         return BBPfarms[farmid].dirname == NULL;
    1080             : }
    1081             : 
    1082             : /* all errors are fatal */
    1083             : gdk_return
    1084        1002 : BBPaddfarm(const char *dirname, uint32_t rolemask, bool logerror)
    1085             : {
    1086        1002 :         struct stat st;
    1087        1002 :         int i;
    1088             : 
    1089        1002 :         if (dirname == NULL) {
    1090           1 :                 assert(BBPfarms[0].dirname == NULL);
    1091           1 :                 assert(rolemask & 1);
    1092           1 :                 assert(BBPfarms[0].roles == 0);
    1093           1 :                 BBPfarms[0].roles = rolemask;
    1094           1 :                 return GDK_SUCCEED;
    1095             :         }
    1096        1001 :         if (strchr(dirname, '\n') != NULL) {
    1097           0 :                 if (logerror)
    1098           0 :                         GDKerror("no newline allowed in directory name\n");
    1099           0 :                 return GDK_FAIL;
    1100             :         }
    1101        1001 :         if (rolemask == 0 || (rolemask & 1 && BBPfarms[0].roles != 0)) {
    1102           0 :                 if (logerror)
    1103           0 :                         GDKerror("bad rolemask\n");
    1104           0 :                 return GDK_FAIL;
    1105             :         }
    1106        1001 :         if (strcmp(dirname, "in-memory") == 0 ||
    1107        1000 :             /* backward compatibility: */ strcmp(dirname, ":memory:") == 0) {
    1108             :                 dirname = NULL;
    1109        1000 :         } else if (MT_mkdir(dirname) < 0) {
    1110         913 :                 if (errno == EEXIST) {
    1111         913 :                         if (MT_stat(dirname, &st) == -1 || !S_ISDIR(st.st_mode)) {
    1112           0 :                                 if (logerror)
    1113           0 :                                         GDKerror("%s: not a directory\n", dirname);
    1114           0 :                                 return GDK_FAIL;
    1115             :                         }
    1116             :                 } else {
    1117           0 :                         if (logerror)
    1118           0 :                                 GDKsyserror("%s: cannot create directory\n", dirname);
    1119           0 :                         return GDK_FAIL;
    1120             :                 }
    1121             :         }
    1122        1991 :         for (i = 0; i < MAXFARMS; i++) {
    1123        1991 :                 if (BBPfarms[i].roles == 0) {
    1124        1001 :                         if (dirname) {
    1125        1000 :                                 BBPfarms[i].dirname = GDKstrdup(dirname);
    1126        1000 :                                 if (BBPfarms[i].dirname == NULL)
    1127             :                                         return GDK_FAIL;
    1128             :                         }
    1129        1001 :                         BBPfarms[i].roles = rolemask;
    1130        1001 :                         if ((rolemask & 1) == 0 && dirname != NULL) {
    1131             :                                 char *bbpdir;
    1132             :                                 int j;
    1133             : 
    1134        1009 :                                 for (j = 0; j < i; j++)
    1135         836 :                                         if (BBPfarms[j].dirname != NULL &&
    1136         836 :                                             strcmp(BBPfarms[i].dirname,
    1137             :                                                    BBPfarms[j].dirname) == 0)
    1138             :                                                 return GDK_SUCCEED;
    1139             :                                 /* if an extra farm, make sure we
    1140             :                                  * don't find a BBP.dir there that
    1141             :                                  * might belong to an existing
    1142             :                                  * database */
    1143         173 :                                 bbpdir = GDKfilepath(i, BATDIR, "BBP", "dir");
    1144         173 :                                 if (bbpdir == NULL) {
    1145             :                                         return GDK_FAIL;
    1146             :                                 }
    1147         173 :                                 if (MT_stat(bbpdir, &st) != -1 || errno != ENOENT) {
    1148           0 :                                         GDKfree(bbpdir);
    1149           0 :                                         if (logerror)
    1150           0 :                                                 GDKerror("%s is a database\n", dirname);
    1151           0 :                                         return GDK_FAIL;
    1152             :                                 }
    1153         173 :                                 GDKfree(bbpdir);
    1154         173 :                                 bbpdir = GDKfilepath(i, BAKDIR, "BBP", "dir");
    1155         173 :                                 if (bbpdir == NULL) {
    1156             :                                         return GDK_FAIL;
    1157             :                                 }
    1158         173 :                                 if (MT_stat(bbpdir, &st) != -1 || errno != ENOENT) {
    1159           0 :                                         GDKfree(bbpdir);
    1160           0 :                                         if (logerror)
    1161           0 :                                                 GDKerror("%s is a database\n", dirname);
    1162           0 :                                         return GDK_FAIL;
    1163             :                                 }
    1164         173 :                                 GDKfree(bbpdir);
    1165             :                         }
    1166         511 :                         return GDK_SUCCEED;
    1167             :                 }
    1168             :         }
    1169           0 :         if (logerror)
    1170           0 :                 GDKerror("too many farms\n");
    1171             :         return GDK_FAIL;
    1172             : }
    1173             : 
    1174             : gdk_return
    1175         338 : BBPchkfarms(void)
    1176             : {
    1177         338 :         const char *dir = NULL;
    1178         338 :         uint32_t rolemask = 0;
    1179         338 :         if ((BBPfarms[0].roles & 1) == 0) {
    1180           0 :                 GDKerror("Must call BBPaddfarms at least once for persistent data\n");
    1181           0 :                 return GDK_FAIL;
    1182             :         }
    1183       11154 :         for (int i = 0; i < MAXFARMS; i++) {
    1184       10816 :                 if (BBPfarms[i].roles != 0) {
    1185         664 :                         dir = BBPfarms[i].dirname;
    1186         664 :                         rolemask |= BBPfarms[i].roles;
    1187             :                 }
    1188             :         }
    1189         338 :         if (dir == NULL)
    1190           1 :                 dir = "in-memory";
    1191         338 :         if ((rolemask & (1U << TRANSIENT)) == 0) {
    1192           0 :                 gdk_return rc = BBPaddfarm(dir, 1U << TRANSIENT, true);
    1193           0 :                 if (rc != GDK_SUCCEED)
    1194             :                         return rc;
    1195             :         }
    1196         338 :         if ((rolemask & (1U << SYSTRANS)) == 0) {
    1197         338 :                 gdk_return rc = BBPaddfarm(dir, 1U << SYSTRANS, true);
    1198         338 :                 if (rc != GDK_SUCCEED)
    1199             :                         return rc;
    1200             :         }
    1201             :         return GDK_SUCCEED;
    1202             : }
    1203             : 
    1204             : #ifdef GDKLIBRARY_HASHASH
    1205             : static gdk_return
    1206           0 : fixhashashbat(BAT *b)
    1207             : {
    1208           0 :         const char *nme = BBP_physical(b->batCacheid);
    1209           0 :         char *srcdir = GDKfilepath(NOFARM, BATDIR, nme, NULL);
    1210           0 :         if (srcdir == NULL) {
    1211           0 :                 TRC_CRITICAL(GDK, "GDKfilepath failed\n");
    1212           0 :                 return GDK_FAIL;
    1213             :         }
    1214           0 :         char *s;
    1215           0 :         if ((s = strrchr(srcdir, DIR_SEP)) != NULL)
    1216           0 :                 *s = 0;
    1217           0 :         const char *bnme;
    1218           0 :         if ((bnme = strrchr(nme, DIR_SEP)) != NULL)
    1219           0 :                 bnme++;
    1220             :         else
    1221             :                 bnme = nme;
    1222           0 :         long_str filename;
    1223           0 :         snprintf(filename, sizeof(filename), "BACKUP%c%s", DIR_SEP, bnme);
    1224             : 
    1225             :         /* we don't maintain index structures */
    1226           0 :         HASHdestroy(b);
    1227           0 :         IMPSdestroy(b);
    1228           0 :         OIDXdestroy(b);
    1229           0 :         PROPdestroy(b);
    1230           0 :         STRMPdestroy(b);
    1231           0 :         RTREEdestroy(b);
    1232             : 
    1233             :         /* make backup of heaps */
    1234           0 :         const char *t;
    1235           0 :         if (GDKmove(b->theap->farmid, srcdir, bnme, "tail1",
    1236             :                     BAKDIR, bnme, "tail1", false) == GDK_SUCCEED)
    1237             :                 t = "tail1";
    1238           0 :         else if (GDKmove(b->theap->farmid, srcdir, bnme, "tail2",
    1239             :                          BAKDIR, bnme, "tail2", false) == GDK_SUCCEED)
    1240             :                 t = "tail2";
    1241             : #if SIZEOF_VAR_T == 8
    1242           0 :         else if (GDKmove(b->theap->farmid, srcdir, bnme, "tail4",
    1243             :                          BAKDIR, bnme, "tail4", false) == GDK_SUCCEED)
    1244             :                 t = "tail4";
    1245             : #endif
    1246           0 :         else if (GDKmove(b->theap->farmid, srcdir, bnme, "tail",
    1247             :                          BAKDIR, bnme, "tail", true) == GDK_SUCCEED)
    1248             :                 t = "tail";
    1249             :         else {
    1250           0 :                 GDKfree(srcdir);
    1251           0 :                 TRC_CRITICAL(GDK, "cannot make backup of %s.tail\n", nme);
    1252           0 :                 return GDK_FAIL;
    1253             :         }
    1254           0 :         GDKclrerr();
    1255           0 :         if (GDKmove(b->theap->farmid, srcdir, bnme, "theap",
    1256             :                     BAKDIR, bnme, "theap", true) != GDK_SUCCEED) {
    1257           0 :                 GDKfree(srcdir);
    1258           0 :                 TRC_CRITICAL(GDK, "cannot make backup of %s.theap\n", nme);
    1259           0 :                 return GDK_FAIL;
    1260             :         }
    1261             :         /* load old heaps */
    1262           0 :         Heap h1 = *b->theap; /* old heap */
    1263           0 :         h1.base = NULL;
    1264           0 :         h1.dirty = false;
    1265           0 :         strconcat_len(h1.filename, sizeof(h1.filename), filename, ".", t, NULL);
    1266           0 :         if (HEAPload(&h1, filename, t, false) != GDK_SUCCEED) {
    1267           0 :                 GDKfree(srcdir);
    1268           0 :                 TRC_CRITICAL(GDK, "loading old tail heap "
    1269             :                              "for BAT %d failed\n", b->batCacheid);
    1270           0 :                 return GDK_FAIL;
    1271             :         }
    1272           0 :         Heap vh1 = *b->tvheap;       /* old heap */
    1273           0 :         vh1.base = NULL;
    1274           0 :         vh1.dirty = false;
    1275           0 :         strconcat_len(vh1.filename, sizeof(vh1.filename), filename, ".theap", NULL);
    1276           0 :         if (HEAPload(&vh1, filename, "theap", false) != GDK_SUCCEED) {
    1277           0 :                 GDKfree(srcdir);
    1278           0 :                 HEAPfree(&h1, false);
    1279           0 :                 TRC_CRITICAL(GDK, "loading old string heap "
    1280             :                              "for BAT %d failed\n", b->batCacheid);
    1281           0 :                 return GDK_FAIL;
    1282             :         }
    1283             : 
    1284             :         /* create new heaps */
    1285           0 :         Heap *h2 = GDKmalloc(sizeof(Heap));
    1286           0 :         Heap *vh2 = GDKmalloc(sizeof(Heap));
    1287           0 :         if (h2 == NULL || vh2 == NULL) {
    1288           0 :                 GDKfree(h2);
    1289           0 :                 GDKfree(vh2);
    1290           0 :                 GDKfree(srcdir);
    1291           0 :                 HEAPfree(&h1, false);
    1292           0 :                 HEAPfree(&vh1, false);
    1293           0 :                 TRC_CRITICAL(GDK, "allocating new heaps "
    1294             :                              "for BAT %d failed\n", b->batCacheid);
    1295           0 :                 return GDK_FAIL;
    1296             :         }
    1297           0 :         *h2 = *b->theap;
    1298           0 :         h2->base = NULL;
    1299           0 :         if (HEAPalloc(h2, b->batCapacity, b->twidth) != GDK_SUCCEED) {
    1300           0 :                 GDKfree(h2);
    1301           0 :                 GDKfree(vh2);
    1302           0 :                 GDKfree(srcdir);
    1303           0 :                 HEAPfree(&h1, false);
    1304           0 :                 HEAPfree(&vh1, false);
    1305           0 :                 TRC_CRITICAL(GDK, "allocating new tail heap "
    1306             :                              "for BAT %d failed\n", b->batCacheid);
    1307           0 :                 return GDK_FAIL;
    1308             :         }
    1309           0 :         h2->dirty = true;
    1310           0 :         h2->free = h1.free;
    1311             : 
    1312           0 :         *vh2 = *b->tvheap;
    1313           0 :         strconcat_len(vh2->filename, sizeof(vh2->filename), nme, ".theap", NULL);
    1314           0 :         strHeap(vh2, b->batCapacity);
    1315           0 :         if (vh2->base == NULL) {
    1316           0 :                 GDKfree(srcdir);
    1317           0 :                 HEAPfree(&h1, false);
    1318           0 :                 HEAPfree(&vh1, false);
    1319           0 :                 HEAPfree(h2, false);
    1320           0 :                 GDKfree(h2);
    1321           0 :                 GDKfree(vh2);
    1322           0 :                 TRC_CRITICAL(GDK, "allocating new string heap "
    1323             :                              "for BAT %d failed\n", b->batCacheid);
    1324           0 :                 return GDK_FAIL;
    1325             :         }
    1326           0 :         vh2->dirty = true;
    1327           0 :         ATOMIC_INIT(&h2->refs, 1);
    1328           0 :         ATOMIC_INIT(&vh2->refs, 1);
    1329           0 :         Heap *ovh = b->tvheap;
    1330           0 :         b->tvheap = vh2;
    1331           0 :         vh2 = NULL;             /* no longer needed */
    1332           0 :         for (BUN i = 0; i < b->batCount; i++) {
    1333           0 :                 var_t o;
    1334           0 :                 switch (b->twidth) {
    1335           0 :                 case 1:
    1336           0 :                         o = (var_t) ((uint8_t *) h1.base)[i] + GDK_VAROFFSET;
    1337           0 :                         break;
    1338           0 :                 case 2:
    1339           0 :                         o = (var_t) ((uint16_t *) h1.base)[i] + GDK_VAROFFSET;
    1340           0 :                         break;
    1341             : #if SIZEOF_VAR_T == 8
    1342           0 :                 case 4:
    1343           0 :                         o = (var_t) ((uint32_t *) h1.base)[i];
    1344           0 :                         break;
    1345             : #endif
    1346           0 :                 default:
    1347           0 :                         o = ((var_t *) h1.base)[i];
    1348           0 :                         break;
    1349             :                 }
    1350           0 :                 const char *s = vh1.base + o;
    1351           0 :                 var_t no = strPut(b, &o, s);
    1352           0 :                 if (no == 0) {
    1353           0 :                         HEAPfree(&h1, false);
    1354           0 :                         HEAPfree(&vh1, false);
    1355           0 :                         HEAPdecref(h2, false);
    1356           0 :                         HEAPdecref(b->tvheap, false);
    1357           0 :                         b->tvheap = ovh;
    1358           0 :                         GDKfree(srcdir);
    1359           0 :                         TRC_CRITICAL(GDK, "storing string value "
    1360             :                                      "for BAT %d failed\n", b->batCacheid);
    1361           0 :                         return GDK_FAIL;
    1362             :                 }
    1363           0 :                 assert(no >= GDK_VAROFFSET);
    1364           0 :                 switch (b->twidth) {
    1365           0 :                 case 1:
    1366           0 :                         no -= GDK_VAROFFSET;
    1367           0 :                         assert(no <= 0xFF);
    1368           0 :                         ((uint8_t *) h2->base)[i] = (uint8_t) no;
    1369           0 :                         break;
    1370           0 :                 case 2:
    1371           0 :                         no -= GDK_VAROFFSET;
    1372           0 :                         assert(no <= 0xFFFF);
    1373           0 :                         ((uint16_t *) h2->base)[i] = (uint16_t) no;
    1374           0 :                         break;
    1375             : #if SIZEOF_VAR_T == 8
    1376           0 :                 case 4:
    1377           0 :                         assert(no <= 0xFFFFFFFF);
    1378           0 :                         ((uint32_t *) h2->base)[i] = (uint32_t) no;
    1379           0 :                         break;
    1380             : #endif
    1381           0 :                 default:
    1382           0 :                         ((var_t *) h2->base)[i] = no;
    1383           0 :                         break;
    1384             :                 }
    1385             :         }
    1386             : 
    1387             :         /* cleanup */
    1388           0 :         HEAPfree(&h1, false);
    1389           0 :         HEAPfree(&vh1, false);
    1390           0 :         if (HEAPsave(h2, nme, BATtailname(b), true, h2->free, NULL) != GDK_SUCCEED) {
    1391           0 :                 HEAPdecref(h2, false);
    1392           0 :                 HEAPdecref(b->tvheap, false);
    1393           0 :                 b->tvheap = ovh;
    1394           0 :                 GDKfree(srcdir);
    1395           0 :                 TRC_CRITICAL(GDK, "saving heap failed\n");
    1396           0 :                 return GDK_FAIL;
    1397             :         }
    1398           0 :         if (HEAPsave(b->tvheap, nme, "theap", true, b->tvheap->free, &b->theaplock) != GDK_SUCCEED) {
    1399           0 :                 HEAPfree(b->tvheap, false);
    1400           0 :                 b->tvheap = ovh;
    1401           0 :                 GDKfree(srcdir);
    1402           0 :                 TRC_CRITICAL(GDK, "saving string heap failed\n");
    1403           0 :                 return GDK_FAIL;
    1404             :         }
    1405           0 :         HEAPdecref(b->theap, false);
    1406           0 :         b->theap = h2;
    1407           0 :         HEAPfree(h2, false);
    1408           0 :         HEAPdecref(ovh, false);
    1409           0 :         HEAPfree(b->tvheap, false);
    1410           0 :         GDKfree(srcdir);
    1411           0 :         return GDK_SUCCEED;
    1412             : }
    1413             : 
    1414             : static gdk_return
    1415           0 : fixhashash(bat *hashbats, bat nhashbats)
    1416             : {
    1417           0 :         for (bat i = 0; i < nhashbats; i++) {
    1418           0 :                 bat bid = hashbats[i];
    1419           0 :                 BAT *b;
    1420           0 :                 if ((b = BBP_desc(bid)) == NULL) {
    1421             :                         /* not a valid BAT (shouldn't happen) */
    1422           0 :                         continue;
    1423             :                 }
    1424           0 :                 if (fixhashashbat(b) != GDK_SUCCEED)
    1425             :                         return GDK_FAIL;
    1426             :         }
    1427             :         return GDK_SUCCEED;
    1428             : }
    1429             : #endif
    1430             : 
    1431             : #ifdef GDKLIBRARY_TAILN
    1432             : static gdk_return
    1433           0 : movestrbats(void)
    1434             : {
    1435           0 :         for (bat bid = 1, nbat = (bat) ATOMIC_GET(&BBPsize); bid < nbat; bid++) {
    1436           0 :                 BAT *b = BBP_desc(bid);
    1437           0 :                 if (b == NULL) {
    1438             :                         /* not a valid BAT */
    1439           0 :                         continue;
    1440             :                 }
    1441           0 :                 if (b->ttype != TYPE_str || b->twidth == SIZEOF_VAR_T || b->batCount == 0)
    1442           0 :                         continue;
    1443           0 :                 char *oldpath = GDKfilepath(0, BATDIR, BBP_physical(b->batCacheid), "tail");
    1444           0 :                 char *newpath = GDKfilepath(0, BATDIR, b->theap->filename, NULL);
    1445           0 :                 int ret = -1;
    1446           0 :                 if (oldpath != NULL && newpath != NULL) {
    1447           0 :                         struct stat oldst, newst;
    1448           0 :                         bool oldexist = MT_stat(oldpath, &oldst) == 0;
    1449           0 :                         bool newexist = MT_stat(newpath, &newst) == 0;
    1450           0 :                         if (newexist) {
    1451           0 :                                 if (oldexist) {
    1452           0 :                                         if (oldst.st_mtime > newst.st_mtime) {
    1453           0 :                                                 GDKerror("both %s and %s exist with %s unexpectedly newer: manual intervention required\n", oldpath, newpath, oldpath);
    1454           0 :                                                 ret = -1;
    1455             :                                         } else {
    1456           0 :                                                 GDKwarning("both %s and %s exist, removing %s\n", oldpath, newpath, oldpath);
    1457           0 :                                                 ret = MT_remove(oldpath);
    1458             :                                         }
    1459             :                                 } else {
    1460             :                                         /* already good */
    1461             :                                         ret = 0;
    1462             :                                 }
    1463           0 :                         } else if (oldexist) {
    1464           0 :                                 TRC_DEBUG(IO_, "rename %s to %s\n", oldpath, newpath);
    1465           0 :                                 ret = MT_rename(oldpath, newpath);
    1466             :                         } else {
    1467             :                                 /* neither file exists: may be ok, but
    1468             :                                  * will be checked later */
    1469             :                                 ret = 0;
    1470             :                         }
    1471             :                 }
    1472           0 :                 GDKfree(oldpath);
    1473           0 :                 GDKfree(newpath);
    1474           0 :                 if (ret == -1)
    1475             :                         return GDK_FAIL;
    1476             :         }
    1477             :         return GDK_SUCCEED;
    1478             : }
    1479             : #endif
    1480             : 
    1481             : #ifdef GDKLIBRARY_JSON
    1482             : static gdk_return
    1483           2 : jsonupgradebat(BAT *b, json_storage_conversion fixJSONStorage)
    1484             : {
    1485           2 :         const char *nme = BBP_physical(b->batCacheid);
    1486           2 :         char *srcdir = GDKfilepath(NOFARM, BATDIR, nme, NULL);
    1487             : 
    1488           2 :         if (srcdir == NULL) {
    1489           0 :                 TRC_CRITICAL(GDK, "GDKfilepath failed\n");
    1490           0 :                 return GDK_FAIL;
    1491             :         }
    1492             : 
    1493           2 :         char *s;
    1494           2 :         if ((s = strrchr(srcdir, DIR_SEP)) != NULL)
    1495           2 :                 *s = 0;
    1496           2 :         const char *bnme;
    1497           2 :         if ((bnme = strrchr(nme, DIR_SEP)) != NULL) {
    1498           2 :                 bnme++;
    1499             :         } else {
    1500             :                 bnme = nme;
    1501             :         }
    1502             : 
    1503           2 :         long_str filename;
    1504           2 :         snprintf(filename, sizeof(filename), "BACKUP%c%s", DIR_SEP, bnme);
    1505             : 
    1506             :         /* A json column should not normally have any index structures */
    1507           2 :         HASHdestroy(b);
    1508           2 :         IMPSdestroy(b);
    1509           2 :         OIDXdestroy(b);
    1510           2 :         PROPdestroy(b);
    1511           2 :         STRMPdestroy(b);
    1512           2 :         RTREEdestroy(b);
    1513             : 
    1514             :         /* backup the current heaps */
    1515           2 :         if (GDKmove(b->theap->farmid, srcdir, bnme, "tail",
    1516             :                     BAKDIR, bnme, "tail", false) != GDK_SUCCEED) {
    1517           0 :                 GDKfree(srcdir);
    1518           0 :                 TRC_CRITICAL(GDK, "cannot make backup of %s.tail\n", nme);
    1519           0 :                 return GDK_FAIL;
    1520             :         }
    1521           2 :         if (GDKmove(b->theap->farmid, srcdir, bnme, "theap",
    1522             :                     BAKDIR, bnme, "theap", true) != GDK_SUCCEED) {
    1523           0 :                 GDKfree(srcdir);
    1524           0 :                 TRC_CRITICAL(GDK, "cannot make backup of %s.theap\n", nme);
    1525           0 :                 return GDK_FAIL;
    1526             :         }
    1527             : 
    1528             :         /* load the old heaps */
    1529           2 :         Heap h1 = *b->theap;
    1530           2 :         h1.base = NULL;
    1531           2 :         h1.dirty = false;
    1532           2 :         strconcat_len(h1.filename, sizeof(h1.filename), filename, ".tail", NULL);
    1533           2 :         if (HEAPload(&h1, filename, "tail", false) != GDK_SUCCEED) {
    1534           0 :                 GDKfree(srcdir);
    1535           0 :                 TRC_CRITICAL(GDK, "loading old tail heap "
    1536             :                              "for BAT %d failed\n", b->batCacheid);
    1537           0 :                 return GDK_FAIL;
    1538             :         }
    1539             : 
    1540           2 :         Heap vh1 = *b->tvheap;
    1541           2 :         vh1.base = NULL;
    1542           2 :         vh1.dirty = false;
    1543           2 :         strconcat_len(vh1.filename, sizeof(vh1.filename), filename, ".theap", NULL);
    1544           2 :         if (HEAPload(&vh1, filename, "theap", false) != GDK_SUCCEED) {
    1545           0 :                 GDKfree(srcdir);
    1546           0 :                 HEAPfree(&h1, false);
    1547           0 :                 TRC_CRITICAL(GDK, "loading old string heap "
    1548             :                              "for BAT %d failed\n", b->batCacheid);
    1549           0 :                 return GDK_FAIL;
    1550             :         }
    1551             : 
    1552             :         /* create the new heaps */
    1553           2 :         Heap *h2 = GDKmalloc(sizeof(Heap));
    1554           2 :         Heap *vh2 = GDKmalloc(sizeof(Heap));
    1555           2 :         if (h2 == NULL || vh2 == NULL) {
    1556           0 :                 GDKfree(h2);
    1557           0 :                 GDKfree(vh2);
    1558           0 :                 GDKfree(srcdir);
    1559           0 :                 HEAPfree(&h1, false);
    1560           0 :                 HEAPfree(&vh1, false);
    1561           0 :                 TRC_CRITICAL(GDK, "allocating new heaps "
    1562             :                              "for BAT %d failed\n", b->batCacheid);
    1563           0 :                 return GDK_FAIL;
    1564             :         }
    1565           2 :         *h2 = *b->theap;
    1566           2 :         h2->base = NULL;
    1567           2 :         if (HEAPalloc(h2, b->batCapacity, b->twidth) != GDK_SUCCEED) {
    1568           0 :                 GDKfree(h2);
    1569           0 :                 GDKfree(vh2);
    1570           0 :                 GDKfree(srcdir);
    1571           0 :                 HEAPfree(&h1, false);
    1572           0 :                 HEAPfree(&vh1, false);
    1573           0 :                 TRC_CRITICAL(GDK, "allocating new tail heap "
    1574             :                              "for BAT %d failed\n", b->batCacheid);
    1575           0 :                 return GDK_FAIL;
    1576             : 
    1577             :         }
    1578           2 :         h2->dirty = true;
    1579           2 :         h2->free = h1.free;
    1580             : 
    1581           2 :         *vh2 = *b->tvheap;
    1582           2 :         strconcat_len(vh2->filename, sizeof(vh2->filename), nme, ".theap", NULL);
    1583           2 :         strHeap(vh2, b->batCapacity);
    1584           2 :         if (vh2->base == NULL) {
    1585           0 :                 GDKfree(srcdir);
    1586           0 :                 HEAPfree(&h1, false);
    1587           0 :                 HEAPfree(&vh1, false);
    1588           0 :                 HEAPfree(h2, false);
    1589           0 :                 GDKfree(h2);
    1590           0 :                 GDKfree(vh2);
    1591           0 :                 TRC_CRITICAL(GDK, "allocating new string heap "
    1592             :                              "for BAT %d failed\n", b->batCacheid);
    1593           0 :                 return GDK_FAIL;
    1594             :         }
    1595           2 :         vh2->dirty = true;
    1596           2 :         ATOMIC_INIT(&h2->refs, 1);
    1597           2 :         ATOMIC_INIT(&vh2->refs, 1);
    1598           2 :         Heap *ovh = b->tvheap;
    1599           2 :         b->tvheap = vh2;
    1600           2 :         vh2 = NULL;
    1601             : 
    1602           6 :         for (BUN i = 0; i < b->batCount; i++) {
    1603           4 :                 var_t o = ((var_t *) h1.base)[i];
    1604           4 :                 const char *s = vh1.base + o;
    1605           4 :                 char *ns;
    1606           4 :                 if (fixJSONStorage(&ns, &s) != GDK_SUCCEED) {
    1607           0 :                         GDKfree(srcdir);
    1608           0 :                         HEAPfree(&h1, false);
    1609           0 :                         HEAPfree(&vh1, false);
    1610           0 :                         HEAPdecref(h2, false);
    1611           0 :                         HEAPdecref(b->tvheap, false);
    1612           0 :                         b->tvheap = ovh;
    1613           0 :                         TRC_CRITICAL(GDK, "converting value "
    1614             :                                      "in BAT %d failed\n", b->batCacheid);
    1615           0 :                         return GDK_FAIL;
    1616             :                 }
    1617           4 :                 var_t no = strPut(b, &o, ns);
    1618           4 :                 GDKfree(ns);
    1619           4 :                 if (no == 0) {
    1620           0 :                         GDKfree(srcdir);
    1621           0 :                         HEAPfree(&h1, false);
    1622           0 :                         HEAPfree(&vh1, false);
    1623           0 :                         HEAPdecref(h2, false);
    1624           0 :                         HEAPdecref(b->tvheap, false);
    1625           0 :                         b->tvheap = ovh;
    1626           0 :                         TRC_CRITICAL(GDK, "storing new value "
    1627             :                                      "in BAT %d failed\n", b->batCacheid);
    1628           0 :                         return GDK_FAIL;
    1629             : 
    1630             :                 }
    1631           4 :                 ((var_t *)h2->base)[i] = no;
    1632             :         }
    1633             : 
    1634             :         /* cleanup */
    1635           2 :         HEAPfree(&h1, false);
    1636           2 :         HEAPfree(&vh1, false);
    1637           2 :         if (HEAPsave(h2, nme, BATtailname(b), true, h2->free, NULL) !=
    1638             :             GDK_SUCCEED) {
    1639           0 :                 HEAPdecref(h2, false);
    1640           0 :                 HEAPdecref(b->tvheap, false);
    1641           0 :                 b->tvheap = ovh;
    1642           0 :                 GDKfree(srcdir);
    1643           0 :                 TRC_CRITICAL(GDK, "saving heap failed\n");
    1644           0 :                 return GDK_FAIL;
    1645             :         }
    1646             : 
    1647           2 :         if (HEAPsave(b->tvheap, nme, "theap", true, b->tvheap->free,
    1648             :                      &b->theaplock) != GDK_SUCCEED) {
    1649           0 :                 HEAPfree(b->tvheap, false);
    1650           0 :                 b->tvheap = ovh;
    1651           0 :                 GDKfree(srcdir);
    1652           0 :                 TRC_CRITICAL(GDK, "saving string failed\n");
    1653           0 :                 return GDK_FAIL;
    1654             :         }
    1655             : 
    1656           2 :         HEAPdecref(b->theap, false);
    1657           2 :         b->theap = h2;
    1658           2 :         HEAPfree(h2, false);
    1659           2 :         HEAPdecref(ovh, false);
    1660           2 :         HEAPfree(b->tvheap, false);
    1661           2 :         GDKfree(srcdir);
    1662             : 
    1663           2 :         return GDK_SUCCEED;
    1664             : }
    1665             : 
    1666             : gdk_return       /* GDK_SUCCEED, or GDK_FAIL on allocation, conversion or commit failure */
    1667           8 : BBPjson_upgrade(json_storage_conversion fixJSONStorage)  /* fixJSONStorage is handed unchanged to jsonupgradebat */
    1668             : {        /* run jsonupgradebat on every json-typed BAT found in the BBP, then sub-commit the upgraded ids */
    1669           8 :         bat bid;
    1670           8 :         BAT *b;
    1671           8 :         int JSON_type = ATOMindex("json");       /* value ATOMindex returns for the name json */
    1672           8 :         bat nbat = (bat) ATOMIC_GET(&BBPsize);   /* snapshot of BBPsize: bounds the scan and sizes upd */
    1673           8 :         bat *upd = GDKmalloc(sizeof(bat) * (size_t) nbat);       /* room for one id per scanned slot */
    1674           8 :         int nupd = 0;    /* number of entries used in upd */
    1675             : 
    1676           8 :         if (upd == NULL) {       /* the failing call is GDKmalloc, although the message talks about a bat */
    1677           0 :                 TRC_CRITICAL(GDK, "could not create bat\n");
    1678           0 :                 return GDK_FAIL;
    1679             :         }
    1680           8 :         upd[nupd++] = 0;        /* first entry unused, so upgraded ids start at upd[1] */
    1681             : 
    1682           8 :         BBPlock();       /* held for the whole scan; released on the failure path and after the loop */
    1683             : 
    1684       10544 :         for (bid = 1; bid < nbat; bid++) {
    1685       10528 :                 if ((b = BBP_desc(bid)) == NULL) {
    1686             :                         /* not a valid BAT */
    1687        7954 :                         continue;
    1688             :                 }
    1689             : 
    1690        2574 :                 if (b->ttype < 0) {      /* negative type: compare the name from ATOMunknown_name instead of the index */
    1691          56 :                         const char *nme;
    1692             : 
    1693          56 :                         nme = ATOMunknown_name(b->ttype);
    1694          56 :                         if (strcmp(nme, "json") != 0)
    1695          54 :                                 continue;
    1696        2518 :                 } else if (b->ttype != JSON_type) {
    1697        2518 :                         continue;
    1698             :                 }
    1699           2 :                 fprintf(stderr, "Upgrading json bat %d\n", bid); /* progress message goes straight to stderr */
    1700           2 :                 if (jsonupgradebat(b, fixJSONStorage) != GDK_SUCCEED) {  /* first failure aborts the whole upgrade */
    1701           0 :                         BBPunlock();
    1702           0 :                         GDKfree(upd);
    1703           0 :                         return GDK_FAIL;
    1704             :                 }
    1705           2 :                 upd[nupd++] = bid;       /* remember the id for the sub-commit below */
    1706             :         }
    1707           8 :         BBPunlock();
    1708          10 :         if (nupd > 1 &&  /* commit only when at least one bat was upgraded; slot 0 is the placeholder */
    1709           2 :             TMsubcommit_list(upd, NULL, nupd, -1, -1) != GDK_SUCCEED) {
    1710           0 :                 TRC_CRITICAL(GDK, "failed to commit changes\n");
    1711           0 :                 GDKfree(upd);
    1712           0 :                 return GDK_FAIL;
    1713             :         }
    1714           8 :         GDKfree(upd);
    1715           8 :         return GDK_SUCCEED;
    1716             : }
    1717             : #endif
    1718             : 
    1719             : static bool      /* true when at least one bat was unloaded during this pass */
    1720         113 : BBPtrim(bool aggressive, bat nbat)       /* unload unreferenced loaded bats with ids in [1, nbat); aggressive also considers BBPHOT bats and dirty bats whose heaps are STORE_MMAP */
    1721             : {
    1722         113 :         int n = 0;       /* bats unloaded so far, reported by TRC_INFO at the end */
    1723         113 :         int waitctr = 0; /* work counter; reaching 1000 triggers a short sleep */
    1724         113 :         bool changed = false;
    1725         113 :         unsigned flag = BBPUNLOADING | BBPSYNCING | BBPSAVING;   /* any of these status bits disqualifies a bat */
    1726         113 :         if (!aggressive)
    1727         113 :                 flag |= BBPHOT;  /* non-aggressive passes also leave BBPHOT bats alone */
    1728         113 :         lng t0 = GDKusec();      /* start time, used only for the log message */
    1729      139823 :         for (bat bid = 1; bid < nbat && !GDKexiting(); bid++) {  /* the scan ends early once GDKexiting() is true */
    1730             :                 /* quick check to see if we might possibly have to do
    1731             :                  * work (includes free bats) */
    1732      139710 :                 if ((BBP_status(bid) & BBPLOADED) == 0)
    1733       37271 :                         continue;
    1734             :                 /* don't do this during a (sub)commit */
    1735      102439 :                 BBPtmlock();
    1736      102439 :                 MT_lock_set(&GDKswapLock(bid));  /* eligibility is decided under the swap lock of this bat */
    1737      102439 :                 BAT *b = NULL;
    1738      102439 :                 bool swap = false;       /* set when this bat was selected for unloading */
    1739      102439 :                 if (!(BBP_status(bid) & flag) &&
    1740       13064 :                     BBP_refs(bid) == 0 &&
    1741       13064 :                     BBP_lrefs(bid) != 0 &&
    1742       13062 :                     (b = BBP_cache(bid)) != NULL) {
    1743       13062 :                         MT_lock_set(&b->theaplock);
    1744       13062 :                         if (!BATshared(b) &&
    1745       12969 :                             !isVIEW(b) &&
    1746       12794 :                             (!BATdirty(b) ||     /* case 1: the bat is not dirty */
    1747           0 :                              (aggressive &&      /* case 2: aggressive, and theap plus tvheap (when present) are STORE_MMAP */
    1748           0 :                               b->theap->storage == STORE_MMAP &&
    1749           0 :                               (b->tvheap == NULL ||
    1750           0 :                                b->tvheap->storage == STORE_MMAP)) ||
    1751        9431 :                              (b->batRole == PERSISTENT &&        /* case 3: PERSISTENT role with BBP_lrefs at most 2 */
    1752        8993 :                               BBP_lrefs(bid) <= 2))) {
    1753        4866 :                                 BBP_status_on(bid, BBPUNLOADING);        /* marked while both locks are held; BBPfree runs after they are released */
    1754        4866 :                                 swap = true;
    1755        6369 :                                 waitctr += BATdirty(b) ? 9 : 1;  /* a dirty bat adds 9, a clean one 1, on top of the per-iteration increment */
    1756             :                         }
    1757       13062 :                         MT_lock_unset(&b->theaplock);
    1758             :                 }
    1759      102439 :                 MT_lock_unset(&GDKswapLock(bid));
    1760      102439 :                 if (swap) {
    1761        4866 :                         TRC_DEBUG(BAT_, "unload and free bat %d\n", bid);
    1762        4866 :                         if (BBPfree(b) != GDK_SUCCEED)   /* a failure is reported, yet the bat is still counted below */
    1763           0 :                                 GDKerror("unload failed for bat %d", bid);
    1764        4866 :                         n++;
    1765        4866 :                         changed = true;
    1766             :                 }
    1767      102439 :                 BBPtmunlock();
    1768             :                 /* every once in a while, give others a chance */
    1769      102439 :                 if (++waitctr >= 1000) {
    1770          85 :                         waitctr = 0;
    1771          85 :                         MT_sleep_ms(2);
    1772             :                 }
    1773             :         }
    1774         113 :         if (n > 0)
    1775          42 :                 TRC_INFO(BAT_, "unloaded %d bats in "LLFMT" usec%s\n", n, GDKusec() - t0, aggressive ? " (also hot)" : "");
    1776         113 :         return changed;
    1777             : }
    1778             : 
    1779             : static void      /* returns only after GDKexiting() becomes true */
    1780         336 : BBPmanager(void *dummy)  /* endless cycle: clear BBPHOT bits, sleep, BBPtrim, BBPcallbacks; NOTE(review): presumably a thread entry point, confirm at the creation site */
    1781             : {
    1782         336 :         (void) dummy;    /* the argument is not used */
    1783         336 :         bool changed = true;     /* result of the previous BBPtrim; starts true so the first sleep can be the short one */
    1784             : 
    1785         449 :         for (;;) {
    1786         449 :                 int n = 0;       /* bats whose BBPHOT bit was set before clearing */
    1787         449 :                 bat nbat = (bat) ATOMIC_GET(&BBPsize);   /* snapshot reused for the BBPtrim call further down */
    1788         449 :                 MT_thread_setworking("clearing HOT bits");
    1789      430833 :                 for (bat bid = 1; bid < nbat; bid++) {
    1790      430384 :                         MT_lock_set(&GDKswapLock(bid));
    1791      430384 :                         if (BBP_refs(bid) == 0 && BBP_lrefs(bid) != 0) { /* only bats with BBP_refs of zero and nonzero BBP_lrefs are touched */
    1792      204507 :                                 n += (BBP_status(bid) & BBPHOT) != 0;
    1793      204507 :                                 BBP_status_off(bid, BBPHOT);
    1794             :                         }
    1795      430384 :                         MT_lock_unset(&GDKswapLock(bid));
    1796             :                 }
    1797         449 :                 TRC_DEBUG(BAT_, "cleared HOT bit from %d bats\n", n);
    1798         449 :                 size_t cur = GDKvm_cursize();    /* sampled once per cycle; selects the sleep length below */
    1799         449 :                 MT_thread_setworking("sleeping");
    1800       20237 :                 for (int i = 0, n = changed && cur > GDK_vm_maxsize / 2 ? 1 : cur > GDK_vm_maxsize / 4 ? 10 : 100; i < n; i++) { /* 100 ms naps: 1 if changed and cur > maxsize/2, else 10 if cur > maxsize/4, else 100; this n shadows the outer n */
    1801       19683 :                         MT_sleep_ms(100);
    1802       19681 :                         if (GDKexiting())        /* checked after every nap */
    1803             :                                 return;
    1804             :                 }
    1805         113 :                 MT_thread_setworking("BBPtrim");
    1806         113 :                 changed = BBPtrim(false, nbat);  /* non-aggressive pass; nbat is the value read before sleeping */
    1807         113 :                 MT_thread_setworking("BBPcallbacks");
    1808         113 :                 BBPcallbacks();
    1809         113 :                 if (GDKexiting())
    1810             :                         return;
    1811             :         }
    1812             : }
    1813             : 
    1814             : static MT_Id manager;    /* NOTE(review): presumably the id of the thread running BBPmanager; its assignment is outside this view, confirm */
    1815             : 
    1816             : gdk_return
    1817         336 : BBPinit(bool allow_hge_upgrade)
    1818             : {
    1819         336 :         FILE *fp = NULL;
    1820         336 :         struct stat st;
    1821         336 :         unsigned bbpversion = 0;
    1822         336 :         int i;
    1823         336 :         int lineno = 0;
    1824             : #ifdef GDKLIBRARY_HASHASH
    1825         336 :         bat *hashbats = NULL;
    1826         336 :         bat nhashbats = 0;
    1827         336 :         gdk_return res = GDK_SUCCEED;
    1828             : #endif
    1829         336 :         ATOMIC_BASE_TYPE dbg = ATOMIC_GET(&GDKdebug);
    1830             : 
    1831         336 :         ATOMIC_AND(&GDKdebug, ~TAILCHKMASK);
    1832             : 
    1833             :         /* the maximum number of BATs allowed in the system and the
    1834             :          * size of the "physical" array are linked in a complicated
    1835             :          * manner.  The expression below shows the relationship */
    1836         336 :         static_assert((uint64_t) N_BBPINIT * BBPINIT < (UINT64_C(1) << (3 * ((sizeof(BBP[0][0].physical) + 2) * 2 / 5))), "\"physical\" array in BBPrec is too small");
    1837             :         /* similarly, the maximum number of BATs allowed also has a
    1838             :          * (somewhat simpler) relation with the size of the "bak"
    1839             :          * array */
    1840         336 :         static_assert((uint64_t) N_BBPINIT * BBPINIT < (UINT64_C(1) << (3 * (sizeof(BBP[0][0].bak) - 5))), "\"bak\" array in BBPrec is too small");
    1841             : 
    1842         336 :         if (!GDKinmemory(0)) {
    1843         335 :                 str bbpdirstr, backupbbpdirstr;
    1844             : 
    1845         335 :                 BBPtmlock();
    1846             : 
    1847         335 :                 if ((bbpdirstr = GDKfilepath(0, BATDIR, "BBP", "dir")) == NULL) {
    1848           0 :                         TRC_CRITICAL(GDK, "GDKmalloc failed\n");
    1849           0 :                         BBPtmunlock();
    1850           0 :                         ATOMIC_SET(&GDKdebug, dbg);
    1851           0 :                         return GDK_FAIL;
    1852             :                 }
    1853             : 
    1854         335 :                 if ((backupbbpdirstr = GDKfilepath(0, BAKDIR, "BBP", "dir")) == NULL) {
    1855           0 :                         GDKfree(bbpdirstr);
    1856           0 :                         TRC_CRITICAL(GDK, "GDKmalloc failed\n");
    1857           0 :                         BBPtmunlock();
    1858           0 :                         ATOMIC_SET(&GDKdebug, dbg);
    1859           0 :                         return GDK_FAIL;
    1860             :                 }
    1861             : 
    1862         335 :                 if (GDKremovedir(0, TEMPDIR) != GDK_SUCCEED) {
    1863           0 :                         GDKfree(bbpdirstr);
    1864           0 :                         GDKfree(backupbbpdirstr);
    1865           0 :                         TRC_CRITICAL(GDK, "cannot remove directory %s\n", TEMPDIR);
    1866           0 :                         BBPtmunlock();
    1867           0 :                         ATOMIC_SET(&GDKdebug, dbg);
    1868           0 :                         return GDK_FAIL;
    1869             :                 }
    1870             : 
    1871         335 :                 if (GDKremovedir(0, DELDIR) != GDK_SUCCEED) {
    1872           0 :                         GDKfree(bbpdirstr);
    1873           0 :                         GDKfree(backupbbpdirstr);
    1874           0 :                         TRC_CRITICAL(GDK, "cannot remove directory %s\n", DELDIR);
    1875           0 :                         BBPtmunlock();
    1876           0 :                         ATOMIC_SET(&GDKdebug, dbg);
    1877           0 :                         return GDK_FAIL;
    1878             :                 }
    1879             : 
    1880             :                 /* first move everything from SUBDIR to BAKDIR (its parent) */
    1881         335 :                 if (BBPrecover_subdir() != GDK_SUCCEED) {
    1882           0 :                         GDKfree(bbpdirstr);
    1883           0 :                         GDKfree(backupbbpdirstr);
    1884           0 :                         TRC_CRITICAL(GDK, "cannot properly recover_subdir process %s.", SUBDIR);
    1885           0 :                         BBPtmunlock();
    1886           0 :                         ATOMIC_SET(&GDKdebug, dbg);
    1887           0 :                         return GDK_FAIL;
    1888             :                 }
    1889             : 
    1890             :                 /* try to obtain a BBP.dir from bakdir */
    1891         335 :                 if (MT_stat(backupbbpdirstr, &st) == 0) {
    1892             :                         /* backup exists; *must* use it */
    1893         112 :                         if (recover_dir(0, MT_stat(bbpdirstr, &st) == 0) != GDK_SUCCEED) {
    1894           0 :                                 GDKfree(bbpdirstr);
    1895           0 :                                 GDKfree(backupbbpdirstr);
    1896           0 :                                 BBPtmunlock();
    1897           0 :                                 goto bailout;
    1898             :                         }
    1899         112 :                         if ((fp = GDKfilelocate(0, "BBP", "r", "dir")) == NULL) {
    1900           0 :                                 GDKfree(bbpdirstr);
    1901           0 :                                 GDKfree(backupbbpdirstr);
    1902           0 :                                 TRC_CRITICAL(GDK, "cannot open recovered BBP.dir.");
    1903           0 :                                 BBPtmunlock();
    1904           0 :                                 ATOMIC_SET(&GDKdebug, dbg);
    1905           0 :                                 return GDK_FAIL;
    1906             :                         }
    1907         223 :                 } else if ((fp = GDKfilelocate(0, "BBP", "r", "dir")) == NULL) {
    1908             :                         /* there was no BBP.dir either. Panic! try to use a
    1909             :                          * BBP.bak */
    1910         223 :                         if (MT_stat(backupbbpdirstr, &st) < 0) {
    1911             :                                 /* no BBP.bak (nor BBP.dir or BACKUP/BBP.dir):
    1912             :                                  * create a new one */
    1913         223 :                                 TRC_DEBUG(IO_, "initializing BBP.\n");
    1914         223 :                                 if (BBPdir_init() != GDK_SUCCEED) {
    1915           0 :                                         GDKfree(bbpdirstr);
    1916           0 :                                         GDKfree(backupbbpdirstr);
    1917           0 :                                         BBPtmunlock();
    1918           0 :                                         goto bailout;
    1919             :                                 }
    1920           0 :                         } else if (GDKmove(0, BATDIR, "BBP", "bak", BATDIR, "BBP", "dir", true) == GDK_SUCCEED)
    1921           0 :                                 TRC_DEBUG(IO_, "reverting to dir saved in BBP.bak.\n");
    1922             : 
    1923         223 :                         if ((fp = GDKfilelocate(0, "BBP", "r", "dir")) == NULL) {
    1924           0 :                                 GDKsyserror("cannot open BBP.dir");
    1925           0 :                                 GDKfree(bbpdirstr);
    1926           0 :                                 GDKfree(backupbbpdirstr);
    1927           0 :                                 BBPtmunlock();
    1928           0 :                                 goto bailout;
    1929             :                         }
    1930             :                 }
    1931             :                 assert(fp != NULL);
    1932         335 :                 GDKfree(bbpdirstr);
    1933         335 :                 GDKfree(backupbbpdirstr);
    1934         335 :                 BBPtmunlock();
    1935             :         }
    1936             : 
    1937             :         /* scan the BBP.dir to obtain current size */
    1938         336 :         BBPlimit = 0;
    1939         336 :         memset(BBP, 0, sizeof(BBP));
    1940             : 
    1941         336 :         bat bbpsize;
    1942         336 :         bbpsize = 1;
    1943         336 :         if (GDKinmemory(0)) {
    1944             :                 bbpversion = GDKLIBRARY;
    1945             :         } else {
    1946         335 :                 lng logno, transid;
    1947         335 :                 bbpversion = BBPheader(fp, &lineno, &bbpsize, &logno, &transid, allow_hge_upgrade);
    1948         335 :                 if (bbpversion == 0) {
    1949           0 :                         ATOMIC_SET(&GDKdebug, dbg);
    1950           0 :                         return GDK_FAIL;
    1951             :                 }
    1952         335 :                 assert(bbpversion > GDKLIBRARY_MINMAX_POS || logno == 0);
    1953           0 :                 assert(bbpversion > GDKLIBRARY_MINMAX_POS || transid == 0);
    1954         335 :                 ATOMIC_SET(&BBPlogno, logno);
    1955         335 :                 ATOMIC_SET(&BBPtransid, transid);
    1956             :         }
    1957             : 
    1958             :         /* allocate BBP records */
    1959         336 :         if (BBPextend(bbpsize) != GDK_SUCCEED) {
    1960           0 :                 ATOMIC_SET(&GDKdebug, dbg);
    1961           0 :                 return GDK_FAIL;
    1962             :         }
    1963         336 :         ATOMIC_SET(&BBPsize, bbpsize);
    1964             : 
    1965         336 :         if (!GDKinmemory(0)) {
    1966         335 :                 if (BBPreadEntries(fp, bbpversion, lineno
    1967             : #ifdef GDKLIBRARY_HASHASH
    1968             :                                    , &hashbats, &nhashbats
    1969             : #endif
    1970             :                             ) != GDK_SUCCEED) {
    1971           0 :                         ATOMIC_SET(&GDKdebug, dbg);
    1972           0 :                         return GDK_FAIL;
    1973             :                 }
    1974         335 :                 fclose(fp);
    1975             :         }
    1976             : 
    1977             :         /* remove trailing free bats from potential free list (they will
    1978             :          * get added when needed) */
    1979        8507 :         for (bat i = (bat) ATOMIC_GET(&BBPsize) - 1; i > 0; i--) {
    1980        8283 :                 if (BBP_desc(i) != NULL)
    1981             :                         break;
    1982        8171 :                 bbpsize--;
    1983             :         }
    1984         336 :         ATOMIC_SET(&BBPsize, bbpsize);
    1985             : 
    1986             :         /* add free bats to free list in such a way that low numbered
    1987             :          * ones are at the head of the list */
    1988       68550 :         for (bat i = (bat) ATOMIC_GET(&BBPsize) - 1; i > 0; i--) {
    1989       68214 :                 if (BBP_desc(i) == NULL) {
    1990       38510 :                         BBP_next(i) = BBP_free;
    1991       38510 :                         BBP_free = i;
    1992       38510 :                         BBP_nfree++;
    1993             :                 }
    1994             :         }
    1995             : 
    1996             :         /* will call BBPrecover if needed */
    1997         336 :         if (!GDKinmemory(0)) {
    1998         335 :                 BBPtmlock();
    1999         335 :                 gdk_return rc = BBPprepare(false);
    2000         335 :                 BBPtmunlock();
    2001         335 :                 if (rc != GDK_SUCCEED) {
    2002             : #ifdef GDKLIBRARY_HASHASH
    2003           0 :                         GDKfree(hashbats);
    2004             : #endif
    2005           0 :                         TRC_CRITICAL(GDK, "cannot properly prepare process %s.", BAKDIR);
    2006           0 :                         ATOMIC_SET(&GDKdebug, dbg);
    2007           0 :                         return rc;
    2008             :                 }
    2009             :         }
    2010             : 
    2011         336 :         if (BBPcheckbats(bbpversion) != GDK_SUCCEED) {
    2012             : #ifdef GDKLIBRARY_HASHASH
    2013           0 :                 GDKfree(hashbats);
    2014             : #endif
    2015           0 :                 ATOMIC_SET(&GDKdebug, dbg);
    2016           0 :                 return GDK_FAIL;
    2017             :         }
    2018             : 
    2019             : #ifdef GDKLIBRARY_TAILN
    2020         336 :         char *needstrbatmove;
    2021         336 :         if (GDKinmemory(0)) {
    2022             :                 needstrbatmove = NULL;
    2023             :         } else {
    2024         335 :                 if ((needstrbatmove = GDKfilepath(0, BATDIR, "needstrbatmove", NULL)) == NULL) {
    2025             : #ifdef GDKLIBRARY_HASHASH
    2026           0 :                         GDKfree(hashbats);
    2027             : #endif
    2028           0 :                         ATOMIC_SET(&GDKdebug, dbg);
    2029           0 :                         return GDK_FAIL;
    2030             :                 }
    2031         335 :                 if (bbpversion <= GDKLIBRARY_TAILN) {
    2032             :                         /* create signal file that we need to rename string
    2033             :                          * offset heaps */
    2034           0 :                         int fd = MT_open(needstrbatmove, O_WRONLY | O_CREAT);
    2035           0 :                         if (fd < 0) {
    2036           0 :                                 TRC_CRITICAL(GDK, "cannot create signal file needstrbatmove.\n");
    2037           0 :                                 GDKfree(needstrbatmove);
    2038             : #ifdef GDKLIBRARY_HASHASH
    2039           0 :                                 GDKfree(hashbats);
    2040             : #endif
    2041           0 :                                 ATOMIC_SET(&GDKdebug, dbg);
    2042           0 :                                 return GDK_FAIL;
    2043             :                         }
    2044           0 :                         close(fd);
    2045             :                 } else {
    2046             :                         /* check signal file whether we need to rename string
    2047             :                          * offset heaps */
    2048         335 :                         int fd = MT_open(needstrbatmove, O_RDONLY);
    2049         335 :                         if (fd >= 0) {
    2050             :                                 /* yes, we do */
    2051           0 :                                 close(fd);
    2052         335 :                         } else if (errno == ENOENT) {
    2053             :                                 /* no, we don't: set var to NULL */
    2054         335 :                                 GDKfree(needstrbatmove);
    2055         335 :                                 needstrbatmove = NULL;
    2056             :                         } else {
    2057           0 :                                 GDKsyserror("unexpected error opening %s\n", needstrbatmove);
    2058           0 :                                 GDKfree(needstrbatmove);
    2059             : #ifdef GDKLIBRARY_HASHASH
    2060           0 :                                 GDKfree(hashbats);
    2061             : #endif
    2062           0 :                                 ATOMIC_SET(&GDKdebug, dbg);
    2063           0 :                                 return GDK_FAIL;
    2064             :                         }
    2065             :                 }
    2066             :         }
    2067             : #endif
    2068             : 
    2069             : #ifdef GDKLIBRARY_HASHASH
    2070         336 :         if (nhashbats > 0)
    2071           0 :                 res = fixhashash(hashbats, nhashbats);
    2072         336 :         GDKfree(hashbats);
    2073         336 :         if (res != GDK_SUCCEED)
    2074             :                 return res;
    2075             : #endif
    2076             : 
    2077             : #ifdef GDKLIBRARY_JSON
    2078         336 :         if (bbpversion <= GDKLIBRARY_JSON) {
    2079           8 :                 char *jsonupgradestr;
    2080           8 :                 if (GDKinmemory(0)) {
    2081         344 :                         jsonupgradestr = NULL;
    2082             :                 } else {
    2083           8 :                         if ((jsonupgradestr = GDKfilepath(0, BATDIR, "jsonupgradeneeded", NULL)) == NULL) {
    2084           0 :                                 TRC_CRITICAL(GDK, "GDKfilepath failed\n");
    2085           0 :                                 ATOMIC_SET(&GDKdebug, dbg);
    2086           0 :                                 return GDK_FAIL;
    2087             :                         }
    2088             : 
    2089             :                         /* create signal file that we need to upgrade
    2090             :                          * stored json strings. This will be performed
    2091             :                          * by an upgrade function in the GDK that will
    2092             :                          * be called at the end of the json module
    2093             :                          * initialization with a callback that actually
    2094             :                          * knows how to perform the upgrade. */
    2095           8 :                         int fd = MT_open(jsonupgradestr, O_WRONLY | O_CREAT);
    2096           8 :                         GDKfree(jsonupgradestr);
    2097           8 :                         if (fd < 0) {
    2098           0 :                                 TRC_CRITICAL(GDK, "cannot create signal file jsonupgradeneeded");
    2099           0 :                                 ATOMIC_SET(&GDKdebug, dbg);
    2100           0 :                                 return GDK_FAIL;
    2101             :                         }
    2102             : 
    2103           8 :                         close(fd);
    2104             :                 }
    2105             :         }
    2106             : #endif
    2107             : 
    2108           8 :         if (bbpversion < GDKLIBRARY && TMcommit() != GDK_SUCCEED) {
    2109           0 :                 TRC_CRITICAL(GDK, "TMcommit failed\n");
    2110           0 :                 ATOMIC_SET(&GDKdebug, dbg);
    2111           0 :                 return GDK_FAIL;
    2112             :         }
    2113             : 
    2114             : #ifdef GDKLIBRARY_TAILN
    2115             :         /* we rename the offset heaps after the above commit: in this
    2116             :          * version we accept both the old and new names, but we want to
    2117             :          * convert so that future versions only have the new name */
    2118         336 :         if (needstrbatmove) {
    2119             :                 /* note, if renaming fails, nothing is lost: a next
    2120             :                  * invocation will just try again; an older version of
    2121             :                  * mserver will not work because of the TMcommit
    2122             :                  * above */
    2123           0 :                 if (movestrbats() != GDK_SUCCEED) {
    2124           0 :                         GDKfree(needstrbatmove);
    2125           0 :                         ATOMIC_SET(&GDKdebug, dbg);
    2126           0 :                         return GDK_FAIL;
    2127             :                 }
    2128           0 :                 MT_remove(needstrbatmove);
    2129           0 :                 GDKfree(needstrbatmove);
    2130           0 :                 needstrbatmove = NULL;
    2131             :         }
    2132             : #endif
    2133         336 :         ATOMIC_SET(&GDKdebug, dbg);
    2134             : 
    2135             :         /* cleanup any leftovers (must be done after BBPrecover) */
    2136        1330 :         for (i = 0; i < MAXFARMS && BBPfarms[i].dirname != NULL; i++) {
    2137             :                 int j;
    2138        1340 :                 for (j = 0; j < i; j++) {
    2139             :                         /* don't clean a directory twice */
    2140         832 :                         if (BBPfarms[j].dirname &&
    2141         832 :                             strcmp(BBPfarms[i].dirname,
    2142             :                                    BBPfarms[j].dirname) == 0)
    2143             :                                 break;
    2144             :                 }
    2145         994 :                 if (j == i) {
    2146         508 :                         char *d = GDKfilepath(i, NULL, BATDIR, NULL);
    2147         508 :                         if (d == NULL) {
    2148             :                                 return GDK_FAIL;
    2149             :                         }
    2150         508 :                         BBPdiskscan(d, strlen(d) - strlen(BATDIR));
    2151         508 :                         GDKfree(d);
    2152             :                 }
    2153             :         }
    2154             : 
    2155         336 :         if (MT_create_thread(&manager, BBPmanager, NULL, MT_THR_DETACHED, "BBPmanager") < 0) {
    2156           0 :                 TRC_CRITICAL(GDK, "Could not start BBPmanager thread.");
    2157           0 :                 return GDK_FAIL;
    2158             :         }
    2159             :         return GDK_SUCCEED;
    2160             : 
    2161           0 :   bailout:
    2162             :         /* now it is time for real panic */
    2163           0 :         TRC_CRITICAL(GDK, "could not write %s%cBBP.dir.", BATDIR, DIR_SEP);
    2164           0 :         return GDK_FAIL;
    2165             : }
    2166             : 
    2167             : /*
    2168             :  * During the exit phase all non-persistent BATs are removed.  Upon
    2169             :  * exit the status of the BBP tables is saved on disk.  This function
    2170             :  * is called once, during the shutdown of the server. Since
    2171             :  * shutdown may be issued from any thread (dangerous) it may lead to
    2172             :  * interference in a parallel session.
    2173             :  */
    2174             : 
    2175             : static int backup_files = 0, backup_dir = 0, backup_subdir = 0;
    2176             : static char *lockfile = NULL;
    2177             : 
    2178             : void
    2179         334 : BBPexit(void)
    2180             : {
                               /* Release the in-memory state of every valid BBP entry and
                                * reset the file-scope bookkeeping (BBP_hash, the backup_*
                                * counters and lockfile).  Entries for which BATshared()
                                * reports true are skipped, and the whole pass is repeated
                                * until a pass completes without skipping anything. */
    2181         334 :         bat i;
    2182         334 :         bool skipped;
    2183             : 
    2184             :         //BBPlock();    /* stop all threads ever touching more descriptors */
    2185             : 
    2186             :         /* free all memory (just for leak-checking in Purify) */
    2187         334 :         do {
    2188         334 :                 skipped = false;
    2189      546575 :                 for (i = 0; i < (bat) ATOMIC_GET(&BBPsize); i++) {
    2190      546241 :                         if (BBPvalid(i)) {
    2191      438278 :                                 BAT *b = BBP_desc(i);
    2192             : 
    2193      438278 :                                 if (b) {
                                                               /* a shared BAT cannot be freed yet: leave it
                                                                * for a later pass of the outer loop */
    2194      438278 :                                         if (BATshared(b)) {
    2195           0 :                                                 skipped = true;
    2196           0 :                                                 continue;
    2197             :                                         }
    2198      438278 :                                         MT_lock_set(&b->theaplock);
                                                               /* if the tail heap belongs to a parent BAT,
                                                                * drop our logical reference on the parent
                                                                * and our reference on the heap */
    2199      438278 :                                         bat tp = VIEWtparent(b);
    2200           0 :                                         if (tp != 0) {
    2201           0 :                                                 --BBP_lrefs(tp);
    2202           0 :                                                 HEAPdecref(b->theap, false);
    2203           0 :                                                 b->theap = NULL;
    2204             :                                         }
                                                               /* same for the variable-sized heap */
    2205      438278 :                                         tp = VIEWvtparent(b);
    2206           0 :                                         if (tp != 0) {
    2207           0 :                                                 --BBP_lrefs(tp);
    2208           0 :                                                 HEAPdecref(b->tvheap, false);
    2209           0 :                                                 b->tvheap = NULL;
    2210             :                                         }
                                                               /* detach a pending old tail heap, clear its
                                                                * DELAYEDREMOVE bit and drop the reference */
    2211      438278 :                                         if (b->oldtail) {
    2212           7 :                                                 Heap *h = b->oldtail;
    2213           7 :                                                 b->oldtail = NULL;
    2214           7 :                                                 ATOMIC_AND(&h->refs, ~DELAYEDREMOVE);
    2215           7 :                                                 HEAPdecref(h, false);
    2216             :                                         }
    2217      438278 :                                         PROPdestroy_nolock(b);
    2218      438278 :                                         MT_lock_unset(&b->theaplock);
    2219      438278 :                                         BATfree(b);
    2220             :                                 }
    2221      438278 :                                 BBP_pid(i) = 0;
    2222      438278 :                                 BBPuncacheit(i, true);
                                                       /* the logical name is only freed when it does
                                                        * not point at the entry's BBP_bak storage */
    2223      438278 :                                 if (BBP_logical(i) != BBP_bak(i))
    2224       11875 :                                         GDKfree(BBP_logical(i));
    2225      438278 :                                 BBP_logical(i) = NULL;
    2226             :                         }
    2227             :                 }
    2228         334 :         } while (skipped);
    2229             :         /* these need to be NULL, otherwise no new ones get created */
    2230         334 :         memset(BBP_hash, 0, sizeof(BBP_hash));
    2231         334 :         backup_files = 0;
    2232         334 :         backup_dir = 0;
    2233         334 :         backup_subdir = 0;
    2234         334 :         if (lockfile) {
    2235         333 :                 GDKfree(lockfile);
    2236         333 :                 lockfile = NULL;
    2237             :         }
    2238         334 : }
    2239             : 
    2240             : /*
    2241             :  * The routine BBPdir creates the BAT pool dictionary file.  It
    2242             :  * includes some information about the current state of affairs in the
    2243             :  * pool.  The location in the buffer pool is saved for later use as
    2244             :  * well.  This is merely done for ease of debugging and of no
    2245             :  * importance to front-ends.  The tail of non-used entries is
    2246             :  * reclaimed as well.
    2247             :  */
    2248             : static inline int
    2249     1625486 : heap_entry(FILE *fp, BATiter *bi, BUN size)
    2250             : {
                               /* Write the tail-column fields of one BBP.dir entry to fp and
                                * return the fprintf() result (negative on output error).
                                *
                                * The number of used heap bytes defaults to bi->hfree; when
                                * size is below BUN_NONE it is recomputed from size instead:
                                * 4 bytes per started group of 32 values for types stored as
                                * TYPE_msk, size << shift for fixed-width types, 0 otherwise.
                                * Note that the local name "free" shadows the allocator
                                * function of the same name inside this function. */
    2251     1625486 :         size_t free = bi->hfree;
    2252     1625486 :         if (size < BUN_NONE) {
    2253     1625486 :                 if ((bi->type >= 0 && ATOMstorage(bi->type) == TYPE_msk))
    2254      254952 :                         free = ((size + 31) / 32) * 4;
    2255     1370534 :                 else if (bi->width > 0)
    2256     1370534 :                         free = size << bi->shift;
    2257             :                 else
    2258             :                         free = 0;
    2259             :         }
    2260             : 
                               /* Fields, in order: type name, width, flag "type is void or a
                                * variable-sized heap is present", property bits (sorted = bit
                                * 0, revsorted = bit 7, key = bit 8, dense = bit 9, nonil =
                                * bit 10, nil = bit 11), nokey[0], nokey[1], nosorted,
                                * norevsorted, tseq, used bytes, minpos, maxpos.  Positions
                                * that are not below size are written as 0 (nokey, nosorted,
                                * norevsorted) or as oid_nil (minpos, maxpos); both nokey
                                * values are zeroed if either one is out of range. */
    2261     5196101 :         return fprintf(fp, " %s %d %d %d " BUNFMT " " BUNFMT " " BUNFMT " "
    2262             :                        BUNFMT " " OIDFMT " %zu %" PRIu64" %" PRIu64,
    2263     1625486 :                        bi->type >= 0 ? BATatoms[bi->type].name : ATOMunknown_name(bi->type),
    2264     1625486 :                        bi->width,
    2265     1625486 :                        bi->type == TYPE_void || bi->vh != NULL,
    2266     1625486 :                        (unsigned short) bi->sorted |
    2267     1625486 :                            ((unsigned short) bi->revsorted << 7) |
    2268     3250972 :                            ((unsigned short) bi->key << 8) |
    2269     1625486 :                            ((unsigned short) BATtdensebi(bi) << 9) |
    2270     1625486 :                            ((unsigned short) bi->nonil << 10) |
    2271     1625486 :                            ((unsigned short) bi->nil << 11),
    2272      971940 :                        bi->nokey[0] >= size || bi->nokey[1] >= size ? 0 : bi->nokey[0],
    2273     1625486 :                        bi->nokey[0] >= size || bi->nokey[1] >= size ? 0 : bi->nokey[1],
    2274     1625486 :                        bi->nosorted >= size ? 0 : bi->nosorted,
    2275     1625486 :                        bi->norevsorted >= size ? 0 : bi->norevsorted,
    2276             :                        bi->tseq,
    2277             :                        free,
    2278     1625486 :                        bi->minpos < size ? (uint64_t) bi->minpos : (uint64_t) oid_nil,
    2279     1625486 :                        bi->maxpos < size ? (uint64_t) bi->maxpos : (uint64_t) oid_nil);
    2280             : }
    2281             : 
    2282             : static inline int
    2283     1625486 : vheap_entry(FILE *fp, BATiter *bi, BUN size)
    2284             : {
                               /* Write the variable-sized heap field of one BBP.dir entry.
                                * Nothing is written (and 0 is returned) when the column has
                                * no such heap; otherwise the used byte count bi->vhfree is
                                * written, or 0 when size is 0, and the fprintf() result is
                                * returned.
                                * NOTE(review): the (void) cast below is redundant, since
                                * size is used in the fprintf() call. */
    2285     1625486 :         (void) size;
    2286     1625486 :         if (bi->vh == NULL)
    2287             :                 return 0;
    2288      360971 :         return fprintf(fp, " %zu", size == 0 ? 0 : bi->vhfree);
    2289             : }
    2290             : 
    2291             : static gdk_return
    2292     1625486 : new_bbpentry(FILE *fp, bat i, BUN size, BATiter *bi)
    2293             : {
                               /* Write the complete one-line BBP.dir entry for bat i to fp.
                                *
                                * fp    output stream for the new BBP.dir
                                * i     bat id; must equal bi->b->batCacheid (asserted)
                                * size  number of values to record; clamped to bi->count
                                * bi    iterator describing the BAT
                                *
                                * Returns GDK_SUCCEED, or GDK_FAIL when the options string
                                * contains a CR or LF or when any write fails. */
    2294             : #ifndef NDEBUG
    2295     1625486 :         assert(i > 0);
    2296     1625486 :         assert(i < (bat) ATOMIC_GET(&BBPsize));
    2297     1625486 :         assert(bi->b);
    2298     1625486 :         assert(bi->b->batCacheid == i);
    2299     1625486 :         assert(bi->b->batRole == PERSISTENT);
    2300     1625486 :         assert(0 <= bi->h->farmid && bi->h->farmid < MAXFARMS);
    2301     1625486 :         assert(BBPfarms[bi->h->farmid].roles & (1U << PERSISTENT));
    2302     1625486 :         if (bi->vh) {
    2303      360971 :                 assert(0 <= bi->vh->farmid && bi->vh->farmid < MAXFARMS);
    2304      360971 :                 assert(BBPfarms[bi->vh->farmid].roles & (1U << PERSISTENT));
    2305             :         }
    2306     1625486 :         assert(size <= bi->count || size == BUN_NONE);
    2307     1625486 :         assert(BBP_options(i) == NULL || strpbrk(BBP_options(i), "\r\n") == NULL);
    2308             : #endif
    2309             : 
                               /* repeated outside the assert block so that builds with
                                * NDEBUG also refuse options that would break the
                                * one-entry-per-line layout */
    2310     1625486 :         if (BBP_options(i) != NULL && strpbrk(BBP_options(i), "\r\n") != NULL) {
    2311           0 :                 GDKerror("options for bat %d contains a newline\n", i);
    2312           0 :                 return GDK_FAIL;
    2313             :         }
                               /* the assert above also admits size == BUN_NONE; presumably
                                * that value exceeds any count and is clamped here -- TODO
                                * confirm */
    2314     1625486 :         if (size > bi->count)
    2315             :                 size = bi->count;
                               /* line layout: id, persistent status bit, logical name,
                                * physical name, restricted << 1, size, hseqbase, then the
                                * heap_entry() and vheap_entry() fields, then the optional
                                * options string, then a newline */
    2316     1625486 :         if (fprintf(fp, "%d %u %s %s %d " BUNFMT " " OIDFMT,
    2317             :                     /* BAT info */
    2318             :                     (int) i,
    2319     1625486 :                     BBP_status(i) & BBPPERSISTENT,
    2320             :                     BBP_logical(i),
    2321     1625486 :                     BBP_physical(i),
    2322     1625486 :                     (unsigned) bi->restricted << 1,
    2323             :                     size,
    2324     1625486 :                     bi->b->hseqbase) < 0 ||
    2325     3250972 :             heap_entry(fp, bi, size) < 0 ||
    2326     1625486 :             vheap_entry(fp, bi, size) < 0 ||
    2327     3250972 :             (BBP_options(i) && fprintf(fp, " %s", BBP_options(i)) < 0) ||
    2328     1625486 :             fprintf(fp, "\n") < 0) {
    2329           0 :                 GDKsyserror("new_bbpentry: Writing BBP.dir entry failed\n");
    2330           0 :                 return GDK_FAIL;
    2331             :         }
    2332             : 
    2333             :         return GDK_SUCCEED;
    2334             : }
    2335             : 
    2336             : static gdk_return
    2337       12156 : BBPdir_header(FILE *f, int n, lng logno, lng transid)
    2338             : {
                               /* Write the four header lines of BBP.dir to f:
                                *   1. "BBP.dir, GDKversion <GDKLIBRARY>"
                                *   2. SIZEOF_SIZE_T, SIZEOF_OID and the size of the widest
                                *      integer type (SIZEOF_HGE if HAVE_HGE, else SIZEOF_LNG)
                                *   3. "BBPsize=<n>"
                                *   4. "BBPinfo=<logno> <transid>"
                                * Returns GDK_FAIL if fprintf() fails or the stream's error
                                * indicator is set, GDK_SUCCEED otherwise. */
    2339       12156 :         if (fprintf(f, "BBP.dir, GDKversion %u\n%d %d %d\nBBPsize=%d\nBBPinfo=" LLFMT " " LLFMT "\n",
    2340             :                     GDKLIBRARY, SIZEOF_SIZE_T, SIZEOF_OID,
    2341             : #ifdef HAVE_HGE
    2342             :                     SIZEOF_HGE
    2343             : #else
    2344             :                     SIZEOF_LNG
    2345             : #endif
    2346       12156 :                     , n, logno, transid) < 0 ||
    2347       12156 :             ferror(f)) {
    2348           0 :                 GDKsyserror("Writing BBP.dir header failed\n");
    2349           0 :                 return GDK_FAIL;
    2350             :         }
    2351             :         return GDK_SUCCEED;
    2352             : }
    2353             : 
    2354             : static gdk_return
    2355       12156 : BBPdir_first(bool subcommit, lng logno, lng transid,
    2356             :              FILE **obbpfp, FILE **nbbpfp)
    2357             : {
                               /* Start writing a new BBP.dir: open the new file, optionally
                                * open the backup BBP.dir and consume its header, and write
                                * the new header.
                                *
                                * subcommit  if true, the backup BBP.dir is opened for reading
                                *            and obbpfp must be non-NULL (asserted)
                                * logno, transid  values stored on the BBPinfo header line
                                * obbpfp     out (may be NULL unless subcommit): stream for
                                *            the backup BBP.dir, positioned after its
                                *            four-line header; NULL when not a subcommit
                                * nbbpfp     out: stream for the new BBP.dir
                                *
                                * Returns GDK_SUCCEED, or GDK_FAIL with both streams closed
                                * and both out parameters left NULL. */
    2358       12156 :         FILE *obbpf = NULL, *nbbpf = NULL;
    2359       12156 :         int n = 0;
    2360       12156 :         lng ologno, otransid;
    2361             : 
    2362       12156 :         if (obbpfp)
    2363       11933 :                 *obbpfp = NULL;
    2364       12156 :         *nbbpfp = NULL;
    2365             : 
    2366       12156 :         if ((nbbpf = GDKfilelocate(0, "BBP", "w", "dir")) == NULL) {
    2367             :                 return GDK_FAIL;
    2368             :         }
    2369             : 
    2370       12156 :         if (subcommit) {
    2371       11925 :                 char buf[512];
    2372             : 
    2373       11925 :                 assert(obbpfp != NULL);
    2374             :                 /* we need to copy the backup BBP.dir to the new, but
    2375             :                  * replacing the entries for the subcommitted bats */
                                       /* the copy in SUBDIR is tried first, then the one in
                                        * BAKDIR */
    2376       11925 :                 if ((obbpf = GDKfileopen(0, SUBDIR, "BBP", "dir", "r")) == NULL &&
    2377           0 :                     (obbpf = GDKfileopen(0, BAKDIR, "BBP", "dir", "r")) == NULL) {
    2378           0 :                         GDKsyserror("subcommit attempted without backup BBP.dir");
    2379           0 :                         goto bailout;
    2380             :                 }
    2381             :                 /* read first three lines */
    2382       23850 :                 if (fgets(buf, sizeof(buf), obbpf) == NULL || /* BBP.dir, GDKversion %d */
    2383       23850 :                     fgets(buf, sizeof(buf), obbpf) == NULL || /* SIZEOF_SIZE_T SIZEOF_OID SIZEOF_MAX_INT */
    2384       11925 :                     fgets(buf, sizeof(buf), obbpf) == NULL) { /* BBPsize=%d */
    2385           0 :                         GDKerror("subcommit attempted with invalid backup BBP.dir.");
    2386           0 :                         goto bailout;
    2387             :                 }
    2388             :                 /* third line contains BBPsize */
    2389       11925 :                 if (sscanf(buf, "BBPsize=%d", &n) != 1) {
    2390           0 :                         GDKerror("cannot read BBPsize in backup BBP.dir.");
    2391           0 :                         goto bailout;
    2392             :                 }
    2393             :                 /* fourth line contains BBPinfo */
                                       /* the two values are parsed only to validate the
                                        * line; they are not used further in this function */
    2394       11925 :                 if (fgets(buf, sizeof(buf), obbpf) == NULL ||
    2395       11925 :                     sscanf(buf, "BBPinfo=" LLSCN " " LLSCN, &ologno, &otransid) != 2) {
    2396           0 :                         GDKerror("cannot read BBPinfo in backup BBP.dir.");
    2397           0 :                         goto bailout;
    2398             :                 }
    2399             :         }
    2400             : 
                               /* the size written is the larger of the backup file's
                                * BBPsize (0 when there is no backup) and the current one */
    2401       12156 :         if (n < (bat) ATOMIC_GET(&BBPsize))
    2402        3193 :                 n = (bat) ATOMIC_GET(&BBPsize);
    2403             : 
    2404       12156 :         TRC_DEBUG(IO_, "writing BBP.dir (%d bats).\n", n);
    2405             : 
    2406       12156 :         if (BBPdir_header(nbbpf, n, logno, transid) != GDK_SUCCEED) {
    2407           0 :                 goto bailout;
    2408             :         }
    2409             : 
    2410       12156 :         if (obbpfp)
    2411       11933 :                 *obbpfp = obbpf;
    2412       12156 :         *nbbpfp = nbbpf;
    2413             : 
    2414       12156 :         return GDK_SUCCEED;
    2415             : 
    2416           0 :   bailout:
    2417           0 :         if (obbpf != NULL)
    2418           0 :                 fclose(obbpf);
    2419           0 :         if (nbbpf != NULL)
    2420           0 :                 fclose(nbbpf);
    2421           0 :         return GDK_FAIL;
    2422             : }
    2423             : 
    2424             : static bat
    2425     1816975 : BBPdir_step(bat bid, BUN size, int n, char *buf, size_t bufsize,
    2426             :             FILE **obbpfp, FILE *nbbpf, BATiter *bi)
    2427             : {
                               /* Advance the merge of the backup BBP.dir into the new one up
                                * to bat bid, then write the entry for bid itself if it is
                                * persistent.
                                *
                                * n carries the merge state between calls:
                                *   n < -1  an earlier step failed; returned unchanged
                                *   n == -1 the backup file is exhausted (or absent)
                                *   n == 0  no line is buffered in buf
                                *   n > 0   buf holds the not-yet-copied backup line for bat n
                                *
                                * Backup lines for ids below bid are copied to nbbpf.  A
                                * buffered line whose id equals bid is dropped, since the
                                * new entry (if any) replaces it; a line with a higher id
                                * stays buffered for a later call.
                                *
                                * Returns the new state as described above, or -2 on error,
                                * in which case both streams have been closed. */
    2428     1816975 :         if (n < -1)          /* safety catch */
    2429             :                 return n;
    2430     4842570 :         while (n >= 0 && n < bid) {
    2431     3025595 :                 if (n > 0) {
    2432     1458797 :                         if (fputs(buf, nbbpf) == EOF) {
    2433           0 :                                 GDKerror("Writing BBP.dir file failed.\n");
    2434           0 :                                 goto bailout;
    2435             :                         }
    2436             :                 }
    2437     3025595 :                 if (fgets(buf, (int) bufsize, *obbpfp) == NULL) {
    2438        3280 :                         if (ferror(*obbpfp)) {
    2439           0 :                                 GDKerror("error reading backup BBP.dir.");
    2440           0 :                                 goto bailout;
    2441             :                         }
                                               /* end of file: remember that and release the
                                                * stream so that later calls skip the loop */
    2442        3280 :                         n = -1;
    2443        3280 :                         if (fclose(*obbpfp) == EOF) {
    2444           0 :                                 GDKsyserror("Closing backup BBP.dir file failed\n");
    2445           0 :                                 GDKclrerr(); /* ignore error */
    2446             :                         }
    2447        3280 :                         *obbpfp = NULL;
    2448             :                 } else {
                                               /* every entry line starts with its bat id */
    2449     3022315 :                         if (sscanf(buf, "%d", &n) != 1 || n <= 0 || n >= N_BBPINIT * BBPINIT) {
    2450           0 :                                 GDKerror("subcommit attempted with invalid backup BBP.dir.");
    2451           0 :                                 goto bailout;
    2452             :                         }
    2453             :                 }
    2454             :         }
    2455     1816975 :         if (BBP_status(bid) & BBPPERSISTENT) {
    2456     1625486 :                 if (new_bbpentry(nbbpf, bid, size, bi) != GDK_SUCCEED)
    2457           0 :                         goto bailout;
    2458             :         }
    2459     1816975 :         return n == -1 ? -1 : n == bid ? 0 : n;
    2460             : 
    2461           0 :   bailout:
                               /* NOTE(review): *obbpfp is closed here but not reset to NULL,
                                * and nbbpf is closed as well; callers presumably must not
                                * touch either stream after a -2 return -- confirm against
                                * the callers */
    2462           0 :         if (*obbpfp)
    2463           0 :                 fclose(*obbpfp);
    2464           0 :         fclose(nbbpf);
    2465           0 :         return -2;
    2466             : }
    2467             : 
    2468             : static gdk_return
    2469       12156 : BBPdir_last(int n, char *buf, size_t bufsize, FILE *obbpf, FILE *nbbpf)
    2470             : {
                               /* Finish writing the new BBP.dir.
                                *
                                * n        if positive, buf holds a backup line that still has
                                *          to be copied
                                * buf      line buffer of bufsize bytes; not accessed when
                                *          n <= 0 and obbpf is NULL
                                * obbpf    backup BBP.dir stream, or NULL; all of its remaining
                                *          lines are copied and it is closed
                                * nbbpf    new BBP.dir stream; flushed, synced to disk unless
                                *          NOSYNCMASK is set in GDKdebug, and closed
                                *
                                * Returns GDK_SUCCEED, or GDK_FAIL after closing whichever
                                * stream is still open. */
    2471       12156 :         if (n > 0 && fputs(buf, nbbpf) == EOF) {
    2472           0 :                 GDKerror("Writing BBP.dir file failed.\n");
    2473           0 :                 goto bailout;
    2474             :         }
    2475      278764 :         while (obbpf) {
    2476      275253 :                 if (fgets(buf, (int) bufsize, obbpf) == NULL) {
    2477        8645 :                         if (ferror(obbpf)) {
    2478           0 :                                 GDKerror("error reading backup BBP.dir.");
    2479           0 :                                 goto bailout;
    2480             :                         }
    2481        8645 :                         if (fclose(obbpf) == EOF) {
    2482           0 :                                 GDKsyserror("Closing backup BBP.dir file failed\n");
    2483           0 :                                 GDKclrerr(); /* ignore error */
    2484             :                         }
    2485             :                         obbpf = NULL;
    2486             :                 } else {
    2487      266608 :                         if (fputs(buf, nbbpf) == EOF) {
    2488           0 :                                 GDKerror("Writing BBP.dir file failed.\n");
    2489           0 :                                 goto bailout;
    2490             :                         }
    2491             :                 }
    2492             :         }
                               /* NOTE(review): if none of NATIVE_WIN32, HAVE_FDATASYNC and
                                * HAVE_FSYNC is defined, the second operand reduces to
                                * !(GDKdebug & NOSYNCMASK), which would report a sync failure
                                * whenever syncing is enabled -- confirm that one of them is
                                * always defined */
    2493       12156 :         if (fflush(nbbpf) == EOF ||
    2494       12156 :             (!(ATOMIC_GET(&GDKdebug) & NOSYNCMASK)
    2495             : #if defined(NATIVE_WIN32)
    2496             :              && _commit(_fileno(nbbpf)) < 0
    2497             : #elif defined(HAVE_FDATASYNC)
    2498          10 :              && fdatasync(fileno(nbbpf)) < 0
    2499             : #elif defined(HAVE_FSYNC)
    2500             :              && fsync(fileno(nbbpf)) < 0
    2501             : #endif
    2502             :                     )) {
    2503           0 :                 GDKsyserror("Syncing BBP.dir file failed\n");
    2504           0 :                 goto bailout;
    2505             :         }
    2506       12156 :         if (fclose(nbbpf) == EOF) {
    2507           0 :                 GDKsyserror("Closing BBP.dir file failed\n");
    2508           0 :                 nbbpf = NULL;   /* can't close again */
    2509           0 :                 goto bailout;
    2510             :         }
    2511             : 
    2512       12156 :         TRC_DEBUG(IO_, "end\n");
    2513             : 
    2514             :         return GDK_SUCCEED;
    2515             : 
    2516           0 :   bailout:
    2517           0 :         if (obbpf != NULL)
    2518           0 :                 fclose(obbpf);
    2519           0 :         if (nbbpf != NULL)
    2520           0 :                 fclose(nbbpf);
    2521             :         return GDK_FAIL;
    2522             : }
    2523             : 
    2524             : gdk_return
    2525         223 : BBPdir_init(void)
    2526             : {
                               /* Write a BBP.dir that contains only the header: no backup
                                * file is read (subcommit is false) and no entries are
                                * written, with logno and transid both 0.  BBPdir_last() is
                                * called with n == -1 and without buffer or backup stream, so
                                * it only flushes, syncs and closes the new file.  Returns
                                * the result of the first step that fails, else GDK_SUCCEED. */
    2527         223 :         FILE *fp;
    2528         223 :         gdk_return rc;
    2529             : 
    2530         223 :         rc = BBPdir_first(false, 0, 0, NULL, &fp);
    2531         223 :         if (rc == GDK_SUCCEED)
    2532         223 :                 rc = BBPdir_last(-1, NULL, 0, NULL, fp);
    2533         223 :         return rc;
    2534             : }
    2535             : 
    2536             : /* function used for debugging */
    2537             : void
    2538           0 : BBPdump(void)
    2539             : {
    2540           0 :         size_t mem = 0, vm = 0;
    2541           0 :         int n = 0;
    2542             : 
    2543           0 :         for (bat i = 0; i < (bat) ATOMIC_GET(&BBPsize); i++) {
    2544           0 :                 if (BBP_refs(i) == 0 && BBP_lrefs(i) == 0)
    2545           0 :                         continue;
    2546           0 :                 BAT *b = BBP_desc(i);
    2547           0 :                 unsigned status = BBP_status(i);
    2548           0 :                 printf("# %d: " ALGOOPTBATFMT " refs=%d lrefs=%d status=%u%s",
    2549             :                        i,
    2550           0 :                        ALGOOPTBATPAR(b),
    2551             :                        BBP_refs(i),
    2552             :                        BBP_lrefs(i),
    2553             :                        status,
    2554           0 :                        BBP_cache(i) ? "" : " not cached");
    2555           0 :                 if (b == NULL) {
    2556           0 :                         printf(", no descriptor\n");
    2557           0 :                         continue;
    2558             :                 }
    2559           0 :                 if (b->theap) {
    2560           0 :                         if (b->theap->parentid != b->batCacheid) {
    2561           0 :                                 printf(" Theap -> %d", b->theap->parentid);
    2562             :                         } else {
    2563           0 :                                 printf(" Theap=[%zu,%zu,f=%d]%s%s",
    2564             :                                        b->theap->free,
    2565             :                                        b->theap->size,
    2566           0 :                                        b->theap->farmid,
    2567           0 :                                        b->theap->base == NULL ? "X" : b->theap->storage == STORE_MMAP ? "M" : "",
    2568           0 :                                        status & BBPSWAPPED ? "(Swapped)" : b->theap->dirty ? "(Dirty)" : "");
    2569           0 :                                 mem += HEAPmemsize(b->theap);
    2570           0 :                                 vm += HEAPvmsize(b->theap);
    2571           0 :                                 n++;
    2572             :                         }
    2573             :                 }
    2574           0 :                 if (b->tvheap) {
    2575           0 :                         if (b->tvheap->parentid != b->batCacheid) {
    2576           0 :                                 printf(" Tvheap -> %d",
    2577             :                                        b->tvheap->parentid);
    2578             :                         } else {
    2579           0 :                                 printf(" Tvheap=[%zu,%zu,f=%d]%s%s",
    2580             :                                        b->tvheap->free,
    2581             :                                        b->tvheap->size,
    2582           0 :                                        b->tvheap->farmid,
    2583           0 :                                        b->tvheap->base == NULL ? "X" : b->tvheap->storage == STORE_MMAP ? "M" : "",
    2584           0 :                                        b->tvheap->dirty ? "(Dirty)" : "");
    2585           0 :                                 mem += HEAPmemsize(b->tvheap);
    2586           0 :                                 vm += HEAPvmsize(b->tvheap);
    2587             :                         }
    2588             :                 }
    2589           0 :                 if (MT_rwlock_rdtry(&b->thashlock)) {
    2590           0 :                         if (b->thash && b->thash != (Hash *) 1) {
    2591           0 :                                 size_t m = HEAPmemsize(&b->thash->heaplink) + HEAPmemsize(&b->thash->heapbckt);
    2592           0 :                                 size_t v = HEAPvmsize(&b->thash->heaplink) + HEAPvmsize(&b->thash->heapbckt);
    2593           0 :                                 printf(" Thash=[%zu,%zu,f=%d/%d]", m, v,
    2594           0 :                                        b->thash->heaplink.farmid,
    2595           0 :                                        b->thash->heapbckt.farmid);
    2596           0 :                                 mem += m;
    2597           0 :                                 vm += v;
    2598             :                         }
    2599           0 :                         MT_rwlock_rdunlock(&b->thashlock);
    2600             :                 }
    2601           0 :                 printf(" role: %s\n",
    2602           0 :                        b->batRole == PERSISTENT ? "persistent" : "transient");
    2603             :         }
    2604           0 :         printf("# %d bats: mem=%zu, vm=%zu\n", n, mem, vm);
    2605           0 :         fflush(stdout);
    2606           0 : }
    2607             : 
    2608             : /*
    2609             :  * @+ BBP Readonly Interface
    2610             :  *
    2611             :  * These interface functions do not change the BBP tables. If they
    2612             :  * only access one specific BAT, the caller must have ensured that no
    2613             :  * other thread is modifying that BAT, therefore such functions do not
    2614             :  * need locking.
    2615             :  *
    2616             :  * BBP index lookup by BAT name:
    2617             :  */
    2618             : static inline bat
    2619       39965 : BBP_find(const char *nme, bool lock)
    2620             : {
    2621       39965 :         bat i = BBPnamecheck(nme);
    2622             : 
    2623       12278 :         if (i != 0) {
    2624             :                 /* for tmp_X BATs, we already know X */
    2625       12278 :                 const char *s;
    2626             : 
    2627       12278 :                 if (i >= (bat) ATOMIC_GET(&BBPsize) || (s = BBP_logical(i)) == NULL || strcmp(s, nme)) {
    2628       12278 :                         i = 0;
    2629             :                 }
    2630       27687 :         } else if (*nme != '.') {
    2631             :                 /* must lock since hash-lookup traverses other BATs */
    2632       27687 :                 if (lock)
    2633        1490 :                         MT_lock_set(&BBPnameLock);
    2634       28043 :                 for (i = BBP_hash[strHash(nme) & BBP_mask]; i; i = BBP_next(i)) {
    2635        1144 :                         if (strcmp(BBP_logical(i), nme) == 0)
    2636             :                                 break;
    2637             :                 }
    2638       27687 :                 if (lock)
    2639        1490 :                         MT_lock_unset(&BBPnameLock);
    2640             :         }
    2641       39965 :         return i;
    2642             : }
    2643             : 
    2644             : bat
    2645        1490 : BBPindex(const char *nme)
    2646             : {
    2647        1490 :         return BBP_find(nme, true);
    2648             : }
    2649             : 
    2650             : /*
    2651             :  * @+ BBP Update Interface
    2652             :  * Operations to insert, delete, clear, and modify BBP entries.
    2653             :  * Our policy for the BBP is to provide unlocked BBP access for
    2654             :  * speed, but still write operations have to be locked.
    2655             :  * #ifdef DEBUG_THREADLOCAL_BATS
    2656             :  * Create the shadow version (reversed) of a bat.
    2657             :  *
    2658             :  * An existing BAT is inserted into the BBP
    2659             :  */
    2660             : static inline str
    2661      691551 : BBPsubdir_recursive(str s, bat i)
    2662             : {
    2663      691551 :         i >>= 6;
    2664      691551 :         if (i >= 0100) {
    2665      185170 :                 s = BBPsubdir_recursive(s, i);
    2666      185165 :                 *s++ = DIR_SEP;
    2667             :         }
    2668      691546 :         i &= 077;
    2669      691546 :         *s++ = '0' + (i >> 3);
    2670      691546 :         *s++ = '0' + (i & 7);
    2671      691546 :         return s;
    2672             : }
    2673             : 
    2674             : static inline void
    2675      540143 : BBPgetsubdir(str s, bat i)
    2676             : {
    2677      540143 :         if (i >= 0100) {
    2678      506388 :                 s = BBPsubdir_recursive(s, i);
    2679             :         }
    2680      540143 :         *s = 0;
    2681      540143 : }
    2682             : 
    2683             : /* The free list is empty.  We create a new entry by either just
    2684             :  * increasing BBPsize (up to BBPlimit) or extending the BBP (which
    2685             :  * increases BBPlimit).
    2686             :  *
    2687             :  * Note that this is the only place in normal, multi-threaded operation
    2688             :  * where BBPsize is assigned a value (never decreasing) and that the
    2689             :  * assignment happens after any necessary memory was allocated and
    2690             :  * initialized. */
    2691             : static gdk_return
    2692       47814 : maybeextend(void)
    2693             : {
    2694       47814 :         bat size = (bat) ATOMIC_GET(&BBPsize);
    2695       47821 :         if (size + BBP_FREE_LOWATER > BBPlimit &&
    2696           7 :             BBPextend(size + BBP_FREE_LOWATER) != GDK_SUCCEED) {
    2697             :                 /* nothing available */
    2698             :                 return GDK_FAIL;
    2699             :         }
    2700       47814 :         ATOMIC_SET(&BBPsize, size + BBP_FREE_LOWATER);
    2701       47814 :         assert(BBP_free == 0);
    2702       47814 :         BBP_free = size;
    2703      478140 :         for (int i = 1; i < BBP_FREE_LOWATER; i++) {
    2704      430326 :                 bat sz = size;
    2705      430326 :                 BBP_next(sz) = ++size;
    2706             :         }
    2707       47814 :         BBP_next(size) = 0;
    2708       47814 :         BBP_nfree += BBP_FREE_LOWATER;
    2709       47814 :         return GDK_SUCCEED;
    2710             : }
    2711             : 
    2712             : /* return new BAT id (> 0); return 0 on failure */
    2713             : bat
    2714    15695953 : BBPinsert(BAT *bn)
    2715             : {
    2716    15695953 :         MT_Id pid = MT_getpid();
    2717    15693928 :         bool lock = locked_by == 0 || locked_by != pid;
    2718    15693928 :         char dirname[24];
    2719    15693928 :         bat i;
    2720    15693928 :         int len = 0;
    2721    15693928 :         struct freebats *t = MT_thread_getfreebats();
    2722             : 
    2723    15692428 :         if (t->freebats == 0) {
    2724             :                 /* critical section: get a new BBP entry */
    2725      192975 :                 assert(t->nfreebats == 0);
    2726      192975 :                 if (lock) {
    2727      192975 :                         MT_lock_set(&GDKcacheLock);
    2728             :                 }
    2729             : 
    2730             :                 /* get a global bat, perhaps extend */
    2731      192983 :                 if (BBP_free <= 0) {
    2732             :                         /* we need to extend the BBP */
    2733       47814 :                         gdk_return r;
    2734       47814 :                         r = maybeextend();
    2735       47814 :                         if (r != GDK_SUCCEED) {
    2736           0 :                                 if (lock) {
    2737           0 :                                         MT_lock_unset(&GDKcacheLock);
    2738             :                                 }
    2739             :                                 /* failed */
    2740           0 :                                 return 0;
    2741             :                         }
    2742             :                 }
    2743      192983 :                 t->freebats = i = BBP_free;
    2744      192983 :                 bat l = 0;
    2745     2089626 :                 for (int x = 0; x < BBP_FREE_LOWATER && i; x++) {
    2746     1896643 :                         assert(BBP_next(i) == 0 || BBP_next(i) > i);
    2747     1896643 :                         t->nfreebats++;
    2748     1896643 :                         BBP_nfree--;
    2749     1896643 :                         l = i;
    2750     1896643 :                         i = BBP_next(i);
    2751             :                 }
    2752      192983 :                 BBP_next(l) = 0;
    2753      192983 :                 BBP_free = i;
    2754             : 
    2755      192983 :                 if (lock) {
    2756      192983 :                         MT_lock_unset(&GDKcacheLock);
    2757             :                 }
    2758             :                 /* rest of the work outside the lock */
    2759             :         }
    2760    15692436 :         if (t->nfreebats > 0) {
    2761    15692436 :                 assert(t->freebats > 0);
    2762    15692436 :                 i = t->freebats;
    2763    15692436 :                 t->freebats = BBP_next(i);
    2764    15692436 :                 assert(t->freebats == 0 || t->freebats > i);
    2765    15692436 :                 BBP_next(i) = 0;
    2766    15692436 :                 t->nfreebats--;
    2767             :         } else {
    2768           0 :                 assert(t->nfreebats == 0);
    2769           0 :                 assert(t->freebats == 0);
    2770             :                 return 0;
    2771             :         }
    2772             : 
    2773             :         /* fill in basic BBP fields for the new bat */
    2774             : 
    2775    15692436 :         bn->batCacheid = i;
    2776    15692436 :         bn->creator_tid = pid;
    2777             : 
    2778    15692436 :         MT_lock_set(&GDKswapLock(i));
    2779    15687069 :         BBP_status_set(i, BBPDELETING|BBPHOT);
    2780    15687069 :         BBP_cache(i) = NULL;
    2781    15687069 :         BBP_desc(i) = bn;
    2782    15687069 :         BBP_refs(i) = 1;        /* new bats have 1 pin */
    2783    15687069 :         BBP_lrefs(i) = 0;       /* ie. no logical refs */
    2784    15687069 :         BBP_pid(i) = pid;
    2785    15687069 :         MT_lock_unset(&GDKswapLock(i));
    2786             : 
    2787    15695252 :         if (*BBP_bak(i) == 0)
    2788      501990 :                 len = snprintf(BBP_bak(i), sizeof(BBP_bak(i)), "tmp_%o", (unsigned) i);
    2789    15695252 :         if (len == -1 || len >= FILENAME_MAX) {
    2790           0 :                 GDKerror("impossible error\n");
    2791           0 :                 return 0;
    2792             :         }
    2793    15695252 :         BBP_logical(i) = BBP_bak(i);
    2794             : 
    2795             :         /* Keep the physical location around forever */
    2796    15695252 :         if (!GDKinmemory(0) && *BBP_physical(i) == 0) {
    2797      501612 :                 BBPgetsubdir(dirname, i);
    2798             : 
    2799      501681 :                 if (*dirname)   /* i.e., i >= 0100 */
    2800      483544 :                         len = snprintf(BBP_physical(i), sizeof(BBP_physical(i)),
    2801             :                                        "%s%c%o", dirname, DIR_SEP, (unsigned) i);
    2802             :                 else
    2803       18137 :                         len = snprintf(BBP_physical(i), sizeof(BBP_physical(i)),
    2804             :                                        "%o", (unsigned) i);
    2805      501681 :                 if (len == -1 || len >= FILENAME_MAX)
    2806             :                         return 0;
    2807             : 
    2808      501682 :                 TRC_DEBUG(BAT_, "%d = new %s(%s)\n", (int) i, BBP_logical(i), ATOMname(bn->ttype));
    2809             :         }
    2810             : 
    2811             :         return i;
    2812             : }
    2813             : 
    2814             : gdk_return
    2815    15714239 : BBPcacheit(BAT *bn, bool lock)
    2816             : {
    2817    15714239 :         bat i = bn->batCacheid;
    2818    15714239 :         unsigned mode;
    2819             : 
    2820    15714239 :         if (lock)
    2821    31388150 :                 lock = locked_by == 0 || locked_by != MT_getpid();
    2822             : 
    2823    15714239 :         assert(i > 0);
    2824             : 
    2825    15714239 :         if (lock)
    2826    15694086 :                 MT_lock_set(&GDKswapLock(i));
    2827    15705620 :         mode = (BBP_status(i) | BBPLOADED) & ~(BBPLOADING | BBPDELETING | BBPSWAPPED);
    2828             : 
    2829             :         /* cache it! */
    2830    15705620 :         BBP_cache(i) = bn;
    2831             : 
    2832    15705620 :         BBP_status_set(i, mode);
    2833             : 
    2834    15705620 :         if (lock)
    2835    15690041 :                 MT_lock_unset(&GDKswapLock(i));
    2836    15711459 :         return GDK_SUCCEED;
    2837             : }
    2838             : 
    2839             : /*
    2840             :  * BBPuncacheit changes the BBP status to swapped out.  Currently only
    2841             :  * used in BBPfree (bat swapped out) and BBPclear (bat destroyed
    2842             :  * forever).
    2843             :  */
    2844             : 
    2845             : static void
    2846    15734249 : BBPuncacheit(bat i, bool unloaddesc)
    2847             : {
    2848    15734249 :         if (i < 0)
    2849             :                 i = -i;
    2850    15734249 :         if (BBPcheck(i)) {
    2851    15733276 :                 BAT *b = BBP_desc(i);
    2852             : 
    2853    15733276 :                 assert(unloaddesc || BBP_refs(i) == 0);
    2854             : 
    2855    15733276 :                 if (b) {
    2856    15733276 :                         if (BBP_cache(i)) {
    2857    15711448 :                                 TRC_DEBUG(BAT_, "uncache %d (%s)\n", (int) i, BBP_logical(i));
    2858             : 
    2859             :                                 /* clearing bits can be done without the lock */
    2860    15711448 :                                 BBP_status_off(i, BBPLOADED);
    2861             : 
    2862    15711448 :                                 BBP_cache(i) = NULL;
    2863             :                         }
    2864    15733276 :                         if (unloaddesc) {
    2865    15723298 :                                 BBP_desc(i) = NULL;
    2866    15723298 :                                 BATdestroy(b);
    2867             :                         }
    2868             :                 }
    2869             :         }
    2870    15736449 : }
    2871             : 
    2872             : /*
    2873             :  * @- BBPclear
    2874             :  * BBPclear removes a BAT from the BBP directory forever.
    2875             :  */
    2876             : static inline void
    2877       68898 : BBPhandover(struct freebats *t, uint32_t n)
    2878             : {
    2879       68898 :         bat *p, bid;
    2880             :         /* take one bat from our private free list and hand it over to
    2881             :          * the global free list */
    2882       68898 :         if (n >= t->nfreebats) {
    2883       45387 :                 bid = t->freebats;
    2884       45387 :                 t->freebats = 0;
    2885       45387 :                 BBP_nfree += t->nfreebats;
    2886       45387 :                 t->nfreebats = 0;
    2887             :         } else {
    2888       23511 :                 p = &t->freebats;
    2889      258621 :                 for (uint32_t i = n; i < t->nfreebats; i++)
    2890      235110 :                         p = &BBP_next(*p);
    2891       23511 :                 bid = *p;
    2892       23511 :                 *p = 0;
    2893       23511 :                 BBP_nfree += n;
    2894       23511 :                 t->nfreebats -= n;
    2895             :         }
    2896             :         p = &BBP_free;
    2897     1549523 :         while (bid != 0) {
    2898     7851921 :                 while (*p && *p < bid)
    2899     6371296 :                         p = &BBP_next(*p);
    2900     1480625 :                 bat i = BBP_next(bid);
    2901     1480625 :                 BBP_next(bid) = *p;
    2902     1480625 :                 *p = bid;
    2903     1480625 :                 bid = i;
    2904             :         }
    2905       68898 : }
    2906             : 
    2907             : #ifndef NDEBUG
    2908             : extern void printlist(bat bid) __attribute__((__cold__));
    2909             : /* print a bat free list, pass start of free list as argument
    2910             :  * to be used from the debugger */
    2911             : void
    2912           0 : printlist(bat bid)
    2913             : {
    2914           0 :         int n = 0;
    2915           0 :         while (bid) {
    2916           0 :                 printf("%d ", bid);
    2917           0 :                 bid = BBP_next(bid);
    2918           0 :                 n++;
    2919             :         }
    2920           0 :         printf("(%d)\n", n);
    2921           0 : }
    2922             : #endif
    2923             : 
    2924             : static inline void
    2925    15284740 : bbpclear(bat i, bool lock)
    2926             : {
    2927    15284740 :         struct freebats *t = MT_thread_getfreebats();
    2928             : 
    2929    15282929 :         TRC_DEBUG(BAT_, "clear %d (%s)\n", (int) i, BBP_logical(i));
    2930    15282929 :         BBPuncacheit(i, true);
    2931    15292605 :         TRC_DEBUG(BAT_, "set to unloading %d\n", i);
    2932    15292605 :         if (lock) {
    2933    15287945 :                 MT_lock_set(&GDKswapLock(i));
    2934             :         }
    2935             : 
    2936    15288387 :         BBP_status_set(i, BBPUNLOADING);
    2937    15288387 :         BBP_refs(i) = 0;
    2938    15288387 :         BBP_lrefs(i) = 0;
    2939    15288387 :         if (lock)
    2940    15287466 :                 MT_lock_unset(&GDKswapLock(i));
    2941    15287939 :         if (!BBPtmpcheck(BBP_logical(i))) {
    2942        2682 :                 MT_lock_set(&BBPnameLock);
    2943        2682 :                 BBP_delete(i);
    2944        2682 :                 MT_lock_unset(&BBPnameLock);
    2945             :         }
    2946    15287939 :         if (BBP_logical(i) != BBP_bak(i))
    2947        2682 :                 GDKfree(BBP_logical(i));
    2948    15287775 :         BBP_status_set(i, 0);
    2949    15287775 :         BBP_logical(i) = NULL;
    2950    15287775 :         bat *p;
    2951    47597817 :         for (p = &t->freebats; *p && *p < i; p = &BBP_next(*p))
    2952             :                 ;
    2953    15287775 :         BBP_next(i) = *p;
    2954    15287775 :         *p = i;
    2955    15287775 :         t->nfreebats++;
    2956    15287775 :         BBP_pid(i) = ~(MT_Id)0; /* not zero, not a valid thread id */
    2957    15287775 :         if (t->nfreebats > BBP_FREE_HIWATER) {
    2958       23511 :                 if (lock)
    2959       23511 :                         MT_lock_set(&GDKcacheLock);
    2960       23511 :                 BBPhandover(t, t->nfreebats - BBP_FREE_LOWATER);
    2961       23511 :                 if (lock)
    2962       23511 :                         MT_lock_unset(&GDKcacheLock);
    2963             :         }
    2964    15287775 : }
    2965             : 
    2966             : void
    2967    15286837 : BBPclear(bat i)
    2968             : {
    2969    15286837 :         if (BBPcheck(i)) {
    2970    15284812 :                 bool lock = locked_by == 0 || locked_by != MT_getpid();
    2971    15284812 :                 bbpclear(i, lock);
    2972             :         }
    2973    15286173 : }
    2974             : 
    2975             : void
    2976       49789 : BBPrelinquishbats(void)
    2977             : {
    2978       49789 :         struct freebats *t = MT_thread_getfreebats();
    2979       49789 :         if (t == NULL || t->nfreebats == 0)
    2980             :                 return;
    2981       45387 :         MT_lock_set(&GDKcacheLock);
    2982       90774 :         while (t->nfreebats > 0) {
    2983       45387 :                 BBPhandover(t, t->nfreebats);
    2984             :         }
    2985       45387 :         MT_lock_unset(&GDKcacheLock);
    2986             : }
    2987             : 
    2988             : /*
    2989             :  * @- BBP rename
    2990             :  *
    2991             :  * Each BAT has a logical name that is globally unique.
    2992             :  * The batId is the same as the logical BAT name.
    2993             :  *
    2994             :  * The default logical name of a BAT is tmp_X, where X is the
    2995             :  * batCacheid.  Apart from being globally unique, new logical bat
    2996             :  * names cannot be of the form tmp_X, unless X is the batCacheid.
    2997             :  *
    2998             :  * Physical names consist of a directory name followed by a logical
    2999             :  * name suffix.  The directory name is derived from the batCacheid,
    3000             :  * and is currently organized in a hierarchy that puts max 64 bats in
    3001             :  * each directory (see BBPgetsubdir).
    3002             :  *
    3003             :  * Concerning the physical suffix: it is almost always bat_X. This
    3004             :  * saves us a whole lot of trouble, as bat_X is always unique and no
    3005             :  * conflicts can occur.  Other suffixes are only supported
    3006             :  * for backward compatibility with old repositories (you won't
    3007             :  * see them anymore in new repositories).
    3008             :  */
    3009             : int
    3010       38475 : BBPrename(BAT *b, const char *nme)
    3011             : {
    3012       38475 :         if (b == NULL)
    3013             :                 return 0;
    3014             : 
    3015       38475 :         char dirname[24];
    3016       38475 :         bat bid = b->batCacheid;
    3017       38475 :         bat tmpid = 0, i;
    3018             : 
    3019       38475 :         if (nme == NULL) {
    3020       12278 :                 if (BBP_bak(bid)[0] == 0 &&
    3021           0 :                     snprintf(BBP_bak(bid), sizeof(BBP_bak(bid)), "tmp_%o", (unsigned) bid) >= (int) sizeof(BBP_bak(bid))) {
    3022             :                         /* cannot happen */
    3023           0 :                         TRC_CRITICAL(GDK, "BBP default filename too long\n");
    3024           0 :                         return BBPRENAME_LONG;
    3025             :                 }
    3026       12278 :                 nme = BBP_bak(bid);
    3027             :         }
    3028             : 
    3029             :         /* If name stays same, do nothing */
    3030       38475 :         if (BBP_logical(bid) && strcmp(BBP_logical(bid), nme) == 0)
    3031             :                 return 0;
    3032             : 
    3033       38475 :         BBPgetsubdir(dirname, bid);
    3034             : 
    3035       38475 :         if ((tmpid = BBPnamecheck(nme)) && tmpid != bid) {
    3036           0 :                 GDKerror("illegal temporary name: '%s'\n", nme);
    3037           0 :                 return BBPRENAME_ILLEGAL;
    3038             :         }
    3039       38475 :         if (strlen(dirname) + strLen(nme) + 1 >= IDLENGTH) {
    3040           0 :                 GDKerror("illegal temporary name: '%s'\n", nme);
    3041           0 :                 return BBPRENAME_LONG;
    3042             :         }
    3043             : 
    3044       38475 :         MT_lock_set(&BBPnameLock);
    3045       38475 :         i = BBP_find(nme, false);
    3046       38475 :         if (i != 0) {
    3047           1 :                 MT_lock_unset(&BBPnameLock);
    3048           1 :                 GDKerror("name is in use: '%s'.\n", nme);
    3049           1 :                 return BBPRENAME_ALREADY;
    3050             :         }
    3051             : 
    3052       38474 :         char *nnme;
    3053       38474 :         if (nme == BBP_bak(bid) || strcmp(nme, BBP_bak(bid)) == 0) {
    3054       38474 :                 nnme = BBP_bak(bid);
    3055             :         } else {
    3056       26196 :                 nnme = GDKstrdup(nme);
    3057       26196 :                 if (nnme == NULL) {
    3058           0 :                         MT_lock_unset(&BBPnameLock);
    3059           0 :                         return BBPRENAME_MEMORY;
    3060             :                 }
    3061             :         }
    3062             : 
    3063             :         /* carry through the name change */
    3064       38474 :         if (BBP_logical(bid) && !BBPtmpcheck(BBP_logical(bid))) {
    3065       12278 :                 BBP_delete(bid);
    3066             :         }
    3067       38474 :         if (BBP_logical(bid) != BBP_bak(bid))
    3068       12278 :                 GDKfree(BBP_logical(bid));
    3069       38474 :         BBP_logical(bid) = nnme;
    3070       38474 :         if (tmpid == 0) {
    3071       26196 :                 BBP_insert(bid);
    3072             :         }
    3073       38474 :         MT_lock_set(&b->theaplock);
    3074       38474 :         bool transient = b->batTransient;
    3075       38474 :         MT_lock_unset(&b->theaplock);
    3076       38474 :         if (!transient) {
    3077        8978 :                 bool lock = locked_by == 0 || locked_by != MT_getpid();
    3078             : 
    3079        8978 :                 if (lock)
    3080        8978 :                         MT_lock_set(&GDKswapLock(i));
    3081        8978 :                 BBP_status_on(bid, BBPRENAMED);
    3082        8978 :                 if (lock)
    3083        8978 :                         MT_lock_unset(&GDKswapLock(i));
    3084             :         }
    3085       38474 :         MT_lock_unset(&BBPnameLock);
    3086       38474 :         return 0;
    3087             : }
    3088             : 
    3089             : /*
    3090             :  * @+ BBP swapping Policy
    3091             :  * The BAT can be moved back to disk using the routine BBPfree.  It
    3092             :  * frees the storage for other BATs. After this call BAT* references
    3093             :  * maintained for the BAT are wrong.  We should keep track of dirty
    3094             :  * unloaded BATs. They may have to be committed later on, which may
    3095             :  * include reading them in again.
    3096             :  *
    3097             :  * BBPswappable: may this bat be unloaded?  Only real bats without
    3098             :  * memory references can be unloaded.
    3099             :  */
    3100             : static inline void
    3101     4618131 : BBPspin(bat i, const char *s, unsigned event)
    3102             : {
    3103     4618131 :         if (BBPcheck(i) && (BBP_status(i) & event)) {
    3104             :                 lng spin = LL_CONSTANT(0);
    3105             : 
    3106       56942 :                 do {
    3107       56942 :                         MT_sleep_ms(KITTENNAP);
    3108       56941 :                         spin++;
    3109       56941 :                 } while (BBP_status(i) & event);
    3110         330 :                 TRC_DEBUG(BAT_, "%d,%s,%u: " LLFMT " loops\n", (int) i, s, event, spin);
    3111             :         }
    3112     4617948 : }
    3113             : 
    3114             : void
    3115     6765129 : BBPcold(bat i)
    3116             : {
    3117     6765129 :         if (!is_bat_nil(i)) {
    3118     6765459 :                 BAT *b = BBP_desc(i);
    3119     6765459 :                 if (b == NULL || b->batRole == PERSISTENT)
    3120         695 :                         BBP_status_off(i, BBPHOT);
    3121             :         }
    3122     6765129 : }
    3123             : 
    3124             : /* This function can fail if the input parameter (i) is incorrect
    3125             :  * (unlikely). */
    3126             : static inline int
    3127   122277480 : incref(bat i, bool logical, bool lock)
    3128             : {
    3129   122277480 :         int refs;
    3130   122277480 :         BAT *b;
    3131             : 
    3132   122277480 :         if (!BBPcheck(i))
    3133             :                 return 0;
    3134             : 
    3135   122202985 :         if (lock) {
    3136    32150022 :                 for (;;) {
    3137    32150022 :                         MT_lock_set(&GDKswapLock(i));
    3138    32249175 :                         if (!(BBP_status(i) & (BBPUNSTABLE|BBPLOADING)))
    3139             :                                 break;
    3140             :                         /* the BATs is "unstable", try again */
    3141           0 :                         MT_lock_unset(&GDKswapLock(i));
    3142           0 :                         BBPspin(i, __func__, BBPUNSTABLE|BBPLOADING);
    3143             :                 }
    3144             :         }
    3145             :         /* we have the lock */
    3146             : 
    3147   122302138 :         b = BBP_desc(i);
    3148   122302138 :         if (b == NULL) {
    3149             :                 /* should not have happened */
    3150           0 :                 if (lock)
    3151           0 :                         MT_lock_unset(&GDKswapLock(i));
    3152           0 :                 return 0;
    3153             :         }
    3154             : 
    3155   122302138 :         assert(BBP_refs(i) + BBP_lrefs(i) ||
    3156             :                BBP_status(i) & (BBPDELETED | BBPSWAPPED));
    3157   122302138 :         if (logical) {
    3158    32153869 :                 refs = ++BBP_lrefs(i);
    3159    32153869 :                 BBP_pid(i) = 0;
    3160             :         } else {
    3161    90148269 :                 refs = ++BBP_refs(i);
    3162    90148269 :                 BBP_status_on(i, BBPHOT);
    3163             :         }
    3164   122302138 :         if (lock)
    3165    32152930 :                 MT_lock_unset(&GDKswapLock(i));
    3166             : 
    3167             :         return refs;
    3168             : }
    3169             : 
    3170             : /* take a physical reference (fix) on the given bat by calling
    3171             :  * BATdescriptor, which may also load the bat if it is not cached;
    3172             :  * returns 1 if a descriptor was obtained and 0 otherwise (this is a
    3173             :  * success flag, not the new reference count) */
    3174             : int
    3175       48431 : BBPfix(bat i)
    3176             : {       /* NOTE(review): an earlier comment said parent bats get fixed too; that is not visible here - confirm */
    3177       48431 :         return BATdescriptor(i) ? 1 : 0;
    3178             : }
    3179             : 
    3180             : /* increment the logical reference count for the given bat
    3181             :  * returns the new reference count, or 0 if incref rejects the bat */
    3182             : int
    3183    27184042 : BBPretain(bat i)
    3184             : {
    3185    27184042 :         bool lock = locked_by == 0 || locked_by != MT_getpid(); /* no swap lock only if this thread is the one in locked_by */
    3186             : 
    3187    27184042 :         return incref(i, true, lock);   /* logical=true */
    3188             : }
    3189             : 
    3190             : static inline int       /* drop one reference on bat i; unload or destroy the bat when possible */
    3191   137200496 : decref(bat i, bool logical, bool lock, const char *func)        /* logical: lref instead of fix; lock: take GDKswapLock(i); func: caller name for messages */
    3192             : {       /* returns the new count of the decremented kind, or -1 for a nil/invalid id or a failed BBPfree */
    3193   137200496 :         int refs = 0, lrefs;
    3194   137200496 :         bool swap = false;      /* set once BBPUNLOADING has been turned on below */
    3195   137200496 :         bool locked = false;    /* true while b->theaplock is held */
    3196   137200496 :         int farmid = 0;
    3197   137200496 :         BAT *b;
    3198             : 
    3199   137200496 :         if (is_bat_nil(i))
    3200             :                 return -1;
    3201   137200471 :         assert(i > 0);
    3202   137200471 :         if (BBPcheck(i) == 0)
    3203             :                 return -1;
    3204             : 
    3205   137100121 :         if (lock)
    3206   137100032 :                 MT_lock_set(&GDKswapLock(i));
    3207             : 
    3208   136955397 :         while (BBP_status(i) & BBPUNLOADING) {  /* wait for an unload in progress; the swap lock is dropped while spinning */
    3209           0 :                 if (lock)
    3210           0 :                         MT_lock_unset(&GDKswapLock(i));
    3211           0 :                 BBPspin(i, func, BBPUNLOADING);
    3212           0 :                 if (lock)
    3213   136955397 :                         MT_lock_set(&GDKswapLock(i));
    3214             :         }
    3215             : 
    3216   137081711 :         b = BBP_cache(i);       /* may be NULL; every use below checks for that */
    3217             : 
    3218             :         /* decrement references by one */
    3219   137081711 :         if (logical) {
    3220    31954474 :                 if (BBP_lrefs(i) == 0) {
    3221           0 :                         GDKerror("%s: %s does not have logical references.\n", func, BBP_logical(i));
    3222           0 :                         assert(0);
    3223             :                 } else {
    3224    31954474 :                         refs = --BBP_lrefs(i);
    3225             :                 }
    3226             :                 /* cannot release last logical ref if still shared */
    3227             :                 // but we could still have a bat iterator on it
    3228             :                 //assert(!BATshared(BBP_desc(i)) || refs > 0);
    3229             :         } else {
    3230   105127237 :                 if (BBP_refs(i) == 0) {
    3231           0 :                         GDKerror("%s: %s does not have pointer fixes.\n", func, BBP_logical(i));
    3232           0 :                         assert(0);
    3233             :                 } else {
    3234   105127237 :                         refs = --BBP_refs(i);
    3235   105127237 :                         if (b && refs == 0) {   /* last fix is gone: take theaplock now to look at the view parents */
    3236    87493865 :                                 MT_lock_set(&b->theaplock);
    3237    87584816 :                                 locked = true;
    3238    87584816 :                                 if (VIEWtparent(b) || VIEWvtparent(b))
    3239    11000078 :                                         BBP_status_on(i, BBPHOT);       /* views are marked hot */
    3240             :                         }
    3241             :                 }
    3242             :         }
    3243   137172662 :         if (b) {
    3244   136972190 :                 if (!locked) {
    3245    49616807 :                         MT_lock_set(&b->theaplock);
    3246    49658181 :                         locked = true;
    3247             :                 }
    3248             : #if 0
    3249             :                 if (b->batCount > b->batInserted && !isVIEW(b)) {
    3250             :                         /* if batCount is larger than batInserted and
    3251             :                          * the dirty bits are off, it may be that a
    3252             :                          * (sub)commit happened in parallel to an
    3253             :                          * update; we must undo the turning off of the
    3254             :                          * dirty bits */
    3255             :                         if (b->theap && b->theap->parentid == i)
    3256             :                                 b->theap->dirty = true;
    3257             :                         if (b->tvheap && b->tvheap->parentid == i)
    3258             :                                 b->tvheap->dirty = true;
    3259             :                 }
    3260             : #endif
    3261   136985315 :                 if (b->theap)
    3262   136985315 :                         farmid = b->theap->farmid;      /* remembered for the GDKinmemory test below */
    3263             :         }
    3264             : 
    3265             :         /* we destroy transients asap and unload persistent bats only
    3266             :          * if they have been made cold or are not dirty */
    3267   137185787 :         unsigned chkflag = BBPSYNCING;  /* status bits that veto unloading in the test below */
    3268   137185787 :         bool swapdirty = false;         /* when true, dirty bats may be unloaded as well */
    3269   137185787 :         if (b) {
    3270   136990332 :                 size_t cursize;
    3271   136990332 :                 if ((cursize = GDKvm_cursize()) < (size_t) (GDK_vm_maxsize * 0.75)) {   /* below 75% of the maximum */
    3272   136982571 :                         if (!locked) {  /* b != NULL means theaplock was already taken above, so this is only defensive */
    3273           0 :                                 MT_lock_set(&b->theaplock);
    3274           0 :                                 locked = true;
    3275             :                         }
    3276   136982571 :                         if (((b->theap ? b->theap->size : 0) + (b->tvheap ? b->tvheap->size : 0)) < (GDK_vm_maxsize - cursize) / 32)
    3277   136981045 :                                 chkflag |= BBPHOT;      /* heaps are small relative to the remaining room: hot bats stay loaded */
    3278           0 :                 } else if (cursize > (size_t) (GDK_vm_maxsize * 0.85))  /* above 85% of the maximum */
    3279   137178026 :                         swapdirty = true;
    3280             :         }
    3281             :         /* only consider unloading if refs is 0; if, in addition, lrefs
    3282             :          * is 0, we can definitely unload, else only if some more
    3283             :          * conditions are met */
    3284   240018881 :         if (BBP_refs(i) == 0 &&
    3285   118122499 :             (BBP_lrefs(i) == 0 ||
    3286   102807894 :              (b != NULL && b->theap != NULL
    3287   102810963 :               ? ((swapdirty || !BATdirty(b)) &&
    3288    10119324 :                  !(BBP_status(i) & chkflag) &&
    3289       15527 :                  (BBP_status(i) & BBPPERSISTENT) &&
    3290             :                  /* cannot unload in-memory data */
    3291        7536 :                  !GDKinmemory(farmid) &&
    3292             :                  /* do not unload views or parents of views */
    3293        7536 :                  !BATshared(b) &&
    3294   102815247 :                  b->batCacheid == b->theap->parentid &&
    3295        7353 :                  (b->tvheap == NULL || b->batCacheid == b->tvheap->parentid))
    3296       32961 :               : (BBP_status(i) & BBPTMP)))) {
    3297             :                 /* bat will be unloaded now. set the UNLOADING bit
    3298             :                  * while locked so no other thread thinks it's
    3299             :                  * available anymore */
    3300    15293945 :                 assert((BBP_status(i) & BBPUNLOADING) == 0);
    3301    15293945 :                 TRC_DEBUG(BAT_, "%s set to unloading BAT %d (status %u, lrefs %d)\n", func, i, BBP_status(i), BBP_lrefs(i));
    3302    15293945 :                 BBP_status_on(i, BBPUNLOADING);
    3303    15293945 :                 swap = true;
    3304             :         } /* else: bat cannot be swapped out */
    3305   137178026 :         lrefs = BBP_lrefs(i);   /* sampled while still locked; used after the locks are released */
    3306   137178026 :         if (locked)
    3307   137014323 :                 MT_lock_unset(&b->theaplock);
    3308             : 
    3309             :         /* unlock before re-locking in unload; as saving a dirty
    3310             :          * persistent bat may take a long time */
    3311   137207816 :         if (lock)
    3312   137107553 :                 MT_lock_unset(&GDKswapLock(i));
    3313             : 
    3314   137240975 :         if (swap) {
    3315    15296375 :                 if (b != NULL) {
    3316    15285533 :                         if (lrefs == 0 && (BBP_status(i) & BBPDELETED) == 0) {
    3317             :                                 /* free memory (if loaded) and delete from
    3318             :                                  * disk (if transient but saved) */
    3319    15278366 :                                 BBPdestroy(b);
    3320             :                         } else {
    3321        7167 :                                 TRC_DEBUG(BAT_, "%s unload and free bat %d\n", func, i);
    3322             :                                 /* free memory of transient */
    3323        7167 :                                 if (BBPfree(b) != GDK_SUCCEED)
    3324             :                                         return -1;      /* indicate failure */
    3325             :                         }
    3326       10842 :                 } else if (lrefs == 0 && (BBP_status(i) & BBPDELETED) == 0) {   /* nothing cached, no logical refs left */
    3327        5894 :                         if ((b = BBP_desc(i)) != NULL)
    3328        5894 :                                 BATdelete(b);
    3329        5894 :                         BBPclear(i);
    3330             :                 } else {
    3331        4948 :                         BBP_status_off(i, BBPUNLOADING);        /* nothing to do after all: clear the bit set above */
    3332             :                 }
    3333             :         }
    3334             :         return refs;
    3335             : }
    3336             : 
    3337             : int
    3338   101373117 : BBPunfix(bat i)         /* drop one physical reference (pointer fix) on bat i */
    3339             : {
    3340   101373117 :         return decref(i, false, true, __func__);        /* logical=false, always takes the swap lock */
    3341             : }
    3342             : 
    3343             : int
    3344    31969120 : BBPrelease(bat i)       /* drop one logical reference on bat i */
    3345             : {
    3346    31969120 :         return decref(i, true, true, __func__);         /* logical=true, always takes the swap lock */
    3347             : }
    3348             : 
    3349             : void
    3350     4980336 : BBPkeepref(BAT *b)      /* add a logical reference to b, set it to BAT_READ, then drop one physical reference */
    3351             : {
    3352     4980336 :         assert(b != NULL);
    3353     4980336 :         bool lock = locked_by == 0 || locked_by != MT_getpid(); /* no swap lock only if this thread is the one in locked_by */
    3354     4980336 :         int i = b->batCacheid;
    3355     4980336 :         int refs = incref(i, true, lock);
    3356     4982927 :         if (refs == 1) {        /* this was the first logical reference */
    3357     4738835 :                 MT_lock_set(&b->theaplock);
    3358     4736398 :                 BATsettrivprop(b);
    3359     4734937 :                 MT_lock_unset(&b->theaplock);
    3360             :         }
    3361     4979614 :         if (ATOMIC_GET(&GDKdebug) & CHECKMASK)  /* property check only when this debug mask is set */
    3362     4935676 :                 BATassertProps(b);
    3363     4982369 :         if (BATsetaccess(b, BAT_READ) == NULL)
    3364             :                 return;         /* already decreffed */
    3365             : 
    3366     3956441 :         refs = decref(i, false, lock, __func__);        /* give up the caller's physical reference */
    3367     3955721 :         (void) refs;    /* refs is otherwise unused when assert is compiled out */
    3368     3955721 :         assert(refs >= 0);
    3369             : }
    3370             : 
    3371             : BAT *
    3372    90286711 : BATdescriptor(bat i)    /* fix bat i and return its descriptor, loading the bat if it is not cached */
    3373             : {
    3374    90286711 :         BAT *b = NULL;
    3375             : 
    3376    90286711 :         if (BBPcheck(i)) {
    3377    90222545 :                 bool lock = locked_by == 0 || locked_by != MT_getpid();
    3378             :                 if (lock) {
    3379    90222545 :                         for (;;) {
    3380    90222545 :                                 MT_lock_set(&GDKswapLock(i));
    3381    90177696 :                                 if (!(BBP_status(i) & (BBPUNSTABLE|BBPLOADING)))
    3382             :                                         break;
    3383             :                                 /* the BAT is "unstable", try again */
    3384           0 :                                 MT_lock_unset(&GDKswapLock(i));
    3385           0 :                                 BBPspin(i, __func__, BBPUNSTABLE|BBPLOADING);
    3386             :                         }
    3387             :                 }
    3388    90177696 :                 if (incref(i, false, false) > 0) {      /* lock=false: the swap lock, if needed, was taken above */
    3389    90312874 :                         b = BBP_cache(i);
    3390    90312874 :                         if (b == NULL) {
    3391       20140 :                                 b = getBBPdescriptor(i);
    3392       20139 :                                 if (b == NULL) {
    3393             :                                         /* if loading failed, we need to
    3394             :                                          * compensate for the incref */
    3395           0 :                                         decref(i, false, false, __func__);
    3396             :                                 }
    3397             :                         }
    3398             :                 }
    3399    90312873 :                 if (lock)
    3400    90305670 :                         MT_lock_unset(&GDKswapLock(i));
    3401             :         }
    3402    90224515 :         return b;       /* NULL if the id is invalid, incref returned 0, or loading failed */
    3403             : }
    3404             : 
    3405             : /*
    3406             :  * getBBPdescriptor checks whether the BAT needs loading and does so
    3407             :  * if necessary. You must have at least one fix on the BAT before
    3408             :  * calling this (asserted below via BBP_refs).
    3409             :  */
    3410             : static BAT *
    3411       20139 : getBBPdescriptor(bat i)
    3412             : {
    3413       20139 :         bool load = false;
    3414       20139 :         BAT *b = NULL;
    3415             : 
    3416       20139 :         assert(i > 0);
    3417       20139 :         if (!BBPcheck(i)) {
    3418           0 :                 GDKerror("BBPcheck failed for bat id %d\n", i);
    3419           0 :                 return NULL;
    3420             :         }
    3421       20139 :         assert(BBP_refs(i));
    3422       20139 :         if ((b = BBP_cache(i)) == NULL || BBP_status(i) & BBPWAITING) {
    3423             : 
    3424       20140 :                 while (BBP_status(i) & BBPWAITING) {        /* wait for bat to be loaded by other thread */
    3425           1 :                         MT_lock_unset(&GDKswapLock(i));  /* NOTE(review): presumably the caller holds GDKswapLock(i) here - confirm */
    3426           1 :                         BBPspin(i, __func__, BBPWAITING);
    3427       20140 :                         MT_lock_set(&GDKswapLock(i));
    3428             :                 }
    3429       20139 :                 if (BBPvalid(i)) {
    3430       20139 :                         b = BBP_cache(i);       /* re-read: another thread may have loaded it while we waited */
    3431       20139 :                         if (b == NULL) {
    3432       20139 :                                 load = true;
    3433       20139 :                                 TRC_DEBUG(BAT_, "set to loading BAT %d\n", i);
    3434       20139 :                                 BBP_status_on(i, BBPLOADING);
    3435             :                         }
    3436             :                 }
    3437             :         }
    3438       20139 :         if (load) {
    3439       20139 :                 TRC_DEBUG(IO_, "load %s\n", BBP_logical(i));
    3440             : 
    3441       20139 :                 b = BATload_intern(i, false);
    3442             : 
    3443             :                 /* clearing bits can be done without the lock */
    3444       20139 :                 BBP_status_off(i, BBPLOADING);
    3445       20139 :                 CHECKDEBUG if (b != NULL)
    3446       16713 :                         BATassertProps(b);
    3447             :         }
    3448             :         return b;
    3449             : }
    3450             : 
    3451             : /*
    3452             :  * BBPsave does the actual saving unlocked; it just sets BBPSAVING in
    3453             :  * the BBP_status of the BAT, so others that want to save or unload
    3454             :  * this BAT must spin on the BBP_status field.
    3455             :  */
    3456             : gdk_return
    3457       12353 : BBPsave(BAT *b)
    3458             : {
    3459       12353 :         bool lock = locked_by == 0 || locked_by != MT_getpid();
    3460       12353 :         bat bid = b->batCacheid;
    3461       12353 :         gdk_return ret = GDK_SUCCEED;
    3462             : 
    3463       12353 :         MT_lock_set(&b->theaplock);
    3464       12353 :         if (BBP_lrefs(bid) == 0 || isVIEW(b) || !BATdirty(b)) {
    3465             :                 /* nothing to save for the bat itself; only write out a dirty hash */
    3466       10850 :                 MT_lock_unset(&b->theaplock);
    3467       10850 :                 MT_rwlock_rdlock(&b->thashlock);
    3468       10850 :                 if (b->thash && b->thash != (Hash *) 1 &&
    3469         185 :                     (b->thash->heaplink.dirty || b->thash->heapbckt.dirty))
    3470         112 :                         BAThashsave(b, (BBP_status(bid) & BBPPERSISTENT) != 0);
    3471       10850 :                 MT_rwlock_rdunlock(&b->thashlock);
    3472       10850 :                 return GDK_SUCCEED;
    3473             :         }
    3474        1503 :         MT_lock_unset(&b->theaplock);
    3475        1503 :         if (lock)
    3476        1503 :                 MT_lock_set(&GDKswapLock(bid));
    3477             : 
    3478        1503 :         if (BBP_status(bid) & BBPSAVING) {
    3479             :                 /* wait until save in other thread completes */
    3480           0 :                 if (lock)
    3481           0 :                         MT_lock_unset(&GDKswapLock(bid));
    3482           0 :                 BBPspin(bid, __func__, BBPSAVING);      /* ret stays GDK_SUCCEED on this path */
    3483             :         } else {
    3484             :                 /* save it */
    3485        1503 :                 unsigned flags = BBPSAVING;
    3486             : 
    3487        1503 :                 MT_lock_set(&b->theaplock);
    3488        1503 :                 if (DELTAdirty(b)) {
    3489         441 :                         flags |= BBPSWAPPED;
    3490             :                 }
    3491        1503 :                 if (b->batTransient) {
    3492        1493 :                         flags |= BBPTMP;
    3493             :                 }
    3494        1503 :                 MT_lock_unset(&b->theaplock);
    3495        1503 :                 BBP_status_on(bid, flags);      /* set while the swap lock (if taken) is still held */
    3496        1503 :                 if (lock)
    3497        1503 :                         MT_lock_unset(&GDKswapLock(bid));
    3498             : 
    3499        1503 :                 TRC_DEBUG(IO_, "save " ALGOBATFMT "\n", ALGOBATPAR(b));
    3500             : 
    3501             :                 /* do the time-consuming work unlocked */
    3502        1503 :                 if (BBP_status(bid) & BBPEXISTING && b->batInserted > 0)
    3503           2 :                         ret = BBPbackup(b, false);      /* BATsave below is skipped if this fails */
    3504           2 :                 if (ret == GDK_SUCCEED) {
    3505        1503 :                         ret = BATsave(b);
    3506             :                 }
    3507             :                 /* clearing bits can be done without the lock */
    3508        1503 :                 BBP_status_off(bid, BBPSAVING);
    3509             :         }
    3510             :         return ret;
    3511             : }
    3512             : 
    3513             : /*
    3514             :  * TODO merge BBPfree with BATfree? Its function is to prepare a BAT
    3515             :  * for being unloaded (or even destroyed, if the BAT is not
    3516             :  * persistent).
    3517             :  */
    3518             : static void
    3519    15277867 : BBPdestroy(BAT *b)      /* unfix atoms, drop b's heaps and parent references, delete b and clear its BBP entry */
    3520             : {
    3521    15277867 :         bat tp = VIEWtparent(b);
    3522    15277867 :         bat vtp = VIEWvtparent(b);
    3523             : 
    3524    15277867 :         if (tp == 0) {
    3525             :                 /* bats that get destroyed must unfix their atoms */
    3526     8472868 :                 gdk_return (*tunfix) (const void *) = BATatoms[b->ttype].atomUnfix;
    3527     8472868 :                 if (tunfix) {
    3528           0 :                         BUN p, q;
    3529           0 :                         BATiter bi = bat_iterator_nolock(b);
    3530             : 
    3531           0 :                         BATloop(b, p, q) {
    3532             :                                 /* ignore errors */
    3533           0 :                                 (void) (*tunfix)(BUNtail(bi, p));
    3534             :                         }
    3535             :                 }
    3536             :         }
    3537    15277867 :         if (b->theap) {
    3538    15278356 :                 HEAPdecref(b->theap, tp == 0);
    3539    15280946 :                 b->theap = NULL;
    3540    15280946 :                 if (tp != 0)
    3541     6807827 :                         BBPrelease(tp);         /* give back the logical reference on the tail parent */
    3542             :         }
    3543    15280175 :         if (b->tvheap) {
    3544     2236250 :                 HEAPdecref(b->tvheap, vtp == 0);
    3545     2236277 :                 b->tvheap = NULL;
    3546     2236277 :                 if (vtp != 0)
    3547     1544163 :                         BBPrelease(vtp);        /* give back the logical reference on the vheap parent */
    3548             :         }
    3549    15280179 :         if (b->oldtail) {
    3550           2 :                 ATOMIC_AND(&b->oldtail->refs, ~DELAYEDREMOVE);       /* clear the DELAYEDREMOVE bit before the decref */
    3551           2 :                 HEAPdecref(b->oldtail, true);
    3552           2 :                 b->oldtail = NULL;
    3553             :         }
    3554    15280179 :         BATdelete(b);
    3555             : 
    3556    15279535 :         BBPclear(b->batCacheid);     /* if destroyed; de-register from BBP */
    3557    15280085 : }
    3558             : 
    3559             : static gdk_return
    3560       12353 : BBPfree(BAT *b)         /* save b via BBPsave, then free its memory and uncache it; returns BBPsave's result */
    3561             : {
    3562       12353 :         bat bid = b->batCacheid;
    3563       12353 :         gdk_return ret;
    3564             : 
    3565       12353 :         assert(bid > 0);
    3566       12353 :         assert(BBPswappable(b));
    3567       12353 :         assert(!isVIEW(b));
    3568             : 
    3569       12353 :         BBP_unload_inc();
    3570             :         /* write dirty BATs before unloading */
    3571       12353 :         ret = BBPsave(b);
    3572       12353 :         if (ret == GDK_SUCCEED) {
    3573       12353 :                 if (BBP_cache(bid))
    3574       12353 :                         BATfree(b);     /* free memory */
    3575       12353 :                 BBPuncacheit(bid, false);
    3576             :         }
    3577       12353 :         TRC_DEBUG(BAT_, "turn off unloading %d\n", bid);
    3578       12353 :         BBP_status_off(bid, BBPUNLOADING);      /* cleared even when the save failed and the bat stays cached */
    3579       12353 :         BBP_unload_dec();
    3580       12353 :         return ret;
    3581             : }
    3582             : 
    3583             : /*
    3584             :  * BBPquickdesc loads a BAT descriptor without loading the entire BAT,
    3585             :  * the result of which may be used only for a *limited* number of
    3586             :  * purposes. Specifically, during the global sync/commit, we do not
    3587             :  * want to load any BATs that are not already loaded, both because
    3588             :  * this costs performance, and because getting into memory shortage
    3589             :  * during a commit is extremely dangerous. Loading a BAT tends not to
    3590             :  * be required, since the commit actions mostly involve moving some
    3591             :  * pointers in the BAT descriptor.
    3592             :  */
    3593             : BAT *
    3594     1183955 : BBPquickdesc(bat bid)
    3595             : {
    3596     1183955 :         BAT *b;
    3597             : 
    3598     1183955 :         if (!BBPcheck(bid)) {
    3599           0 :                 if (!is_bat_nil(bid)) {         /* a nil id quietly yields NULL; any other bad id is reported */
    3600           0 :                         GDKerror("called with invalid batid.\n");
    3601           0 :                         assert(0);
    3602             :                 }
    3603             :                 return NULL;
    3604             :         }
    3605     1183733 :         BBPspin(bid, __func__, BBPWAITING);
    3606     1183611 :         b = BBP_desc(bid);
    3607     1183611 :         if (b && b->ttype < 0) {        /* negative ttype: look the atom up again by its recorded name */
    3608         241 :                 const char *aname = ATOMunknown_name(b->ttype);
    3609         241 :                 int tt = ATOMindex(aname);
    3610         241 :                 if (tt < 0) {
    3611           0 :                         GDKwarning("atom '%s' unknown in bat '%s'.\n",
    3612             :                                    aname, BBP_physical(bid));
    3613             :                 } else {
    3614         241 :                         b->ttype = tt;  /* atom is known now: store its real index in the descriptor */
    3615             :                 }
    3616             :         }
    3617             :         return b;
    3618             : }
    3619             : 
    3620             : /*
    3621             :  * @+ Global Commit
    3622             :  */
    3623             : static BAT *    /* returns the bat to be handled by the (sub)commit, or NULL; on error *i is negated */
    3624     3442461 : dirty_bat(bat *i, bool subcommit)
    3625             : {
    3626     3442461 :         if (BBPvalid(*i)) {
    3627     3434459 :                 BAT *b;
    3628     3434459 :                 BBPspin(*i, __func__, BBPSAVING);       /* wait for a save in progress to finish */
    3629     3434459 :                 b = BBP_cache(*i);
    3630     3434459 :                 if (b != NULL) {
    3631     3255343 :                         MT_lock_set(&b->theaplock);
    3632     3493649 :                         if ((BBP_status(*i) & BBPNEW) &&
    3633      238306 :                             BATcheckmodes(b, false) != GDK_SUCCEED) /* check mmap modes */
    3634           0 :                                 *i = -*i;       /* error */
    3635     3255343 :                         else if ((BBP_status(*i) & BBPPERSISTENT) &&
    3636           0 :                                  (subcommit || BATdirty(b))) {
    3637     3076936 :                                 MT_lock_unset(&b->theaplock);
    3638     3076936 :                                 return b;       /* the bat is loaded, persistent and (for a full commit) dirty */
    3639             :                         }
    3640      178407 :                         MT_lock_unset(&b->theaplock);
    3641      179116 :                 } else if (subcommit)
    3642      174064 :                         return BBP_desc(*i);    /* not cached: a subcommit still gets the descriptor */
    3643             :         }
    3644             :         return NULL;
    3645             : }
    3646             : 
    3647             : /*
    3648             :  * @- backup-bat
    3649             :  * Backup-bat moves all files of a BAT to a backup directory. Only
    3650             :  * after this succeeds, it may be saved. If some failure occurs
    3651             :  * halfway saving, we can thus always roll back.
    3652             :  */
    3653             : static gdk_return
    3654      204720 : file_move(int farmid, const char *srcdir, const char *dstdir, const char *name, const char *ext)
    3655             : {       /* moves name.ext from srcdir to dstdir under the same name via GDKmove */
    3656      204720 :         if (GDKmove(farmid, srcdir, name, ext, dstdir, name, ext, false) == GDK_SUCCEED) {
    3657             :                 return GDK_SUCCEED;
    3658             :         } else {
    3659           0 :                 char *path;
    3660           0 :                 struct stat st;
    3661             : 
    3662           0 :                 path = GDKfilepath(farmid, srcdir, name, ext);
    3663           0 :                 if (path == NULL)
    3664           0 :                         return GDK_FAIL;
    3665           0 :                 if (MT_stat(path, &st)) {
    3666             :                         /* source file does not exist; the best
    3667             :                          * recovery is to give an error but continue
    3668             :                          * by considering the BAT as not saved; making
    3669             :                          * sure that this time it does get saved.
    3670             :                          */
    3671           0 :                         GDKsyserror("file_move: cannot stat %s\n", path);
    3672           0 :                         GDKfree(path);
    3673           0 :                         return GDK_FAIL;        /* fishy, but not fatal */
    3674             :                 }
    3675           0 :                 GDKfree(path);
    3676             :         }
    3677           0 :         return GDK_FAIL;        /* the move failed although the source exists; same result as the branch above */
    3678             : }
    3679             : 
    3680             : /* returns true if the file exists (i.e. MT_stat on its path succeeds) */
    3681             : static bool
    3682     2814554 : file_exists(int farmid, const char *dir, const char *name, const char *ext)
    3683             : {
    3684     2814554 :         char *path;
    3685     2814554 :         struct stat st;
    3686     2814554 :         int ret = -1;   /* stays -1, so the result is false, if the path cannot be built */
    3687             : 
    3688     2814554 :         path = GDKfilepath(farmid, dir, name, ext);
    3689     2814554 :         if (path) {
    3690     2814554 :                 ret = MT_stat(path, &st);
    3691     2814554 :                 TRC_DEBUG(IO_, "stat(%s) = %d\n", path, ret);
    3692     2814554 :                 GDKfree(path);
    3693             :         }
    3694     2814554 :         return (ret == 0);
    3695             : }
    3696             : 
    3697             : static gdk_return
    3698      204716 : heap_move(Heap *hp, const char *srcdir, const char *dstdir, const char *nme, const char *ext)
    3699             : {       /* puts the heap file nme.ext (or a ".kill" marker for it) into dstdir unless it is already there */
    3700             :         /* see doc at BATsetaccess()/gdk_bat.c for an expose on mmap
    3701             :          * heap modes */
    3702      204716 :         if (file_exists(hp->farmid, dstdir, nme, ext)) {
    3703             :                 /* don't overwrite heap with the committed state
    3704             :                  * already in dstdir */
    3705             :                 return GDK_SUCCEED;
    3706      204716 :         } else if (hp->newstorage == STORE_PRIV &&
    3707           0 :                    !file_exists(hp->farmid, srcdir, nme, ext)) {
    3708             : 
    3709             :                 /* In order to prevent half-saved X.new files
    3710             :                  * surviving a recover we create a dummy file in the
    3711             :                  * BACKUP(dstdir) whose presence will trigger
    3712             :                  * BBPrecover to remove them.  Thus, X will prevail
    3713             :                  * where it otherwise wouldn't have.  If X already has
    3714             :                  * a saved X.new, that one is backed up as normal.
    3715             :                  */
    3716             : 
    3717           0 :                 FILE *fp;
    3718           0 :                 long_str kill_ext;
    3719           0 :                 char *path;
    3720             : 
    3721           0 :                 strconcat_len(kill_ext, sizeof(kill_ext), ext, ".kill", NULL);  /* kill_ext = ext followed by ".kill" */
    3722           0 :                 path = GDKfilepath(hp->farmid, dstdir, nme, kill_ext);
    3723           0 :                 if (path == NULL)
    3724             :                         return GDK_FAIL;
    3725           0 :                 fp = MT_fopen(path, "w");       /* creating the (empty) file is all that is needed */
    3726           0 :                 if (fp == NULL)
    3727           0 :                         GDKsyserror("heap_move: cannot open file %s\n", path);
    3728           0 :                 TRC_DEBUG(IO_, "open %s = %d\n", path, fp ? 0 : -1);
    3729           0 :                 GDKfree(path);
    3730             : 
    3731           0 :                 if (fp != NULL) {
    3732           0 :                         fclose(fp);
    3733           0 :                         return GDK_SUCCEED;
    3734             :                 } else {
    3735             :                         return GDK_FAIL;
    3736             :                 }
    3737             :         }
    3738      204716 :         return file_move(hp->farmid, srcdir, dstdir, nme, ext);        /* normal case: move the heap file itself */
    3739             : }
    3740             : 
    3741             : /*
    3742             :  * @- BBPprepare
    3743             :  *
    3744             :  * this routine makes sure there is a BAKDIR/, and initiates one if
    3745             :  * not.  For subcommits, it does the same with SUBDIR.
    3746             :  *
    3747             :  * It is now locked, to get proper file counters, and also to prevent
    3748             :  * concurrent BBPrecovers, etc.
    3749             :  *
    3750             :  * backup_dir == 0 => no backup BBP.dir
    3751             :  * backup_dir == 1 => BBP.dir saved in BACKUP/
    3752             :  * backup_dir == 2 => BBP.dir saved in SUBCOMMIT/
                        *
                        * Steps, in order:
                        * - on the first subcommit call (backup_subdir == 0),
                        *   BBPrecover_subdir() is run first;
                        * - on the first call overall (backup_files == 0), backup_dir is
                        *   reset, BBPrecover(0) is run and BAKDIR is created if it does
                        *   not exist yet;
                        * - on the first subcommit call, SUBDIR is created;
                        * - BBP.dir is moved into the directory selected by subcommit,
                        *   unless backup_dir says it is already there;
                        * - backup_files is incremented, and backup_subdir too when
                        *   subcommit is true.
                        *
                        * Returns GDK_SUCCEED, or the result of the failing step
                        * (GDK_FAIL when a path cannot be built or a directory cannot be
                        * created).  The counters are only updated on success.
    3753             :  */
    3754             : 
    3755             : static gdk_return
    3756       24201 : BBPprepare(bool subcommit)
    3757             : {
    3758       24201 :         bool start_subcommit;
                               /* the value backup_dir must have when we are done: 1 for a
                                * normal commit, 2 for a subcommit (see the table above) */
    3759       24201 :         int set = 1 + subcommit;
    3760       24201 :         gdk_return ret = GDK_SUCCEED;
    3761             : 
    3762       24201 :         start_subcommit = (subcommit && backup_subdir == 0);
    3763       11925 :         if (start_subcommit) {
    3764             :                 /* starting a subcommit. Make sure SUBDIR and DELDIR
    3765             :                  * are clean */
    3766       11925 :                 ret = BBPrecover_subdir();
    3767       11925 :                 if (ret != GDK_SUCCEED)
    3768             :                         return ret;
    3769             :         }
    3770       24201 :         if (backup_files == 0) {
    3771         343 :                 backup_dir = 0;
    3772         343 :                 ret = BBPrecover(0);
    3773         343 :                 if (ret != GDK_SUCCEED)
    3774             :                         return ret;
    3775         343 :                 str bakdirpath = GDKfilepath(0, NULL, BAKDIR, NULL);
    3776         343 :                 if (bakdirpath == NULL) {
    3777             :                         return GDK_FAIL;
    3778             :                 }
    3779             : 
    3780         343 :                 if (MT_mkdir(bakdirpath) < 0 && errno != EEXIST) {
    3781           0 :                         GDKsyserror("cannot create directory %s\n", bakdirpath);
    3782           0 :                         GDKfree(bakdirpath);
    3783           0 :                         return GDK_FAIL;
    3784             :                 }
    3785             :                 /* if BAKDIR already exists, don't signal error */
                                       /* NOTE(review): ret still holds the BBPrecover(0) result
                                        * here (GDK_SUCCEED, otherwise we would have returned), so
                                        * the value traced below is not the MT_mkdir result */
    3786         343 :                 TRC_DEBUG(IO_, "mkdir %s = %d\n", bakdirpath, (int) ret);
    3787         343 :                 GDKfree(bakdirpath);
    3788             :         }
    3789       24201 :         if (start_subcommit) {
    3790             :                 /* make a new SUBDIR (subdir of BAKDIR) */
    3791       11925 :                 str subdirpath = GDKfilepath(0, NULL, SUBDIR, NULL);
    3792       11925 :                 if (subdirpath == NULL) {
    3793             :                         return GDK_FAIL;
    3794             :                 }
    3795             : 
                                       /* unlike for BAKDIR above, EEXIST is not tolerated here;
                                        * presumably BBPrecover_subdir() is expected to have
                                        * removed any old SUBDIR -- TODO confirm */
    3796       11925 :                 if (MT_mkdir(subdirpath) < 0) {
    3797           0 :                         GDKsyserror("cannot create directory %s\n", subdirpath);
    3798           0 :                         GDKfree(subdirpath);
    3799           0 :                         return GDK_FAIL;
    3800             :                 }
    3801       11925 :                 TRC_DEBUG(IO_, "mkdir %s\n", subdirpath);
    3802       11925 :                 GDKfree(subdirpath);
    3803             :         }
    3804       24201 :         if (backup_dir != set) {
    3805             :                 /* a valid backup dir *must* at least contain BBP.dir */
                                       /* the source is BAKDIR when BBP.dir was already backed up
                                        * (backup_dir != 0), otherwise the live copy in BATDIR;
                                        * the destination follows the subcommit flag */
    3806       48729 :                 if ((ret = GDKmove(0, backup_dir ? BAKDIR : BATDIR, "BBP", "dir", subcommit ? SUBDIR : BAKDIR, "BBP", "dir", true)) != GDK_SUCCEED)
    3807             :                         return ret;
    3808       24193 :                 backup_dir = set;
    3809             :         }
    3810             :         /* increase counters */
    3811       24201 :         backup_subdir += subcommit;
    3812       24201 :         backup_files++;
    3813             : 
    3814       24201 :         return ret;
    3815             : }
    3816             : 
    3817             : static gdk_return
    3818     1109190 : do_backup(Heap *h, bool dirty, bool subcommit)
    3819             : {
                               /* Make sure the on-disk file of heap h is saved in the backup
                                * directory (SUBDIR when subcommit is true, else BAKDIR).
                                *
                                * dirty tells whether the heap is dirty; only then is a file
                                * moved out of its normal location.  For a subcommit, a backup
                                * that already sits in BAKDIR is moved on to SUBDIR instead.
                                *
                                * Nothing is done when h->wasempty is set or when the heap is
                                * stored as STORE_MMAP.
                                *
                                * Returns GDK_FAIL if the path cannot be built, if a move failed
                                * while the source file still exists, or if moving a .new.kill
                                * marker to SUBDIR failed; GDK_SUCCEED otherwise. */
    3820     1109190 :         gdk_return ret = GDK_SUCCEED;
    3821     1109190 :         char extnew[16];
    3822             : 
                               /* presumably there is no earlier on-disk state to protect in
                                * this case -- TODO confirm the meaning of wasempty */
    3823     1109190 :         if (h->wasempty) {
    3824             :                 return GDK_SUCCEED;
    3825             :         }
    3826             : 
    3827             :         /* direct mmap is unprotected (readonly usage, or has WAL
    3828             :          * protection) */
    3829     1109190 :         if (h->storage != STORE_MMAP) {
    3830             :                 /* STORE_PRIV saves into X.new files. Two cases could
    3831             :                  * happen. The first is when a valid X.new exists
    3832             :                  * because of an access change or a previous
    3833             :                  * commit. This X.new should be backed up as
    3834             :                  * usual. The second case is when X.new doesn't
    3835             :                  * exist. In that case we could have half written
    3836             :                  * X.new files (after a crash). To protect against
    3837             :                  * these we write X.new.kill files in the backup
    3838             :                  * directory (see heap_move). */
    3839     1100203 :                 gdk_return mvret = GDK_SUCCEED;
    3840             : 
                                       /* build BATDIR/<h->filename> and split that string in
                                        * place: srcdir keeps the directory part, nme points at
                                        * the file name up to its first '.', ext at the rest.
                                        * Only the asserts check that a separator and an
                                        * extension are actually present. */
    3841     1100203 :                 char *srcdir = GDKfilepath(NOFARM, BATDIR, h->filename, NULL);
    3842     1100203 :                 if (srcdir == NULL)
    3843             :                         return GDK_FAIL;
    3844     1100203 :                 char *nme = strrchr(srcdir, DIR_SEP);
    3845     1100203 :                 assert(nme != NULL);
    3846     1100203 :                 *nme++ = '\0';
    3847     1100203 :                 char *ext = strchr(nme, '.');
    3848     1100203 :                 assert(ext != NULL);
    3849     1100203 :                 *ext++ = '\0';
    3850             : 
    3851     1100203 :                 strconcat_len(extnew, sizeof(extnew), ext, ".new", NULL);
    3852     1304919 :                 if (dirty &&
    3853      409432 :                     !file_exists(h->farmid, BAKDIR, nme, extnew) &&
    3854      204716 :                     !file_exists(h->farmid, BAKDIR, nme, ext)) {
    3855             :                         /* if the heap is dirty and there is no heap
    3856             :                          * file (with or without .new extension) in
    3857             :                          * the BAKDIR, move the heap (preferably with
    3858             :                          * .new extension) to the correct backup
    3859             :                          * directory */
    3860      204716 :                         if (file_exists(h->farmid, srcdir, nme, extnew)) {
    3861           0 :                                 mvret = heap_move(h, srcdir,
    3862             :                                                   subcommit ? SUBDIR : BAKDIR,
    3863             :                                                   nme, extnew);
    3864      204716 :                         } else if (file_exists(h->farmid, srcdir, nme, ext)) {
    3865      204718 :                                 mvret = heap_move(h, srcdir,
    3866             :                                                   subcommit ? SUBDIR : BAKDIR,
    3867             :                                                   nme, ext);
    3868      204716 :                                 if (mvret == GDK_SUCCEED) {
    3869             :                                         /* file no longer in "standard"
    3870             :                                          * location */
    3871      204716 :                                         h->hasfile = false;
    3872             :                                 }
    3873             :                         }
    3874      895487 :                 } else if (subcommit) {
    3875             :                         /* if subcommit, we may need to move an
    3876             :                          * already made backup from BAKDIR to
    3877             :                          * SUBDIR */
    3878      895487 :                         if (file_exists(h->farmid, BAKDIR, nme, extnew))
    3879           0 :                                 mvret = file_move(h->farmid, BAKDIR, SUBDIR, nme, extnew);
    3880      895487 :                         else if (file_exists(h->farmid, BAKDIR, nme, ext))
    3881           4 :                                 mvret = file_move(h->farmid, BAKDIR, SUBDIR, nme, ext);
    3882             :                 }
    3883             :                 /* there is a situation where the move may fail,
    3884             :                  * namely if this heap was not supposed to be existing
    3885             :                  * before, i.e. after a BATmaterialize on a persistent
    3886             :                  * bat; as a workaround, do not complain about move
    3887             :                  * failure if the source file is nonexistent
    3888             :                  */
    3889      204720 :                 if (mvret != GDK_SUCCEED && file_exists(h->farmid, srcdir, nme, ext)) {
    3890     1100203 :                         ret = GDK_FAIL;
    3891             :                 }
                                       /* for a subcommit, a .new.kill marker left in BAKDIR
                                        * has to follow the backup into SUBDIR */
    3892     1100203 :                 if (subcommit &&
    3893     1100201 :                     (h->storage == STORE_PRIV || h->newstorage == STORE_PRIV)) {
    3894           0 :                         long_str kill_ext;
    3895             : 
    3896           0 :                         strconcat_len(kill_ext, sizeof(kill_ext),
    3897             :                                       ext, ".new.kill", NULL);
    3898           0 :                         if (file_exists(h->farmid, BAKDIR, nme, kill_ext) &&
    3899           0 :                             file_move(h->farmid, BAKDIR, SUBDIR, nme, kill_ext) != GDK_SUCCEED) {
    3900           0 :                                 ret = GDK_FAIL;
    3901             :                         }
    3902             :                 }
                                       /* nme and ext point into srcdir, so they must not be
                                        * used after this */
    3903     1100203 :                 GDKfree(srcdir);
    3904             :         }
    3905             :         return ret;
    3906             : }
    3907             : 
    3908             : static gdk_return
    3909      883531 : BBPbackup(BAT *b, bool subcommit)
    3910             : {
                               /* Back up the heap files of BAT b by calling do_backup() on
                                * its tail heap and, if present, on its variable-sized heap.
                                * subcommit is passed on to do_backup() unchanged.
                                *
                                * BATs that were never copied to disk or that are transient
                                * are skipped and reported as GDK_SUCCEED.  Otherwise the
                                * result of the do_backup() calls is returned. */
    3911      883531 :         gdk_return rc = GDK_SUCCEED;
    3912             : 
                               /* take a snapshot of the BAT under its heap lock; the lock is
                                * released again before any file system work is done */
    3913      883531 :         MT_lock_set(&b->theaplock);
    3914      883531 :         BATiter bi = bat_iterator_nolock(b);
    3915      883531 :         if (!bi.copiedtodisk || bi.transient) {
    3916           1 :                 MT_lock_unset(&b->theaplock);
    3917           1 :                 return GDK_SUCCEED;
    3918             :         }
    3919      883530 :         assert(b->theap->parentid == b->batCacheid);
                               /* if an old tail heap is recorded, back up that one (with its
                                * own dirty flag) instead of the current tail; the value
                                * (Heap *) 1 is a marker, not a real heap pointer */
    3920      883530 :         if (b->oldtail && b->oldtail != (Heap *) 1) {
    3921        1685 :                 bi.h = b->oldtail;
    3922        1685 :                 bi.hdirty = b->oldtail->dirty;
    3923             :         }
    3924             : #ifndef NDEBUG
    3925      883530 :         bi.locked = true;
    3926             : #endif
                               /* take references on the heaps while still holding the lock;
                                * presumably bat_iterator_end() below drops them again --
                                * TODO confirm */
    3927      883530 :         HEAPincref(bi.h);
    3928      883530 :         if (bi.vh)
    3929      225660 :                 HEAPincref(bi.vh);
    3930      883530 :         MT_lock_unset(&b->theaplock);
    3931             : 
    3932             :         /* determine location dir and physical suffix */
    3933      883530 :         if (bi.type != TYPE_void) {
    3934      883530 :                 rc = do_backup(bi.h, bi.hdirty, subcommit);
    3935      883530 :                 if (rc == GDK_SUCCEED && bi.vh != NULL)
    3936      225660 :                         rc = do_backup(bi.vh, bi.vhdirty, subcommit);
    3937             :         }
    3938      883530 :         bat_iterator_end(&bi);
    3939      883530 :         return rc;
    3940             : }
    3941             : 
    3942             : static inline void
    3943           0 : BBPcheckHeap(Heap *h)
    3944             : {
                               /* Debug check: the file of heap h must exist, either in BAKDIR
                                * (looked up by the last component of h->filename) or else in
                                * BATDIR (looked up by the full h->filename), it must be a
                                * regular file and it must be at least h->free bytes long.
                                *
                                * Violations are reported through GDKsyserror()/GDKerror() and
                                * trip an assert in builds with assertions enabled.  If a path
                                * cannot be built, the check is silently skipped. */
    3945           0 :         struct stat statb;
    3946           0 :         char *path;
    3947             : 
                               /* strip any directory part from the heap's file name */
    3948           0 :         char *s = strrchr(h->filename, DIR_SEP);
    3949           0 :         if (s)
    3950           0 :                 s++;
    3951             :         else
    3952             :                 s = h->filename;
    3953           0 :         path = GDKfilepath(0, BAKDIR, s, NULL);
    3954           0 :         if (path == NULL)
    3955           0 :                 return;
    3956           0 :         if (MT_stat(path, &statb) < 0) {
                                       /* not in the backup directory: try the normal
                                        * location */
    3957           0 :                 GDKfree(path);
    3958           0 :                 path = GDKfilepath(0, BATDIR, h->filename, NULL);
    3959           0 :                 if (path == NULL)
    3960             :                         return;
    3961           0 :                 if (MT_stat(path, &statb) < 0) {
    3962           0 :                         GDKsyserror("cannot stat file %s (expected size %zu)\n",
    3963             :                                     path, h->free);
    3964           0 :                         assert(0);
                                               /* only reached when assertions are compiled
                                                * out */
    3965             :                         GDKfree(path);
    3966             :                         return;
    3967             :                 }
    3968             :         }
    3969           0 :         assert((statb.st_mode & S_IFMT) == S_IFREG);
    3970           0 :         assert((size_t) statb.st_size >= h->free);
                               /* same size condition as the assert above, so that builds
                                * without assertions still report the problem */
    3971           0 :         if ((size_t) statb.st_size < h->free) {
    3972             :                 GDKerror("file %s too small (expected %zu, actual %zu)\n", path, h->free, (size_t) statb.st_size);
    3973             :                 GDKfree(path);
    3974             :                 return;
    3975             :         }
    3976           0 :         GDKfree(path);
    3977             : }
    3978             : 
    3979             : static void
    3980           0 : BBPcheckBBPdir(void)
    3981             : {
                               /* Debug check: read the BBP.dir file (from BAKDIR, falling
                                * back to BATDIR) entry by entry and call BBPcheckHeap() on
                                * the heaps of every entry that is not of type void and whose
                                * heap has a non-zero free value.
                                *
                                * Returns nothing; it stops silently when the file cannot be
                                * opened or when the header or an entry cannot be read. */
    3982           0 :         FILE *fp;
    3983           0 :         int lineno = 0;
    3984           0 :         bat bbpsize = 0;
    3985           0 :         unsigned bbpversion;
    3986           0 :         lng logno, transid;
    3987             : 
    3988           0 :         fp = GDKfileopen(0, BAKDIR, "BBP", "dir", "r");
    3989           0 :         assert(fp != NULL);
                               /* the fallback below is only reachable when assertions are
                                * compiled out */
    3990           0 :         if (fp == NULL) {
    3991             :                 fp = GDKfileopen(0, BATDIR, "BBP", "dir", "r");
    3992             :                 assert(fp != NULL);
    3993             :                 if (fp == NULL)
    3994             :                         return;
    3995             :         }
    3996           0 :         bbpversion = BBPheader(fp, &lineno, &bbpsize, &logno, &transid, false);
    3997           0 :         if (bbpversion == 0) {
    3998           0 :                 fclose(fp);
    3999           0 :                 return;         /* error reading file */
    4000             :         }
    4001           0 :         assert(bbpversion == GDKLIBRARY);
    4002             : 
    4003           0 :         for (;;) {
                                       /* scratch BAT descriptor and heaps that receive the
                                        * values of one BBP.dir entry; they are reset to a
                                        * zeroed state on every iteration */
    4004           0 :                 BAT b;
    4005           0 :                 Heap h;
    4006           0 :                 Heap vh;
    4007           0 :                 vh = h = (Heap) {
    4008             :                         .free = 0,
    4009             :                 };
    4010           0 :                 b = (BAT) {
    4011             :                         .theap = &h,
    4012             :                         .tvheap = &vh,
    4013             :                 };
    4014           0 :                 char filename[sizeof(BBP_physical(0))];
    4015           0 :                 char batname[129];
    4016             : #ifdef GDKLIBRARY_HASHASH
    4017           0 :                 int hashash;
    4018             : #endif
    4019             : 
    4020           0 :                 switch (BBPreadBBPline(fp, bbpversion, &lineno, &b,
    4021             : #ifdef GDKLIBRARY_HASHASH
    4022             :                                        &hashash,
    4023             : #endif
    4024             :                                        batname, filename, NULL)) {
    4025           0 :                 case 0:
    4026             :                         /* end of file */
    4027           0 :                         fclose(fp);
    4028             :                         /* don't leak errors, this is just debug code */
    4029           0 :                         GDKclrerr();
    4030           0 :                         return;
    4031             :                 case 1:
    4032             :                         /* successfully read an entry */
    4033           0 :                         break;
    4034           0 :                 default:
    4035             :                         /* error */
    4036           0 :                         fclose(fp);
    4037           0 :                         return;
    4038             :                 }
    4039             : #ifdef GDKLIBRARY_HASHASH
    4040           0 :                 assert(hashash == 0);
    4041             : #endif
    4042           0 :                 assert(b.batCacheid < (bat) ATOMIC_GET(&BBPsize));
    4043           0 :                 assert(BBP_desc(b.batCacheid) != NULL);
    4044           0 :                 assert(b.hseqbase <= GDK_oid_max);
    4045           0 :                 if (b.ttype == TYPE_void) {
    4046             :                         /* no files needed */
    4047           0 :                         continue;
    4048             :                 }
    4049           0 :                 if (b.theap->free > 0)
    4050           0 :                         BBPcheckHeap(b.theap);
                                       /* b.tvheap was set to &vh above; presumably
                                        * BBPreadBBPline() clears it for entries without a
                                        * variable-sized heap -- TODO confirm */
    4051           0 :                 if (b.tvheap != NULL && b.tvheap->free > 0)
    4052           0 :                         BBPcheckHeap(b.tvheap);
    4053             :         }
    4054             : }
    4055             : 
    4056             : /*
    4057             :  * @+ Atomic Write
    4058             :  * The atomic BBPsync() function first safeguards the old images of
    4059             :  * all files to be written in BAKDIR. It then saves all files. If that
    4060             :  * succeeds fully, BAKDIR is renamed to DELDIR. The rename is
    4061             :  * considered an atomic action. If it succeeds, the DELDIR is removed.
    4062             :  * If something fails, the pre-sync status can be obtained by moving
    4063             :  * back all backed up files; this is done by BBPrecover().
    4064             :  *
    4065             :  * The BBP.dir is also moved into the BAKDIR.
    4066             :  */
    4067             : gdk_return
    4068       11933 : BBPsync(int cnt, bat *restrict subcommit, BUN *restrict sizes, lng logno, lng transid)
    4069             : {
    4070       11933 :         gdk_return ret = GDK_SUCCEED;
    4071       11933 :         lng t0 = 0, t1 = 0;
    4072       11933 :         str bakdir, deldir;
    4073       11933 :         const bool lock = locked_by == 0 || locked_by != MT_getpid();
    4074       11933 :         char buf[3000];
    4075       11933 :         int n = subcommit ? 0 : -1;
    4076           8 :         FILE *obbpf, *nbbpf;
    4077             : 
    4078       11933 :         if ((bakdir = GDKfilepath(0, NULL, subcommit ? SUBDIR : BAKDIR, NULL)) == NULL)
    4079             :                 return GDK_FAIL;
    4080       11933 :         if ((deldir = GDKfilepath(0, NULL, DELDIR, NULL)) == NULL) {
    4081           0 :                 GDKfree(bakdir);
    4082           0 :                 return GDK_FAIL;
    4083             :         }
    4084             : 
    4085       11933 :         TRC_DEBUG_IF(PERF) t0 = t1 = GDKusec();
    4086             : 
    4087       11933 :         if ((ATOMIC_GET(&GDKdebug) & TAILCHKMASK) && !GDKinmemory(0))
    4088           0 :                 BBPcheckBBPdir();
    4089             : 
    4090       11933 :         ret = BBPprepare(subcommit != NULL);
    4091             : 
    4092             :         /* PHASE 1: safeguard everything in a backup-dir */
    4093     1828908 :         for (int idx = 1; ret == GDK_SUCCEED && idx < cnt; idx++) {
    4094     1816975 :                 bat i = subcommit ? subcommit[idx] : idx;
    4095     1816975 :                 const bat bid = i;
    4096     1816975 :                 if (lock)
    4097     1806447 :                         MT_lock_set(&GDKswapLock(bid));
    4098             :                 /* set flag that we're syncing, i.e. that we'll
    4099             :                  * be between moving heap to backup dir and
    4100             :                  * saving the new version, in other words, the
    4101             :                  * heap may not exist in the usual location */
    4102     1816975 :                 BBP_status_on(bid, BBPSYNCING);
    4103             :                 /* wait until unloading is finished before
    4104             :                  * attempting to make a backup */
    4105     1816975 :                 while (BBP_status(bid) & BBPUNLOADING) {
    4106           0 :                         if (lock)
    4107           0 :                                 MT_lock_unset(&GDKswapLock(bid));
    4108           0 :                         BBPspin(bid, __func__, BBPUNLOADING);
    4109           0 :                         if (lock)
    4110     1816975 :                                 MT_lock_set(&GDKswapLock(bid));
    4111             :                 }
    4112     1816975 :                 BAT *b = dirty_bat(&i, subcommit != NULL);
    4113     1816975 :                 if (i <= 0 ||
    4114     1816975 :                     (BBP_status(bid) & BBPEXISTING &&
    4115     1503329 :                      b != NULL &&
    4116     2386858 :                      b->batInserted > 0 &&
    4117      883529 :                      BBPbackup(b, subcommit != NULL) != GDK_SUCCEED)) {
    4118             :                         ret = GDK_FAIL;
    4119             :                 }
    4120     1816975 :                 if (lock)
    4121     1816975 :                         MT_lock_unset(&GDKswapLock(bid));
    4122             :         }
    4123       11933 :         TRC_DEBUG(PERF, "move time "LLFMT" usec, %d files\n", (t1 = GDKusec()) - t0, backup_files);
    4124             : 
    4125             :         /* PHASE 2: save the repository and write new BBP.dir file */
    4126       11933 :         if (ret == GDK_SUCCEED) {
    4127       11933 :                 ret = BBPdir_first(subcommit != NULL, logno, transid,
    4128             :                                    &obbpf, &nbbpf);
    4129             :         }
    4130             : 
    4131     1828908 :         for (int idx = 1; ret == GDK_SUCCEED && idx < cnt; idx++) {
    4132     1816975 :                 bat i = subcommit ? subcommit[idx] : idx;
    4133             :                 /* BBP_desc(i) may be NULL */
    4134     1816975 :                 BUN size = sizes ? sizes[idx] : BUN_NONE;
    4135     1816975 :                 BATiter bi;
    4136             : 
    4137     1816975 :                 if (BBP_status(i) & BBPPERSISTENT) {
    4138     1625486 :                         BAT *b = dirty_bat(&i, subcommit != NULL);
    4139     1625486 :                         if (i <= 0) {
    4140           0 :                                 ret = GDK_FAIL;
    4141           0 :                                 break;
    4142             :                         }
    4143     1625486 :                         bi = bat_iterator(BBP_desc(i));
    4144     1625486 :                         assert(sizes == NULL || size <= bi.count);
    4145     1621932 :                         assert(sizes == NULL || bi.width == 0 || (bi.type == TYPE_msk ? ((size + 31) / 32) * 4 : size << bi.shift) <= bi.hfree);
    4146     1625486 :                         if (size > bi.count) /* includes sizes==NULL */
    4147             :                                 size = bi.count;
    4148     1625486 :                         MT_lock_set(&bi.b->theaplock);
    4149     1625486 :                         bi.b->batInserted = size;
    4150     1625486 :                         if (bi.b->ttype >= 0 && ATOMvarsized(bi.b->ttype)) {
    4151             :                                 /* see epilogue() for other part of this */
    4152             :                                 /* remember the tail we're saving */
    4153      360919 :                                 if (BATsetprop_nolock(bi.b, (enum prop_t) 20, TYPE_ptr, &bi.h) == NULL) {
    4154           0 :                                         GDKerror("setprop failed\n");
    4155           0 :                                         ret = GDK_FAIL;
    4156             :                                 } else {
    4157      360919 :                                         if (bi.b->oldtail == NULL)
    4158      358908 :                                                 bi.b->oldtail = (Heap *) 1;
    4159      360919 :                                         HEAPincref(bi.h);
    4160             :                                 }
    4161             :                         }
    4162     1625486 :                         MT_lock_unset(&bi.b->theaplock);
    4163     1625486 :                         if (ret == GDK_SUCCEED && b && size != 0) {
    4164             :                                 /* wait for BBPSAVING so that we
    4165             :                                  * can set it, wait for
    4166             :                                  * BBPUNLOADING before
    4167             :                                  * attempting to save */
    4168      970136 :                                 for (;;) {
    4169      970136 :                                         if (lock)
    4170      970136 :                                                 MT_lock_set(&GDKswapLock(i));
    4171      970136 :                                         if (!(BBP_status(i) & (BBPSAVING|BBPUNLOADING)))
    4172             :                                                 break;
    4173           0 :                                         if (lock)
    4174           0 :                                                 MT_lock_unset(&GDKswapLock(i));
    4175           0 :                                         BBPspin(i, __func__, BBPSAVING|BBPUNLOADING);
    4176             :                                 }
    4177      970136 :                                 BBP_status_on(i, BBPSAVING);
    4178      970136 :                                 if (lock)
    4179      970136 :                                         MT_lock_unset(&GDKswapLock(i));
    4180      970136 :                                 ret = BATsave_iter(b, &bi, size);
    4181      970136 :                                 BBP_status_off(i, BBPSAVING);
    4182             :                         }
    4183             :                 } else {
    4184      191489 :                         bi = bat_iterator(NULL);
    4185             :                 }
    4186     1816975 :                 if (ret == GDK_SUCCEED) {
    4187     1816975 :                         n = BBPdir_step(i, size, n, buf, sizeof(buf), &obbpf, nbbpf, &bi);
    4188     1816975 :                         if (n < -1)
    4189           0 :                                 ret = GDK_FAIL;
    4190             :                 }
    4191     1816975 :                 bat_iterator_end(&bi);
    4192             :                 /* we once again have a saved heap */
    4193             :         }
    4194             : 
    4195       11933 :         TRC_DEBUG(PERF, "write time "LLFMT" usec\n", (t0 = GDKusec()) - t1);
    4196             : 
    4197       11933 :         if (ret == GDK_SUCCEED) {
    4198       11933 :                 ret = BBPdir_last(n, buf, sizeof(buf), obbpf, nbbpf);
    4199             :         }
    4200             : 
    4201       11933 :         TRC_DEBUG(PERF, "dir time "LLFMT" usec, %d bats\n", (t1 = GDKusec()) - t0, (bat) ATOMIC_GET(&BBPsize));
    4202             : 
    4203       11933 :         if (ret == GDK_SUCCEED) {
    4204             :                 /* atomic switchover */
    4205             :                 /* this is the big one: this call determines
    4206             :                  * whether the operation of this function
    4207             :                  * succeeded, so no changing of ret after this
    4208             :                  * call anymore */
    4209             : 
    4210       11933 :                 if (MT_rename(bakdir, deldir) < 0 &&
    4211             :                     /* maybe there was an old deldir, so remove and try again */
    4212           0 :                     (GDKremovedir(0, DELDIR) != GDK_SUCCEED ||
    4213           0 :                      MT_rename(bakdir, deldir) < 0))
    4214           0 :                         ret = GDK_FAIL;
    4215           0 :                 if (ret != GDK_SUCCEED)
    4216           0 :                         GDKsyserror("rename(%s,%s) failed\n", bakdir, deldir);
    4217       11933 :                 TRC_DEBUG(IO_, "rename %s %s = %d\n", bakdir, deldir, (int) ret);
    4218             :         }
    4219             : 
    4220             :         /* AFTERMATH */
    4221       11933 :         if (ret == GDK_SUCCEED) {
    4222       11933 :                 ATOMIC_SET(&BBPlogno, logno);       /* the new value */
    4223       11933 :                 ATOMIC_SET(&BBPtransid, transid);
    4224       11933 :                 backup_files = subcommit ? (backup_files - backup_subdir) : 0;
    4225       11933 :                 backup_dir = backup_subdir = 0;
    4226       11933 :                 if (GDKremovedir(0, DELDIR) != GDK_SUCCEED)
    4227           0 :                         fprintf(stderr, "#BBPsync: cannot remove directory %s\n", DELDIR);
    4228       11933 :                 (void) BBPprepare(false); /* (try to) remove DELDIR and set up new BAKDIR */
    4229       11933 :                 if (backup_files > 1) {
    4230       11925 :                         TRC_DEBUG(PERF, "backup_files %d > 1\n", backup_files);
    4231       11925 :                         backup_files = 1;
    4232             :                 }
    4233             :         }
    4234       11933 :         TRC_DEBUG(PERF, "%s (ready time "LLFMT" usec)\n",
    4235             :                   ret == GDK_SUCCEED ? "" : " failed",
    4236             :                   (t0 = GDKusec()) - t1);
    4237             : 
    4238       11933 :         if (ret != GDK_SUCCEED) {
    4239             :                 /* clean up extra refs we created */
    4240           0 :                 for (int idx = 1; idx < cnt; idx++) {
    4241           0 :                         bat i = subcommit ? subcommit[idx] : idx;
    4242           0 :                         BAT *b = BBP_desc(i);
    4243           0 :                         if (b && ATOMvarsized(b->ttype)) {
    4244           0 :                                 MT_lock_set(&b->theaplock);
    4245           0 :                                 ValPtr p = BATgetprop_nolock(b, (enum prop_t) 20);
    4246           0 :                                 if (p != NULL) {
    4247           0 :                                         HEAPdecref(p->val.pval, false);
    4248           0 :                                         BATrmprop_nolock(b, (enum prop_t) 20);
    4249             :                                 }
    4250           0 :                                 MT_lock_unset(&b->theaplock);
    4251             :                         }
    4252             :                 }
    4253             :         }
    4254             : 
    4255             :         /* turn off the BBPSYNCING bits for all bats, even when things
    4256             :          * didn't go according to plan (i.e., don't check for ret ==
    4257             :          * GDK_SUCCEED) */
    4258     1828908 :         for (int idx = 1; idx < cnt; idx++) {
    4259     1816975 :                 bat i = subcommit ? subcommit[idx] : idx;
    4260     1816975 :                 BBP_status_off(i, BBPSYNCING);
    4261             :         }
    4262             : 
    4263       11933 :         GDKfree(bakdir);
    4264       11933 :         GDKfree(deldir);
    4265       11933 :         return ret;
    4266             : }
    4267             : 
    4268             : /*
    4269             :  * Recovery just moves all files back to their original location.  This
    4270             :  * is an incremental process: if something fails, we simply stop and
    4271             :  * leave the remaining files in BACKUP/.  The recovery process can
    4272             :  * resume later with the leftover files.
                     :  *
                     :  * force_move handles one directory entry NAME found in SRCDIR (both
                     :  * directories are resolved through GDKfilepath for farm FARMID):
                     :  * - NAME ends in ".kill": the file in DSTDIR whose name is NAME
                     :  *   without that suffix is removed (it not existing is fine), and
                     :  *   then the .kill file itself is removed from SRCDIR.  A NAME
                     :  *   whose stem does not fit in a long_str is rejected;
                     :  * - otherwise NAME is moved from SRCDIR to DSTDIR; if that fails,
                     :  *   the error is cleared, any existing destination file is
                     :  *   removed, DSTDIR is created, and the move is retried once.
                     :  * Returns GDK_SUCCEED or GDK_FAIL.
    4273             :  */
    4274             : static gdk_return
    4275           0 : force_move(int farmid, const char *srcdir, const char *dstdir, const char *name)
    4276             : {
    4277           0 :         const char *p;
    4278           0 :         char *dstpath, *killfile;
    4279           0 :         gdk_return ret = GDK_SUCCEED;
    4280             : 
    4281           0 :         if ((p = strrchr(name, '.')) != NULL && strcmp(p, ".kill") == 0) {
    4282             :                 /* Found a X.new.kill file, ie remove the X.new file */
    4283           0 :                 ptrdiff_t len = p - name;
    4284           0 :                 long_str srcpath;
    4285             : 
                     :                 /* srcpath has a fixed size: refuse a name whose stem
                     :                  * (plus terminator) does not fit rather than write
                     :                  * past the end of the buffer */
                     :                 if ((size_t) len >= sizeof(srcpath)) {
                     :                         GDKerror("force_move: file name %s too long\n", name);
                     :                         return GDK_FAIL;
                     :                 }
    4286           0 :                 strncpy(srcpath, name, len);
    4287           0 :                 srcpath[len] = '\0';
    4288           0 :                 if ((dstpath = GDKfilepath(farmid, dstdir, srcpath, NULL)) == NULL) {
    4289             :                         return GDK_FAIL;
    4290             :                 }
    4291             : 
    4292             :                 /* step 1: remove the X.new file that is going to be
    4293             :                  * overridden by X */
    4294           0 :                 if (MT_remove(dstpath) != 0 && errno != ENOENT) {
    4295             :                         /* if it exists and cannot be removed, all
    4296             :                          * this is going to fail */
    4297           0 :                         GDKsyserror("force_move: remove(%s)\n", dstpath);
    4298           0 :                         GDKfree(dstpath);
    4299           0 :                         return GDK_FAIL;
    4300             :                 }
    4301           0 :                 GDKfree(dstpath);
    4302             : 
    4303             :                 /* step 2: now remove the .kill file. This one is
    4304             :                  * crucial, otherwise we'll never finish recovering */
    4305           0 :                 if ((killfile = GDKfilepath(farmid, srcdir, name, NULL)) == NULL) {
    4306             :                         return GDK_FAIL;
    4307             :                 }
    4308           0 :                 if (MT_remove(killfile) != 0) {
    4309           0 :                         ret = GDK_FAIL;
    4310           0 :                         GDKsyserror("force_move: remove(%s)\n", killfile);
    4311             :                 }
    4312           0 :                 GDKfree(killfile);
    4313           0 :                 return ret;
    4314             :         }
    4315             :         /* try to rename it; NOTE(review): the last argument is false
                     :          * here and true on the retry below -- presumably it controls
                     :          * error reporting, confirm against GDKmove */
    4316           0 :         ret = GDKmove(farmid, srcdir, name, NULL, dstdir, name, NULL, false);
    4317             : 
    4318           0 :         if (ret != GDK_SUCCEED) {
    4319           0 :                 char *srcpath;
    4320             : 
    4321           0 :                 GDKclrerr();
    4322             :                 /* two legal possible causes: file exists or dir
    4323             :                  * doesn't exist */
    4324           0 :                 if ((dstpath = GDKfilepath(farmid, dstdir, name, NULL)) == NULL)
    4325             :                         return GDK_FAIL;
    4326           0 :                 if ((srcpath = GDKfilepath(farmid, srcdir, name, NULL)) == NULL) {
    4327           0 :                         GDKfree(dstpath);
    4328           0 :                         return GDK_FAIL;
    4329             :                 }
                     :                 /* the value put in ret here only feeds the trace
                     :                  * message; the retried move below overwrites it */
    4330           0 :                 if (MT_remove(dstpath) != 0)    /* clear destination */
    4331           0 :                         ret = GDK_FAIL;
    4332           0 :                 TRC_DEBUG(IO_, "remove %s = %d\n", dstpath, (int) ret);
    4333             : 
    4334           0 :                 (void) GDKcreatedir(dstdir); /* if fails, move will fail */
    4335           0 :                 ret = GDKmove(farmid, srcdir, name, NULL, dstdir, name, NULL, true);
    4336           0 :                 TRC_DEBUG(IO_, "link %s %s = %d\n", srcpath, dstpath, (int) ret);
    4337           0 :                 GDKfree(dstpath);
    4338           0 :                 GDKfree(srcpath);
    4339             :         }
    4340             :         return ret;
    4341             : }
    4342             : 
                       /*
                        * Move every file found in BAKDIR of farm FARMID back into the BAT
                        * directory tree.
                        *
                        * - entries starting with '.' (other than "." and "..") are
                        *   removed;
                        * - BBP.dir is only noted and handled last through recover_dir();
                        * - for any other entry the part of the name before the first '.'
                        *   identifies the bat: an octal bat id when it starts with a
                        *   digit, otherwise a name that is resolved with BBP_find();
                        * - files of an unknown or invalid bat are moved to LEFTDIR, the
                        *   others are moved to the bat's own subdirectory of BATDIR,
                        *   after which the thashl, thashb, timprints, torderidx and
                        *   tstrimps files of that bat are unlinked.
                        *
                        * When all of this succeeded, BAKDIR itself is removed.
                        *
                        * Returns GDK_SUCCEED when BAKDIR could not be opened (nothing to
                        * recover) or when recovery completed; GDK_FAIL when a path could
                        * not be allocated, LEFTDIR could not be created, a move into a
                        * bat directory failed, recover_dir() failed, or BAKDIR could not
                        * be removed.
                        */
    4343             : gdk_return
    4344         343 : BBPrecover(int farmid)
    4345             : {
    4346         343 :         str bakdirpath;
    4347         343 :         str leftdirpath;
    4348         343 :         DIR *dirp;
    4349         343 :         struct dirent *dent;
    4350         343 :         long_str path, dstpath;
    4351         343 :         bat i;
    4352         343 :         size_t j = strlen(BATDIR);
    4353         343 :         gdk_return ret = GDK_SUCCEED;
    4354         343 :         bool dirseen = false;
    4355         343 :         str dstdir;
    4356             : 
    4357         343 :         bakdirpath = GDKfilepath(farmid, NULL, BAKDIR, NULL);
    4358         343 :         leftdirpath = GDKfilepath(farmid, NULL, LEFTDIR, NULL);
    4359         343 :         if (bakdirpath == NULL || leftdirpath == NULL) {
    4360           0 :                 GDKfree(bakdirpath);
    4361           0 :                 GDKfree(leftdirpath);
    4362           0 :                 return GDK_FAIL;
    4363             :         }
    4364         343 :         dirp = opendir(bakdirpath);
    4365         343 :         if (dirp == NULL) {
                                       /* NOTE(review): a failure other than ENOENT is
                                        * logged but still reported as success */
    4366         231 :                 if (errno != ENOENT)
    4367           0 :                         GDKsyserror("cannot open directory %s\n", bakdirpath);
    4368         231 :                 GDKfree(bakdirpath);
    4369         231 :                 GDKfree(leftdirpath);
    4370         231 :                 return GDK_SUCCEED;     /* nothing to do */
    4371             :         }
                               /* put BATDIR followed by a separator in dstpath; dstdir
                                * points just past the separator, which is where
                                * BBPgetsubdir() is handed the buffer further down */
    4372         112 :         memcpy(dstpath, BATDIR, j);
    4373         112 :         dstpath[j] = DIR_SEP;
    4374         112 :         dstpath[++j] = 0;
    4375         112 :         dstdir = dstpath + j;
    4376         112 :         TRC_DEBUG(IO_, "start\n");
    4377             : 
    4378         112 :         if (MT_mkdir(leftdirpath) < 0 && errno != EEXIST) {
    4379           0 :                 GDKsyserror("cannot create directory %s\n", leftdirpath);
    4380           0 :                 closedir(dirp);
    4381           0 :                 GDKfree(bakdirpath);
    4382           0 :                 GDKfree(leftdirpath);
    4383           0 :                 return GDK_FAIL;
    4384             :         }
    4385             : 
    4386             :         /* move back all files */
    4387         336 :         while ((dent = readdir(dirp)) != NULL) {
    4388         224 :                 const char *q = strchr(dent->d_name, '.');
    4389             : 
    4390         224 :                 if (q == dent->d_name) {
                                               /* name starts with '.': skip "." and "..",
                                                * remove anything else */
    4391         224 :                         char *fn;
    4392             : 
    4393         224 :                         if (strcmp(dent->d_name, ".") == 0 ||
    4394         112 :                             strcmp(dent->d_name, "..") == 0)
    4395         224 :                                 continue;
    4396           0 :                         fn = GDKfilepath(farmid, BAKDIR, dent->d_name, NULL);
    4397           0 :                         if (fn) {
    4398           0 :                                 int uret = MT_remove(fn);
    4399           0 :                                 TRC_DEBUG(IO_, "remove %s = %d\n",
    4400             :                                           fn, uret);
    4401           0 :                                 GDKfree(fn);
    4402             :                         }
    4403           0 :                         continue;
    4404           0 :                 } else if (strcmp(dent->d_name, "BBP.dir") == 0) {
    4405           0 :                         dirseen = true;
    4406           0 :                         continue;
    4407             :                 }
                                       /* copy the part of the name before the first '.' (or
                                        * the whole name if there is none) into path; from
                                        * here on j is reused as the length of that part */
    4408           0 :                 if (q == NULL)
    4409           0 :                         q = dent->d_name + strlen(dent->d_name);
    4410           0 :                 if ((j = q - dent->d_name) + 1 > sizeof(path)) {
    4411             :                         /* name too long: ignore */
    4412           0 :                         continue;
    4413             :                 }
    4414           0 :                 strncpy(path, dent->d_name, j);
    4415           0 :                 path[j] = 0;
    4416           0 :                 if (GDKisdigit(*path)) {
                                               /* the bat id is written in octal */
    4417           0 :                         i = strtol(path, NULL, 8);
    4418             :                 } else {
    4419           0 :                         i = BBP_find(path, false);
    4420           0 :                         if (i < 0)
    4421           0 :                                 i = -i;
    4422             :                 }
    4423           0 :                 if (i == 0 || i >= (bat) ATOMIC_GET(&BBPsize) || !BBPvalid(i)) {
                                               /* no valid bat for this file: park it in
                                                * LEFTDIR.  NOTE(review): the result is
                                                * ignored here; a file that stays behind
                                                * presumably makes the MT_rmdir of BAKDIR
                                                * below fail -- confirm */
    4424           0 :                         force_move(farmid, BAKDIR, LEFTDIR, dent->d_name);
    4425             :                 } else {
    4426           0 :                         BBPgetsubdir(dstdir, i);
    4427           0 :                         if (force_move(farmid, BAKDIR, dstpath, dent->d_name) != GDK_SUCCEED) {
    4428             :                                 ret = GDK_FAIL;
    4429             :                                 break;
    4430             :                         }
    4431             :                         /* don't trust index files after recovery */
    4432           0 :                         GDKunlink(farmid, dstpath, path, "thashl");
    4433           0 :                         GDKunlink(farmid, dstpath, path, "thashb");
    4434           0 :                         GDKunlink(farmid, dstpath, path, "timprints");
    4435           0 :                         GDKunlink(farmid, dstpath, path, "torderidx");
    4436           0 :                         GDKunlink(farmid, dstpath, path, "tstrimps");
    4437             :                 }
    4438             :         }
    4439         112 :         closedir(dirp);
    4440         112 :         if (dirseen && ret == GDK_SUCCEED) {    /* we have a saved BBP.dir; it should be moved back!! */
    4441           0 :                 struct stat st;
    4442           0 :                 char *fn;
    4443             : 
                                       /* recover_dir() is told whether BATDIR already holds
                                        * a BBP.dir */
    4444           0 :                 fn = GDKfilepath(farmid, BATDIR, "BBP", "dir");
    4445           0 :                 if (fn == NULL) {
    4446             :                         ret = GDK_FAIL;
    4447             :                 } else {
    4448           0 :                         ret = recover_dir(farmid, MT_stat(fn, &st) == 0);
    4449           0 :                         GDKfree(fn);
    4450             :                 }
    4451             :         }
    4452             : 
    4453         112 :         if (ret == GDK_SUCCEED) {
    4454         112 :                 if (MT_rmdir(bakdirpath) < 0) {
    4455           0 :                         GDKsyserror("cannot remove directory %s\n", bakdirpath);
    4456           0 :                         ret = GDK_FAIL;
    4457             :                 }
    4458         112 :                 TRC_DEBUG(IO_, "rmdir %s = %d\n", bakdirpath, (int) ret);
    4459             :         }
    4460         112 :         if (ret != GDK_SUCCEED)
    4461           0 :                 GDKerror("recovery failed.\n");
    4462             : 
    4463         112 :         TRC_DEBUG(IO_, "end\n");
    4464         112 :         GDKfree(bakdirpath);
    4465         112 :         GDKfree(leftdirpath);
    4466         112 :         return ret;
    4467             : }
    4468             : 
    4469             : /*
    4470             :  * SUBDIR recovery is quite mindlessly moving all files back to the
    4471             :  * parent (BAKDIR).  We do recognize moving back BBP.dir and set
    4472             :  * backup_dir accordingly.
                     :  *
                     :  * If SUBDIR cannot be opened there is nothing to do and GDK_SUCCEED
                     :  * is returned.  The loop stops at the first file that cannot be
                     :  * moved; in that case SUBDIR is left in place and GDK_FAIL is
                     :  * returned.  GDK_FAIL is also returned when the SUBDIR path cannot
                     :  * be allocated or SUBDIR cannot be removed afterwards.
    4473             :  */
    4474             : gdk_return
    4475       12260 : BBPrecover_subdir(void)
    4476             : {
    4477       12260 :         str subdirpath;
    4478       12260 :         DIR *dirp;
    4479       12260 :         struct dirent *dent;
    4480       12260 :         gdk_return ret = GDK_SUCCEED;
    4481             : 
    4482       12260 :         subdirpath = GDKfilepath(0, NULL, SUBDIR, NULL);
    4483       12260 :         if (subdirpath == NULL)
    4484             :                 return GDK_FAIL;
    4485       12260 :         dirp = opendir(subdirpath);
                               /* NOTE(review): a failure other than ENOENT is logged here,
                                * but the function still returns GDK_SUCCEED below */
    4486       12260 :         if (dirp == NULL && errno != ENOENT)
    4487           0 :                 GDKsyserror("cannot open directory %s\n", subdirpath);
    4488       12260 :         GDKfree(subdirpath);
    4489       12260 :         if (dirp == NULL) {
    4490             :                 return GDK_SUCCEED;     /* nothing to do */
    4491             :         }
    4492           0 :         TRC_DEBUG(IO_, "start\n");
    4493             : 
    4494             :         /* move back all files */
    4495           0 :         while ((dent = readdir(dirp)) != NULL) {
    4496           0 :                 if (dent->d_name[0] == '.')
    4497           0 :                         continue;
    4498           0 :                 ret = GDKmove(0, SUBDIR, dent->d_name, NULL, BAKDIR, dent->d_name, NULL, true);
    4499           0 :                 if (ret != GDK_SUCCEED)
    4500             :                         break;
                                       /* BBP.dir has been moved to BAKDIR: record that */
    4501           0 :                 if (strcmp(dent->d_name, "BBP.dir") == 0)
    4502           0 :                         backup_dir = 1;
    4503             :         }
    4504           0 :         closedir(dirp);
    4505             : 
    4506             :         /* delete the directory */
    4507           0 :         if (ret == GDK_SUCCEED) {
    4508           0 :                 ret = GDKremovedir(0, SUBDIR);
                                       /* backup_dir still being 2 means the loop above did
                                        * not come across BBP.dir in SUBDIR */
    4509           0 :                 if (backup_dir == 2) {
    4510           0 :                         TRC_DEBUG(IO_, "%s%cBBP.dir had disappeared!\n", SUBDIR, DIR_SEP);
    4511           0 :                         backup_dir = 0;
    4512             :                 }
    4513             :         }
    4514           0 :         TRC_DEBUG(IO_, "end = %d\n", (int) ret);
    4515             : 
    4516           0 :         if (ret != GDK_SUCCEED)
    4517           0 :                 GDKerror("recovery failed.\n");
    4518             :         return ret;
    4519             : }
    4520             : 
    4521             : /*
    4522             :  * @- The diskscan
    4523             :  * The BBPdiskscan routine walks through the BAT dir and cleans up
    4524             :  * leftovers.  Leftovers are files that cannot belong to a BAT.  In
    4525             :  * order to establish this for tail and theap files, the BAT
    4526             :  * descriptor is consulted in order to determine whether these files
    4527             :  * are still required.
    4528             :  *
    4529             :  * The routine returns true when the directory it was given could not
    4530             :  * be opened (so there was nothing to scan) and false once it has
    4531             :  * scanned the directory.  It does not report sizes or the amount of
                     :  * space that was freed.
    4532             :  */
                       /*
                        * Return true when BID lies in the range [0, BBPsize), is valid
                        * according to BBPvalid(), and either has no cached BAT or has a
                        * cached BAT with batCopiedtodisk set.  Return false otherwise.
                        */
    4533             : static bool
    4534       26120 : persistent_bat(bat bid)
    4535             : {
    4536       26120 :         if (bid >= 0 && bid < (bat) ATOMIC_GET(&BBPsize) && BBPvalid(bid)) {
    4537       26120 :                 BAT *b = BBP_cache(bid);
    4538             : 
    4539       26120 :                 if (b == NULL || b->batCopiedtodisk) {
    4540             :                         return true;
    4541             :                 }
    4542             :         }
    4543             :         return false;
    4544             : }
    4545             : 
                       /*
                        * Return the descriptor of bat BID, or NULL if there is none.
                        *
                        * NULL is returned right away for the nil bat.  Otherwise the
                        * descriptor is taken from BBP_desc() when BID is below BBPsize and
                        * has a logical name.  When that yields no descriptor, BBPclear()
                        * is called for BID before NULL is returned.
                        * NOTE(review): BBPclear() is then also reached for a BID that is
                        * not below BBPsize; the callers in BBPdiskscan only pass ids that
                        * persistent_bat() accepted -- confirm no other caller exists.
                        */
    4546             : static BAT *
    4547       26120 : getdesc(bat bid)
    4548             : {
    4549       26120 :         BAT *b = NULL;
    4550             : 
    4551       26120 :         if (is_bat_nil(bid))
    4552             :                 return NULL;
    4553       26120 :         assert(bid > 0);
    4554       26120 :         if (bid < (bat) ATOMIC_GET(&BBPsize) && BBP_logical(bid))
    4555       26120 :                 b = BBP_desc(bid);
    4556       26120 :         if (b == NULL)
    4557           0 :                 BBPclear(bid);
    4558             :         return b;
    4559             : }
    4560             : 
                       /*
                        * Scan directory PARENT and remove files that do not belong to a
                        * persistent bat.
                        *
                        * parent:  path of the directory to scan.
                        * baseoff: offset into PARENT of the part that is compared against
                        *          BATDIR, BAKDIR and SUBDIR.
                        *
                        * Entries whose name starts with '.' are skipped, as are BBP.*
                        * files in BATDIR, BAKDIR and SUBDIR.  A name without a '.' is
                        * tried as a subdirectory by calling this function recursively.
                        * For the remaining files the name is read as an octal bat id
                        * followed by an extension, and the extension decides whether the
                        * file is kept or removed.  On the first file that is not
                        * recognized the scan of this directory stops.
                        *
                        * Returns true when PARENT could not be opened (so nothing was
                        * scanned) and false after the directory has been scanned.
                        */
    4561             : static bool
    4562        1787 : BBPdiskscan(const char *parent, size_t baseoff)
    4563             : {
    4564        1787 :         DIR *dirp = opendir(parent);
    4565        1787 :         struct dirent *dent;
    4566        1787 :         char fullname[FILENAME_MAX];
    4567        1787 :         str dst;
    4568        1787 :         size_t dstlen;
    4569        1787 :         const char *src = parent;
    4570             : 
    4571        1787 :         if (dirp == NULL) {
    4572         173 :                 if (errno != ENOENT)
    4573           0 :                         GDKsyserror("cannot open directory %s\n", parent);
    4574         173 :                 return true;    /* nothing to do */
    4575             :         }
    4576             : 
                               /* fullname = PARENT plus a trailing separator; dst is where
                                * each entry name gets appended and dstlen is the room that
                                * is left there.  NOTE(review): the length of PARENT itself
                                * is not checked against sizeof(fullname) */
    4577        1614 :         dst = stpcpy(fullname, src);
    4578        1614 :         if (dst > fullname && dst[-1] != DIR_SEP)
    4579        1614 :                 *dst++ = DIR_SEP;
    4580        1614 :         dstlen = sizeof(fullname) - (dst - fullname);
    4581             : 
    4582       34198 :         while ((dent = readdir(dirp)) != NULL) {
    4583       30970 :                 const char *p;
    4584       30970 :                 bat bid;
    4585       30970 :                 bool ok, delete;
    4586             : 
    4587       30970 :                 if (dent->d_name[0] == '.')
    4588        3228 :                         continue;       /* ignore .dot files and directories (. ..) */
    4589             : 
    4590             : #ifdef GDKLIBRARY_JSON
    4591       27742 :                 if (strcmp(dent->d_name, "jsonupgradeneeded") == 0) {
    4592           8 :                         continue; /* ignore json upgrade signal file  */
    4593             :                 }
    4594             : #endif
    4595             : 
                                       /* leave the BBP.* files in BATDIR, BAKDIR and SUBDIR
                                        * alone */
    4596       27734 :                 if (strncmp(dent->d_name, "BBP.", 4) == 0 &&
    4597         335 :                     (strcmp(parent + baseoff, BATDIR) == 0 ||
    4598         335 :                      strncmp(parent + baseoff, BAKDIR, strlen(BAKDIR)) == 0 ||
    4599           0 :                      strncmp(parent + baseoff, SUBDIR, strlen(SUBDIR)) == 0))
    4600         335 :                         continue;
    4601             : 
                                       /* p points at the first '.' of the name, i.e. at the
                                        * start of the extension, or is NULL */
    4602       27399 :                 p = strchr(dent->d_name, '.');
    4603             : 
    4604       27399 :                 if (strlen(dent->d_name) >= dstlen) {
    4605             :                         /* found a file with too long a name
    4606             :                            (i.e. unknown); stop pruning in this
    4607             :                            subdir */
    4608           0 :                         fprintf(stderr, "unexpected file %s, leaving %s.\n", dent->d_name, parent);
    4609           0 :                         break;
    4610             :                 }
                                       /* the name is known to fit (checked just above), so
                                        * this copy includes the terminating NUL */
    4611       27399 :                 strncpy(dst, dent->d_name, dstlen);
    4612       27399 :                 fullname[sizeof(fullname) - 1] = 0;
    4613             : 
                                       /* no '.' in the name: try it as a directory; a false
                                        * result means it was opened and scanned as one */
    4614       27399 :                 if (p == NULL && !BBPdiskscan(fullname, baseoff)) {
    4615             :                         /* it was a directory */
    4616        1279 :                         continue;
    4617             :                 }
    4618             : 
    4619       26120 :                 if (p && strcmp(p + 1, "tmp") == 0) {
                                               /* files with extension "tmp" are always removed */
    4620             :                         delete = true;
    4621             :                         ok = true;
    4622       26120 :                         bid = 0;
    4623             :                 } else {
                                               /* the name starts with the bat id in octal; a
                                                * file needs both an extension and a non-zero
                                                * id to be recognized */
    4624       26120 :                         bid = strtol(dent->d_name, NULL, 8);
    4625       26120 :                         ok = p && bid;
    4626       26120 :                         delete = false;
    4627             : 
    4628       26120 :                         if (!ok || !persistent_bat(bid)) {
    4629             :                                 delete = true;
    4630       26120 :                         } else if (strncmp(p + 1, "tail", 4) == 0) {
                                                       /* keep a tail file only if the bat has a
                                                        * type, is on disk and is not empty, and
                                                        * the extension is the one that goes with
                                                        * the bat (tail1/tail2/tail4 by twidth for
                                                        * string bats, plain tail otherwise) */
    4631       19663 :                                 BAT *b = getdesc(bid);
    4632       19663 :                                 delete = (b == NULL || !b->ttype || !b->batCopiedtodisk || b->batCount == 0);
    4633       19663 :                                 assert(b == NULL || b->batCount > 0 || b->theap->free == 0);
    4634       19663 :                                 if (!delete) {
    4635       19661 :                                         if (b->ttype == TYPE_str) {
    4636        5269 :                                                 switch (b->twidth) {
    4637        3068 :                                                 case 1:
    4638        3068 :                                                         delete = strcmp(p + 1, "tail1") != 0;
    4639        3068 :                                                         break;
    4640        1824 :                                                 case 2:
    4641        1824 :                                                         delete = strcmp(p + 1, "tail2") != 0;
    4642        1824 :                                                         break;
    4643             : #if SIZEOF_VAR_T == 8
    4644         377 :                                                 case 4:
    4645         377 :                                                         delete = strcmp(p + 1, "tail4") != 0;
    4646         377 :                                                         break;
    4647             : #endif
    4648           0 :                                                 default:
    4649           0 :                                                         delete = strcmp(p + 1, "tail") != 0;
    4650           0 :                                                         break;
    4651             :                                                 }
    4652             :                                         } else {
    4653       14392 :                                                 delete = strcmp(p + 1, "tail") != 0;
    4654             :                                         }
    4655             :                                 }
    4656        6457 :                         } else if (strncmp(p + 1, "theap", 5) == 0) {
                                                       /* keep a theap file only if the bat has a
                                                        * non-empty tvheap and is on disk */
    4657        5537 :                                 BAT *b = getdesc(bid);
    4658        5537 :                                 delete = (b == NULL || !b->tvheap || !b->batCopiedtodisk || b->tvheap->free == 0);
    4659         920 :                         } else if (strncmp(p + 1, "thashl", 6) == 0 ||
    4660         461 :                                    strncmp(p + 1, "thashb", 6) == 0) {
    4661             : #ifdef PERSISTENTHASH
                                                       /* NOTE(review): the pointer value 1 stored
                                                        * here (and for timprints, torderidx and
                                                        * tstrimps below) is presumably a marker
                                                        * that a file exists on disk -- confirm */
    4662         918 :                                 BAT *b = getdesc(bid);
    4663         918 :                                 delete = b == NULL;
    4664         918 :                                 if (!delete)
    4665         918 :                                         b->thash = (Hash *) 1;
    4666             : #else
    4667             :                                 delete = true;
    4668             : #endif
    4669           2 :                         } else if (strncmp(p + 1, "thash", 5) == 0) {
    4670             :                                 /* older versions used .thash which we
    4671             :                                  * can simply ignore */
    4672             :                                 delete = true;
    4673           2 :                         } else if (strncmp(p + 1, "thsh", 4) == 0) {
    4674             :                                 /* temporary hash files which we can
    4675             :                                  * simply ignore */
    4676             :                                 delete = true;
    4677           2 :                         } else if (strncmp(p + 1, "timprints", 9) == 0) {
    4678           0 :                                 BAT *b = getdesc(bid);
    4679           0 :                                 delete = b == NULL;
    4680           0 :                                 if (!delete)
    4681           0 :                                         b->timprints = (Imprints *) 1;
    4682           2 :                         } else if (strncmp(p + 1, "torderidx", 9) == 0) {
    4683             : #ifdef PERSISTENTIDX
    4684           0 :                                 BAT *b = getdesc(bid);
    4685           0 :                                 delete = b == NULL;
    4686           0 :                                 if (!delete)
    4687           0 :                                         b->torderidx = (Heap *) 1;
    4688             : #else
    4689             :                                 delete = true;
    4690             : #endif
    4691           2 :                         } else if (strncmp(p + 1, "tstrimps", 8) == 0) {
    4692           2 :                                 BAT *b = getdesc(bid);
    4693           2 :                                 delete = b == NULL;
    4694           2 :                                 if (!delete)
    4695           2 :                                         b->tstrimps = (Strimps *)1;
    4696           0 :                         } else if (strncmp(p + 1, "new", 3) != 0) {
                                                       /* any extension not handled above, except
                                                        * one starting with "new", is unknown */
    4697       26120 :                                 ok = false;
    4698             :                         }
    4699             :                 }
    4700       26120 :                 if (!ok) {
    4701             :                         /* found an unknown file; stop pruning in this
    4702             :                          * subdir */
    4703           0 :                         fprintf(stderr, "unexpected file %s, leaving %s.\n", dent->d_name, parent);
    4704           0 :                         break;
    4705             :                 }
    4706       26120 :                 if (delete) {
                                               /* a file that is already gone (ENOENT) counts
                                                * as removed */
    4707           2 :                         if (MT_remove(fullname) != 0 && errno != ENOENT) {
    4708           0 :                                 GDKsyserror("remove(%s)", fullname);
    4709           0 :                                 continue;
    4710             :                         }
    4711       32586 :                         TRC_DEBUG(IO_, "remove(%s) = 0\n", fullname);
    4712             :                 }
    4713             :         }
    4714        1614 :         closedir(dirp);
    4715        1614 :         return false;
    4716             : }
    4717             : 
                       /*
                        * Bring the BBP administration back to an empty state: the free
                        * list variables are zeroed, all allocated parts of the BBP array
                        * are freed, BBPsize is set to 0, the farm directory names are
                        * freed, BBPfarms and BBP_hash are cleared, and the lock owner,
                        * unload counter and backup counters are reset to 0.
                        */
    4718             : void
    4719         334 : gdk_bbp_reset(void)
    4720             : {
    4721         334 :         int i;
    4722             : 
    4723         334 :         BBP_free = 0;
    4724         334 :         BBP_nfree = 0;
                               /* free the BBP array from the top down, BBPINIT entries at a
                                * time */
    4725         675 :         while (BBPlimit > 0) {
    4726         341 :                 BBPlimit -= BBPINIT;
    4727         341 :                 assert(BBPlimit >= 0);
    4728         341 :                 GDKfree(BBP[BBPlimit >> BBPINITLOG]);
    4729         341 :                 BBP[BBPlimit >> BBPINITLOG] = NULL;
    4730             :         }
    4731         334 :         ATOMIC_SET(&BBPsize, 0);
    4732       11022 :         for (i = 0; i < MAXFARMS; i++)
    4733       10688 :                 GDKfree((void *) BBPfarms[i].dirname); /* lose "const" */
    4734         334 :         memset(BBPfarms, 0, sizeof(BBPfarms));
    4735         334 :         memset(BBP_hash, 0, sizeof(BBP_hash));
    4736             : 
    4737         334 :         locked_by = 0;
    4738         334 :         BBPunloadCnt = 0;
    4739         334 :         backup_files = 0;
    4740         334 :         backup_dir = 0;
    4741         334 :         backup_subdir = 0;
    4742         334 : }
    4743             : /*
                     :  * Registered callbacks.  callback_list.head points to the first
                     :  * element of a list that is linked through the `next' member of
                     :  * gdk_callback, and callback_list.cnt is kept as the number of
                     :  * elements in that list.  The functions in this file that read
                     :  * or change the list (gdk_add_callback, gdk_remove_callback and
                     :  * BBPcallbacks) hold GDKCallbackListLock while doing so.
                     :  */
    4744             : static MT_Lock GDKCallbackListLock = MT_LOCK_INITIALIZER(GDKCallbackListLock);
    4745             : 
    4746             : static struct {
    4747             :         int cnt;                /* number of callbacks in the list */
    4748             :         gdk_callback *head;     /* first callback, NULL when the list is empty */
    4749             : } callback_list = {
    4750             :         .cnt = 0,
    4751             :         .head = NULL,
    4752             : };
    4753             : 
    4754             : /*
    4755             :  * @- Add a callback
    4756             :  * Adds a new callback to the end of the callback list.
                     :  *
                     :  * name      name identifying the callback; only the pointer is
                     :  *           stored, the string itself is not copied
                     :  * f         function that do_callback() invokes with the stored
                     :  *           argc and argv
                     :  * argc      number of entries in argv
                     :  * argv      the first argc pointers are copied into the callback
                     :  *           record; what they point to is not copied
                     :  * interval  minimum distance between two calls; should_call()
                     :  *           scales it by 1000 * 1000 before comparing it against
                     :  *           GDKusec() (presumably seconds -- confirm against the
                     :  *           gdk_callback declaration); 0 means "on every round"
                     :  *
                     :  * Returns GDK_SUCCEED when the callback was appended.  Returns
                     :  * GDK_FAIL when the record cannot be allocated, or when a
                     :  * callback with an equal name is already registered (the newly
                     :  * allocated record is freed again in that case).
    4757             :  */
    4758             : gdk_return
    4759           0 : gdk_add_callback(char *name, gdk_callback_func *f, int argc, void *argv[], int
    4760             :                 interval)
    4761             : {
    4762             : 
    4763           0 :         gdk_callback *callback = NULL;
    4764             : 
    4765           0 :         if (!(callback = GDKmalloc(sizeof(gdk_callback) + sizeof(void *) * argc))) { /* record plus argc trailing argv slots */
    4766           0 :                 TRC_CRITICAL(GDK, "Failed to allocate memory!");
    4767           0 :                 return GDK_FAIL;
    4768             :         }
    4769             : 
    4770           0 :         *callback = (gdk_callback) { /* members not named here are zero-initialized */
    4771             :                 .name = name,
    4772             :                 .argc = argc,
    4773             :                 .interval = interval,
    4774             :                 .func = f,
    4775             :         };
    4776             : 
    4777           0 :         for (int i=0; i < argc; i++) {
    4778           0 :                 callback->argv[i] = argv[i];
    4779             :         }
    4780             : 
    4781           0 :         MT_lock_set(&GDKCallbackListLock);
    4782           0 :         gdk_callback *p = callback_list.head;
    4783           0 :         if (p) {
    4784             :                 int cnt = 1; /* counts the new callback itself */
    4785           0 :                 do {
    4786             :                         /* check if already added */
    4787           0 :                         if (strcmp(callback->name, p->name) == 0) {
    4788           0 :                                 MT_lock_unset(&GDKCallbackListLock);
    4789           0 :                                 GDKfree(callback);
    4790           0 :                                 return GDK_FAIL;
    4791             :                         }
    4792           0 :                         if (p->next == NULL) {
    4793           0 :                                 p->next = callback; /* append behind the last element */
    4794           0 :                                 p = callback->next; /* NULL, so the loop ends here */
    4795             :                         } else {
    4796             :                                 p = p->next;
    4797             :                         }
    4798           0 :                         cnt += 1; /* one more existing element visited */
    4799           0 :                 } while(p);
    4800           0 :                 callback_list.cnt = cnt;
    4801             :         } else {
    4802           0 :                 callback_list.cnt = 1;
    4803           0 :                 callback_list.head = callback;
    4804             :         }
    4805           0 :         MT_lock_unset(&GDKCallbackListLock);
    4806           0 :         return GDK_SUCCEED;
    4807             : }
    4808             : 
    4809             : /*
    4810             :  * @- Remove a callback
    4811             :  * Removes the callback with the given name from the callback list.
                     :  *
                     :  * cb_name   name to look for (compared with strcmp)
                     :  * argsfree  if not NULL, called with the argc and argv of the
                     :  *           callback that is being removed, before its record is
                     :  *           freed; its return value is not looked at
                     :  *
                     :  * Only the first callback whose name matches is removed.
                     :  * Returns GDK_SUCCEED when a callback was removed and GDK_FAIL
                     :  * when no callback with that name was found.
    4812             :  */
    4813             : gdk_return
    4814           0 : gdk_remove_callback(char *cb_name, gdk_callback_func *argsfree)
    4815             : {
    4816           0 :         gdk_callback *prev = NULL;
    4817           0 :         gdk_return res = GDK_FAIL;
    4818             : 
    4819           0 :         MT_lock_set(&GDKCallbackListLock);
    4820           0 :         gdk_callback *curr = callback_list.head;
    4821           0 :         while(curr) {
    4822           0 :                 if (strcmp(cb_name, curr->name) == 0) {
    4823           0 :                         if (curr == callback_list.head && prev == NULL) {
    4824           0 :                                 callback_list.head = curr->next; /* removing the first element */
    4825             :                         } else {
    4826           0 :                                 prev->next = curr->next; /* unlink from the middle or end */
    4827             :                         }
    4828           0 :                         if (argsfree)
    4829           0 :                                 argsfree(curr->argc, curr->argv);
    4830           0 :                         GDKfree(curr);
    4831           0 :                         curr = NULL; /* ends the loop: nothing after the first match is examined */
    4832           0 :                         callback_list.cnt -=1;
    4833           0 :                         res = GDK_SUCCEED;
    4834             :                 } else {
    4835           0 :                         prev = curr;
    4836           0 :                         curr = curr->next;
    4837             :                 }
    4838             :         }
    4839           0 :         MT_lock_unset(&GDKCallbackListLock);
    4840           0 :         return res;
    4841             : }
    4842             : /*
                     :  * Run callback cb: store the current GDKusec() value in
                     :  * cb->last_called, then call cb->func with the stored argc and
                     :  * argv and return whatever that function returns.
                     :  */
    4843             : static gdk_return
    4844           0 : do_callback(gdk_callback *cb)
    4845             : {
    4846           0 :         cb->last_called = GDKusec(); /* stamped before the call, not after it */
    4847           0 :         return cb->func(cb->argc, cb->argv);
    4848             : }
    4849             : /*
                     :  * Tell whether callback cb is due to run.  A callback that has
                     :  * not been stamped yet (last_called == 0) or that has no
                     :  * interval (interval == 0) is always due.  Otherwise it is due
                     :  * once GDKusec() has moved past last_called + interval * 1000000.
                     :  *
                     :  * The scaling is done in 64 bits on purpose.  The interval is
                     :  * handed to gdk_add_callback as an int, and a product
                     :  * interval * 1000 * 1000 evaluated in int overflows, which is
                     :  * undefined behaviour, as soon as interval exceeds
                     :  * INT_MAX / 1000000 (2147 when int is 32 bits wide).
                     :  */
    4850             : static bool
    4851           0 : should_call(gdk_callback *cb)
    4852             : {
    4853           0 :         if (cb->last_called && cb->interval) {
    4854           0 :                 return (cb->last_called + cb->interval * INT64_C(1000000)) <
    4855           0 :                         GDKusec();
    4856             :         }
    4857             :         return true;
    4858             : }
    4859             : /*
                     :  * Walk the callback list from head to tail while holding
                     :  * GDKCallbackListLock and run, through do_callback(), every
                     :  * callback for which should_call() returns true.  The values
                     :  * returned by the callbacks are not looked at.
                     :  */
    4860             : static void
    4861         113 : BBPcallbacks(void)
    4862             : {
    4863         113 :         MT_lock_set(&GDKCallbackListLock);
    4864         113 :         gdk_callback *next = callback_list.head;
    4865             : 
    4866         113 :         while (next) {
    4867           0 :                 if(should_call(next))
    4868           0 :                         do_callback(next); /* runs with the list lock held */
    4869           0 :                 next = next->next;
    4870             :         }
    4871         113 :         MT_lock_unset(&GDKCallbackListLock);
    4872         113 : }
    4873             : 
    4874             : /* GDKtmLock protects all accesses and changes to BAKDIR and SUBDIR.
    4875             :  * MUST use BBPtmlock()/BBPtmunlock() to set/unset the lock.
    4876             :  *
    4877             :  * This is at the end of the file on purpose: we don't want people to
    4878             :  * accidentally use GDKtmLock directly. */
    4879             : static MT_Lock GDKtmLock = MT_LOCK_INITIALIZER(GDKtmLock);
    4880             : static int lockfd; /* result of MT_lockf(lockfile, F_LOCK) in BBPtmlockFinish; set to -1 by BBPtmunlock after closing it */
    4881             : /*
                     :  * Second half of BBPtmlock(): unless GDKinmemory(0) is true, take
                     :  * the lock on the ".tm_lock" file as well.  The path is built
                     :  * with GDKfilepath() the first time it is needed and then kept
                     :  * in lockfile for later calls.  The result of
                     :  * MT_lockf(lockfile, F_LOCK) is stored in lockfd.  When
                     :  * GDKinmemory(0) is true, or no path could be obtained, nothing
                     :  * is locked here and lockfd is left as it was.
                     :  */
    4882             : static void
    4883      115169 : BBPtmlockFinish(void)
    4884             : {
    4885      115169 :         if (!GDKinmemory(0) &&
    4886             :             /* also use an external lock file to synchronize with
    4887             :              * external programs */
    4888      115169 :             (lockfile != NULL ||
    4889         335 :              (lockfile = GDKfilepath(0, NULL, ".tm_lock", NULL)) != NULL)) {
    4890      115169 :                     lockfd = MT_lockf(lockfile, F_LOCK);
    4891             :         }
    4892      115169 : }
    4893             : /*
                     :  * Acquire the transaction-management lock: first the in-process
                     :  * lock GDKtmLock, then (in BBPtmlockFinish) the external lock
                     :  * file.  Undo with BBPtmunlock().
                     :  */
    4894             : void
    4895      115169 : BBPtmlock(void)
    4896             : {
    4897      115169 :         MT_lock_set(&GDKtmLock);
    4898      115169 :         BBPtmlockFinish();
    4899      115169 : }
    4900             : /*
                     :  * Release what BBPtmlock() acquired, in reverse order.  If a
                     :  * lock file path is known and lockfd is not negative, the file
                     :  * lock is released with MT_lockf(lockfile, F_ULOCK), lockfd is
                     :  * closed and then set to -1.  After that GDKtmLock is released
                     :  * unconditionally.
                     :  */
    4901             : void
    4902      115169 : BBPtmunlock(void)
    4903             : {
    4904      115169 :         if (lockfile && lockfd >= 0) {
    4905      115169 :                 assert(!GDKinmemory(0));
    4906      115169 :                 MT_lockf(lockfile, F_ULOCK);
    4907      115169 :                 close(lockfd);
    4908      115169 :                 lockfd = -1; /* makes the lockfd >= 0 test above fail until the next successful lock */
    4909             :         }
    4910      115169 :         MT_lock_unset(&GDKtmLock);
    4911      115169 : }
    4912             : /*
                     :  * Print a summary of the bats in the BBP to stdout.
                     :  *
                     :  * Every bat id i in [1, BBPsize) that has a positive BBP_refs(i)
                     :  * or BBP_lrefs(i) and a non-NULL descriptor is counted in one of
                     :  * 32 categories.  For each category the number of bats and the
                     :  * summed HEAPmemsize() ("malloc") and HEAPvmsize() ("virtual")
                     :  * of the bats' theap and tvheap are kept; a heap is only added
                     :  * when the bat is the heap's parent (batCacheid == parentid).
                     :  * The scan runs under BBPtmlock(), and each bat is examined with
                     :  * its GDKswapLock and its theaplock held.
                     :  *
                     :  * One line is printed for every category that is not empty,
                     :  * followed by a line with the totals.  The label printed for a
                     :  * category must spell out the same five properties, in the same
                     :  * order, as the five array indexes used to reach it (see the
                     :  * comment on the array below).
                     :  */
    4913             : void
    4914         114 : BBPprintinfo(void)
    4915             : {
    4916             :         /* 32 categories for the bats, not all are expected to be filled;
                     :          * the indexes are, in this order (1 = first alternative):
                     :          * [fix / no fix][dirty / clean][persistent / transient]
                     :          * [loaded / not loaded][hot / cold] */
    4917         114 :         struct counters {
    4918             :                 size_t sz;
    4919             :                 size_t vmsz;
    4920             :                 int nr;
    4921         114 :         } bats[2][2][2][2][2] = {0};
    4922         114 :         int nbats = 0;
    4923             : 
    4924         114 :         BBPtmlock();
    4925         114 :         bat sz = (bat) ATOMIC_GET(&BBPsize);
    4926      351024 :         for (bat i = 1; i < sz; i++) {
    4927      350910 :                 MT_lock_set(&GDKswapLock(i));
    4928      350910 :                 int r;
    4929      350910 :                 if ((r = BBP_refs(i)) > 0 || BBP_lrefs(i) > 0) {
    4930      310468 :                         BAT *b = BBP_desc(i);
    4931      310468 :                         if (b != NULL) {
    4932      310468 :                                 nbats++;
    4933      310468 :                                 MT_lock_set(&b->theaplock);
    4934      310468 :                                 ATOMIC_BASE_TYPE status = BBP_status(i);
    4935      310468 :                                 struct counters *bt = &bats[r > 0][BATdirty(b)][(status & BBPPERSISTENT) != 0][(status & BBPLOADED) != 0][(status & BBPHOT) != 0];
    4936      310468 :                                 bt->nr++;
    4937      310468 :                                 if (b->theap && b->batCacheid == b->theap->parentid) {
    4938      310468 :                                         bt->sz += HEAPmemsize(b->theap);
    4939      310468 :                                         bt->vmsz += HEAPvmsize(b->theap);
    4940             :                                 }
    4941      310468 :                                 if (b->tvheap && b->batCacheid == b->tvheap->parentid) {
    4942       10838 :                                         bt->sz += HEAPmemsize(b->tvheap);
    4943       10838 :                                         bt->vmsz += HEAPvmsize(b->tvheap);
    4944             :                                 }
    4945      310468 :                                 MT_lock_unset(&b->theaplock);
    4946             :                         }
    4947             :                 }
    4948      350910 :                 MT_lock_unset(&GDKswapLock(i));
    4949             :         }
    4950         114 :         uint32_t nfree = BBP_nfree;
    4951         114 :         BBPtmunlock();
    4952         114 :         if (bats[1][1][1][1][1].nr > 0)
    4953         114 :                 printf("fix, dirty, persistent, loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[1][1][1][1][1].nr, bats[1][1][1][1][1].vmsz, bats[1][1][1][1][1].sz);
    4954         114 :         if (bats[1][1][1][1][0].nr > 0)
    4955           0 :                 printf("fix, dirty, persistent, loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[1][1][1][1][0].nr, bats[1][1][1][1][0].vmsz, bats[1][1][1][1][0].sz);
    4956         114 :         if (bats[1][1][1][0][1].nr > 0)
    4957           0 :                 printf("fix, dirty, persistent, not loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[1][1][1][0][1].nr, bats[1][1][1][0][1].vmsz, bats[1][1][1][0][1].sz);
    4958         114 :         if (bats[1][1][1][0][0].nr > 0)
    4959           0 :                 printf("fix, dirty, persistent, not loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[1][1][1][0][0].nr, bats[1][1][1][0][0].vmsz, bats[1][1][1][0][0].sz);
    4960         114 :         if (bats[1][1][0][1][1].nr > 0)
    4961         114 :                 printf("fix, dirty, transient, loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[1][1][0][1][1].nr, bats[1][1][0][1][1].vmsz, bats[1][1][0][1][1].sz);
    4962         114 :         if (bats[1][1][0][1][0].nr > 0)
    4963           1 :                 printf("fix, dirty, transient, loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[1][1][0][1][0].nr, bats[1][1][0][1][0].vmsz, bats[1][1][0][1][0].sz);
    4964         114 :         if (bats[1][1][0][0][1].nr > 0)
    4965           0 :                 printf("fix, dirty, transient, not loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[1][1][0][0][1].nr, bats[1][1][0][0][1].vmsz, bats[1][1][0][0][1].sz);
    4966         114 :         if (bats[1][1][0][0][0].nr > 0)
    4967           0 :                 printf("fix, dirty, transient, not loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[1][1][0][0][0].nr, bats[1][1][0][0][0].vmsz, bats[1][1][0][0][0].sz);
    4968         114 :         if (bats[1][0][1][1][1].nr > 0)
    4969         106 :                 printf("fix, clean, persistent, loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[1][0][1][1][1].nr, bats[1][0][1][1][1].vmsz, bats[1][0][1][1][1].sz);
    4970         114 :         if (bats[1][0][1][1][0].nr > 0)
    4971           0 :                 printf("fix, clean, persistent, loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[1][0][1][1][0].nr, bats[1][0][1][1][0].vmsz, bats[1][0][1][1][0].sz);
    4972         114 :         if (bats[1][0][1][0][1].nr > 0)
    4973           0 :                 printf("fix, clean, persistent, not loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[1][0][1][0][1].nr, bats[1][0][1][0][1].vmsz, bats[1][0][1][0][1].sz);
    4974         114 :         if (bats[1][0][1][0][0].nr > 0)
    4975           0 :                 printf("fix, clean, persistent, not loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[1][0][1][0][0].nr, bats[1][0][1][0][0].vmsz, bats[1][0][1][0][0].sz);
    4976         114 :         if (bats[1][0][0][1][1].nr > 0)
    4977           0 :                 printf("fix, clean, transient, loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[1][0][0][1][1].nr, bats[1][0][0][1][1].vmsz, bats[1][0][0][1][1].sz);
    4978         114 :         if (bats[1][0][0][1][0].nr > 0)
    4979           0 :                 printf("fix, clean, transient, loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[1][0][0][1][0].nr, bats[1][0][0][1][0].vmsz, bats[1][0][0][1][0].sz);
    4980         114 :         if (bats[1][0][0][0][1].nr > 0)
    4981           0 :                 printf("fix, clean, transient, not loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[1][0][0][0][1].nr, bats[1][0][0][0][1].vmsz, bats[1][0][0][0][1].sz);
    4982         114 :         if (bats[1][0][0][0][0].nr > 0)
    4983           0 :                 printf("fix, clean, transient, not loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[1][0][0][0][0].nr, bats[1][0][0][0][0].vmsz, bats[1][0][0][0][0].sz);
    4984         114 :         if (bats[0][1][1][1][1].nr > 0)
    4985         107 :                 printf("no fix, dirty, persistent, loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[0][1][1][1][1].nr, bats[0][1][1][1][1].vmsz, bats[0][1][1][1][1].sz);
    4986         114 :         if (bats[0][1][1][1][0].nr > 0)
    4987          33 :                 printf("no fix, dirty, persistent, loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[0][1][1][1][0].nr, bats[0][1][1][1][0].vmsz, bats[0][1][1][1][0].sz);
    4988         114 :         if (bats[0][1][1][0][1].nr > 0)
    4989           0 :                 printf("no fix, dirty, persistent, not loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[0][1][1][0][1].nr, bats[0][1][1][0][1].vmsz, bats[0][1][1][0][1].sz);
    4990         114 :         if (bats[0][1][1][0][0].nr > 0)
    4991           2 :                 printf("no fix, dirty, persistent, not loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[0][1][1][0][0].nr, bats[0][1][1][0][0].vmsz, bats[0][1][1][0][0].sz);
    4992         114 :         if (bats[0][1][0][1][1].nr > 0)
    4993          87 :                 printf("no fix, dirty, transient, loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[0][1][0][1][1].nr, bats[0][1][0][1][1].vmsz, bats[0][1][0][1][1].sz);
    4994         114 :         if (bats[0][1][0][1][0].nr > 0)
    4995          19 :                 printf("no fix, dirty, transient, loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[0][1][0][1][0].nr, bats[0][1][0][1][0].vmsz, bats[0][1][0][1][0].sz);
    4996         114 :         if (bats[0][1][0][0][1].nr > 0)
    4997           0 :                 printf("no fix, dirty, transient, not loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[0][1][0][0][1].nr, bats[0][1][0][0][1].vmsz, bats[0][1][0][0][1].sz);
    4998         114 :         if (bats[0][1][0][0][0].nr > 0)
    4999          16 :                 printf("no fix, dirty, transient, not loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[0][1][0][0][0].nr, bats[0][1][0][0][0].vmsz, bats[0][1][0][0][0].sz);
    5000         114 :         if (bats[0][0][1][1][1].nr > 0)
    5001         113 :                 printf("no fix, clean, persistent, loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[0][0][1][1][1].nr, bats[0][0][1][1][1].vmsz, bats[0][0][1][1][1].sz);
    5002         114 :         if (bats[0][0][1][1][0].nr > 0)
    5003          25 :                 printf("no fix, clean, persistent, loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[0][0][1][1][0].nr, bats[0][0][1][1][0].vmsz, bats[0][0][1][1][0].sz);
    5004         114 :         if (bats[0][0][1][0][1].nr > 0)
    5005           0 :                 printf("no fix, clean, persistent, not loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[0][0][1][0][1].nr, bats[0][0][1][0][1].vmsz, bats[0][0][1][0][1].sz);
    5006         114 :         if (bats[0][0][1][0][0].nr > 0)
    5007          18 :                 printf("no fix, clean, persistent, not loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[0][0][1][0][0].nr, bats[0][0][1][0][0].vmsz, bats[0][0][1][0][0].sz);
    5008         114 :         if (bats[0][0][0][1][1].nr > 0)
    5009           4 :                 printf("no fix, clean, transient, loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[0][0][0][1][1].nr, bats[0][0][0][1][1].vmsz, bats[0][0][0][1][1].sz);
    5010         114 :         if (bats[0][0][0][1][0].nr > 0)
    5011           1 :                 printf("no fix, clean, transient, loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[0][0][0][1][0].nr, bats[0][0][0][1][0].vmsz, bats[0][0][0][1][0].sz);
    5012         114 :         if (bats[0][0][0][0][1].nr > 0)
    5013           0 :                 printf("no fix, clean, transient, not loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[0][0][0][0][1].nr, bats[0][0][0][0][1].vmsz, bats[0][0][0][0][1].sz);
    5014         114 :         if (bats[0][0][0][0][0].nr > 0)
    5015           0 :                 printf("no fix, clean, transient, not loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[0][0][0][0][0].nr, bats[0][0][0][0][0].vmsz, bats[0][0][0][0][0].sz);
    5016             : 
    5017         114 :         printf("%d bats total, %d in use, %"PRIu32" free bats in common shared list\n",
    5018             :                sz - 1, nbats, nfree);
    5019         114 : }

Generated by: LCOV version 1.14