Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * @a M. L. Kersten, P. Boncz, N. J. Nes
15 : * @* BAT Buffer Pool (BBP)
16 : * The BATs created and loaded are collected in a BAT buffer pool.
17 : * The Bat Buffer Pool has a number of functions:
18 : * @table @code
19 : *
20 : * @item administration and lookup
21 : * The BBP is a directory which contains status information about all
22 : * known BATs. This interface may be used very heavily, by
23 : * data-intensive applications. To eliminate all overhead, read-only
24 : * access to the BBP may be done by table-lookups. The integer index
25 : * type for these lookups is @emph{bat}, as retrieved by
26 : * @emph{b->batCacheid}. The @emph{bat} zero is reserved for the nil
27 : * bat.
28 : *
29 : * @item persistence
30 : * The BBP is made persistent by saving it to the dictionary file
31 : * called @emph{BBP.dir} in the database.
32 : *
33 : * When the number of BATs rises, having all files in one directory
34 : * becomes a bottleneck. The BBP therefore implements a scheme that
35 : * distributes all BATs in a growing directory tree with at most 64
36 : * BATs stored in one node.
37 : *
38 : * @item buffer management
39 : * The BBP is responsible for loading and saving of BATs to disk. It
40 : * also contains routines to unload BATs from memory when memory
41 : * resources get scarce. For this purpose, it administers BAT memory
42 : * reference counts (to know which BATs can be unloaded) and BAT usage
43 : * statistics (it unloads the least recently used BATs).
44 : *
45 : * @item recovery
46 : * When the database is closed or during a run-time syncpoint, the
47 : * system tables must be written to disk in a safe way, that is immune
48 : * for system failures (like disk full). To do so, the BBP implements
49 : * an atomic commit and recovery protocol: first all files to be
50 : * overwritten are moved to a BACKUP/ dir. If that succeeds, the
51 : * writes are done. If that also fully succeeds the BACKUP/ dir is
52 : * renamed to DELETE_ME/ and subsequently deleted. If not, all files
53 : * in BACKUP/ are moved back to their original location.
54 : *
55 : * @item unloading
56 : * Bats which have a logical reference (ie. a lrefs > 0) but no memory
57 : * reference (refcnt == 0) can be unloaded. Unloading dirty bats
58 : * means, moving the original (committed version) to the BACKUP/ dir
59 : * and saving the bat. This complicates the commit and recovery/abort
60 : * issues. The commit has to check if the bat is already moved, and
61 : * the recovery always has to move back the files from the BACKUP/
62 : * dir.
63 : *
64 : * @item reference counting
65 : * Bats have two kinds of references: logical and physical
66 : * (pointer) ones. The logical references are administered by
67 : * BBPretain/BBPrelease, the physical ones by BBPfix/BBPunfix.
68 : */
69 :
70 : #include "monetdb_config.h"
71 : #include "gdk.h"
72 : #include "gdk_private.h"
73 : #include "mutils.h"
74 : #ifdef HAVE_FCNTL_H
75 : #include <fcntl.h>
76 : #endif
77 :
78 : #ifndef F_OK
79 : #define F_OK 0
80 : #endif
81 : #ifndef S_ISDIR
82 : #define S_ISDIR(mode) (((mode) & _S_IFMT) == _S_IFDIR)
83 : #endif
84 : #ifndef O_CLOEXEC
85 : #ifdef _O_NOINHERIT
86 : #define O_CLOEXEC _O_NOINHERIT /* Windows */
87 : #else
88 : #define O_CLOEXEC 0
89 : #endif
90 : #endif
91 : #ifndef O_BINARY
92 : #define O_BINARY 0
93 : #endif
94 :
95 : /*
96 : * The BBP has a fixed address, so re-allocation due to a growing BBP
97 : * caused by one thread does not disturb reads to the old entries by
98 : * another. This is implemented using anonymous virtual memory;
99 : * extensions on the same address are guaranteed because a large
100 : * non-committed VM area is requested initially. New slots in the BBP
101 : * are found in O(1) by keeping a freelist that uses the 'next' field
102 : * in the BBPrec records.
103 : */
104 : BBPrec *BBP[N_BBPINIT]; /* table of BBPrec chunks; BBPextend() allocates one chunk of BBPINIT records at a time */
105 : bat BBPlimit = 0; /* number of BBPrec slots allocated so far; BBPextend() grows it in steps of BBPINIT */
106 : static ATOMIC_TYPE BBPsize = ATOMIC_VAR_INIT(0); /* current used size of BBP array (read via getBBPsize()) */
107 :
108 : struct BBPfarm_t BBPfarms[MAXFARMS]; /* farm table; BBPselectfarm() searches it by role bit */
109 :
110 : #define KITTENNAP 1 /* used to suspend processing */
111 : #define BBPNONAME "." /* filler for no name in BBP.dir */
112 : /*
113 : * The hash index uses a bucket index (int array) of size mask that is
114 : * tuned for perfect hashing (1 lookup). The bucket chain uses the
115 : * 'next' field in the BBPrec records.
116 : */
117 : static MT_Lock BBPnameLock = MT_LOCK_INITIALIZER(BBPnameLock); /* held by BBPreadEntries() while it fills BBP_hash */
118 : #define BBP_mask 1023 /* bucket index = strHash(name) & BBP_mask, so there are BBP_mask+1 buckets */
119 : static bat BBP_hash[BBP_mask+1]; /* BBP logical name hash buckets; 0 terminates a chain */
120 : static MT_Lock GDKcacheLock = MT_LOCK_INITIALIZER(GDKcacheLock); /* taken by BBPlock(); guards free slot management per the locking notes in this file */
121 : static bat BBP_free; /* NOTE(review): presumably the head of the free-slot list; its use is not visible in this part of the file */
122 : static uint32_t BBP_nfree; /* NOTE(review): presumably the length of that free list -- confirm */
123 : #define BBP_FREE_LOWATER 10 /* NOTE(review): low/high water marks, presumably for BBP_nfree -- confirm */
124 : #define BBP_FREE_HIWATER 50
125 :
126 : static gdk_return BBPfree(BAT *b);
127 : static void BBPdestroy(BAT *b);
128 : static void BBPuncacheit(bat bid, bool unloaddesc);
129 : static gdk_return BBPprepare(bool subcommit);
130 : static BAT *getBBPdescriptor(bat i);
131 : static gdk_return BBPbackup(BAT *b, bool subcommit);
132 : static gdk_return BBPdir_init(void);
133 : static void BBPcallbacks(void);
134 :
135 : /* two lngs of extra info in BBP.dir */
136 : /* these two are atomic because of their use in log_new() */
137 : static ATOMIC_TYPE BBPlogno = ATOMIC_VAR_INIT(0); /* read via getBBPlogno() */
138 : static ATOMIC_TYPE BBPtransid = ATOMIC_VAR_INIT(0); /* read via getBBPtransid() */
139 :
140 : #define BBPtmpcheck(s) (strncmp(s, "tmp_", 4) == 0) /* true when s starts with the "tmp_" prefix */
141 :
142 : #define BBPnamecheck(s) (BBPtmpcheck(s) ? strtol((s) + 4, NULL, 8) : 0) /* octal number following "tmp_" (names are written with "tmp_%o"), 0 for any other name */
143 :
144 : #define BATno_shared_heap(b) /* no tail view parent and exactly one HEAPREFS reference on the tail heap */ \
145 : (!VIEWtparent(b) && (ATOMIC_GET(&(b)->theap->refs) & HEAPREFS) == 1)
146 :
147 : #define BATshared(b) /* more than one HEAPREFS reference on a tail or var heap for which b has no view parent */ \
148 : ((!VIEWtparent(b) && (ATOMIC_GET(&(b)->theap->refs) & HEAPREFS) > 1) || \
149 : ((b)->tvheap && !VIEWvtparent(b) && (ATOMIC_GET(&(b)->tvheap->refs) & HEAPREFS) > 1))
150 :
151 : static void
152 26916 : BBP_insert(bat i)
153 : {
154 26916 : bat idx = (bat) (strHash(BBP_logical(i)) & BBP_mask);
155 :
156 26916 : BBP_next(i) = BBP_hash[idx];
157 26916 : BBP_hash[idx] = i;
158 26916 : }
159 :
160 : static void
161 14960 : BBP_delete(bat i)
162 : {
163 14960 : const char *s = BBP_logical(i);
164 14960 : bat idx = (bat) (strHash(s) & BBP_mask);
165 :
166 14960 : for (bat *h = &BBP_hash[idx]; (i = *h) != 0; h = &BBP_next(i)) {
167 14960 : if (strcmp(BBP_logical(i), s) == 0) {
168 14960 : *h = BBP_next(i);
169 14960 : break;
170 : }
171 : }
172 14960 : }
173 :
174 : bat
175 418055821 : getBBPsize(void)
176 : {
177 418055821 : return (bat) ATOMIC_GET(&BBPsize);
178 : }
179 :
180 : lng
181 368 : getBBPlogno(void)
182 : {
183 368 : return (lng) ATOMIC_GET(&BBPlogno);
184 : }
185 :
186 : lng
187 368 : getBBPtransid(void)
188 : {
189 368 : return (lng) ATOMIC_GET(&BBPtransid);
190 : }
191 :
192 :
193 : /*
194 : * @+ BBP Consistency and Concurrency
195 : * While GDK provides the basic building blocks for an ACID system, in
196 : * itself it is not such a system, as this would entail too much
197 : * overhead that is often not needed. Hence, some consistency control
198 : * is left to the user. The first important user constraint is that if
199 : * a user updates a BAT, they must themselves ensure that no one else
200 : * accesses this BAT.
201 : *
202 : * Concerning buffer management, the BBP carries out a swapping
203 : * policy. BATs are kept in memory till the memory is full. If the
204 : * memory is full, the malloc functions initiate BBP trim actions,
205 : * that unload the coldest BATs that have a zero reference count. The
206 : * second important user constraint is therefore that a user may only
207 : * manipulate live BAT data in memory if it is sure that there is at
208 : * least one reference count to that BAT.
209 : *
210 : * The main BBP array is protected by two locks:
211 : * @table @code
212 : * @item GDKcacheLock
213 : * this lock guards the free slot management in the BBP array. The
214 : * BBP operations that allocate a new slot for a new BAT
215 : * (@emph{BBPinit},@emph{BBPcacheit}), delete the slot of a destroyed
216 : * BAT (@emph{BBPreclaim}), or rename a BAT (@emph{BBPrename}), hold
217 : * this lock. It also protects all BAT (re)naming actions, including
218 : * reads and writes of the hash table with BAT names.
219 : * @item GDKswapLock
220 : * this lock guards the swap (loaded/unloaded) status of the
221 : * BATs. Hence, all BBP routines that influence the swapping policy,
222 : * or actually carry out the swapping policy itself, acquire this lock
223 : * (e.g. @emph{BBPfix},@emph{BBPunfix}). Note that this also means
224 : * that updates to the BBP_status indicator array must be protected by
225 : * GDKswapLock.
226 : *
227 : * To reduce contention GDKswapLock was split into multiple locks; it
228 : * is now an array of lock pointers which is accessed by
229 : * GDKswapLock(bat)
230 : * @end table
231 : *
232 : * Routines that need both locks should first acquire the locks in the
233 : * GDKswapLock array (in ascending order) and then GDKcacheLock (and
234 : * release them in reverse order).
235 : *
236 : * To obtain maximum speed, read operations to existing elements in
237 : * the BBP are unguarded. As said, it is the user's responsibility that
238 : * the BAT that is being read is not being modified. BBP update
239 : * actions that modify the BBP data structure itself are locked by the
240 : * BBP functions themselves. Hence, multiple concurrent BBP read
241 : * operations may be ongoing while at the same time at most one BBP
242 : * write operation @strong{on a different BAT} is executing. This
243 : * holds for accesses to the public (quasi-) arrays @emph{BBPcache},
244 : * @emph{BBPstatus} and @emph{BBPrefs}.
245 : * These arrays are called quasi as now they are
246 : * actually stored together in one big BBPrec array called BBP, that
247 : * is allocated in anonymous VM space, so we can reallocate this
248 : * structure without changing the base address (a crucial feature if
249 : * read actions are to go on unlocked while other entries in the BBP
250 : * may be modified).
251 : */
252 : static volatile MT_Id locked_by = 0; /* set to MT_getpid() at the end of BBPlock(), reset to 0 by BBPunlock() */
253 :
254 : /* use a lock instead of atomic instructions: BBPlock() keeps GDKunloadLock held
255 : * while it takes all other locks, so these macros block until BBPlock() is done */
256 : #define BBP_unload_inc() \
257 : do { \
258 : MT_lock_set(&GDKunloadLock); \
259 : BBPunloadCnt++; \
260 : MT_lock_unset(&GDKunloadLock); \
261 : } while (0)
262 :
263 : #define BBP_unload_dec() \
264 : do { \
265 : MT_lock_set(&GDKunloadLock); \
266 : --BBPunloadCnt; \
267 : assert(BBPunloadCnt >= 0); \
268 : MT_lock_unset(&GDKunloadLock); \
269 : } while (0)
270 :
271 : static int BBPunloadCnt = 0; /* pending unloads; BBPlock() polls until this drops to 0; only touched under GDKunloadLock */
272 : static MT_Lock GDKunloadLock = MT_LOCK_INITIALIZER(GDKunloadLock); /* protects BBPunloadCnt */
273 :
274 : void
275 16 : BBPlock(void)
276 : {
277 16 : int i;
278 :
279 : /* wait for all pending unloads to finish */
280 16 : MT_lock_set(&GDKunloadLock);
281 16 : while (BBPunloadCnt > 0) {
282 0 : MT_lock_unset(&GDKunloadLock);
283 0 : MT_sleep_ms(1);
284 16 : MT_lock_set(&GDKunloadLock);
285 : }
286 :
287 16 : BBPtmlock();
288 16 : MT_lock_set(&GDKcacheLock);
289 131104 : for (i = 0; i <= BBP_BATMASK; i++)
290 131072 : MT_lock_set(&GDKswapLock(i));
291 16 : locked_by = MT_getpid();
292 :
293 16 : MT_lock_unset(&GDKunloadLock);
294 16 : }
295 :
296 : void
297 16 : BBPunlock(void)
298 : {
299 16 : int i;
300 :
301 131088 : for (i = BBP_BATMASK; i >= 0; i--)
302 131072 : MT_lock_unset(&GDKswapLock(i));
303 16 : MT_lock_unset(&GDKcacheLock);
304 16 : locked_by = 0;
305 16 : BBPtmunlock();
306 16 : }
307 :
308 : int
309 11705017 : BBPselectfarm(role_t role, int type, enum heaptype hptype)
310 : {
311 11705017 : int i;
312 :
313 11705017 : (void) type; /* may use in future */
314 11705017 : (void) hptype; /* may use in future */
315 :
316 11705017 : if (GDKinmemory(0))
317 : return 0;
318 :
319 : #ifndef PERSISTENTHASH
320 : if (hptype == hashheap)
321 : role = TRANSIENT;
322 : #endif
323 : #ifndef PERSISTENTIDX
324 : if (hptype == orderidxheap)
325 : role = TRANSIENT;
326 : #endif
327 23044660 : for (i = 0; i < MAXFARMS; i++)
328 23044660 : if (BBPfarms[i].roles & (1U << (int) role))
329 11684846 : return i;
330 : /* must be able to find farms for TRANSIENT and PERSISTENT */
331 0 : assert(role != TRANSIENT && role != PERSISTENT);
332 : return -1;
333 : }
334 :
335 : static gdk_return
336 343 : BBPextend(bat newsize)
337 : {
338 343 : if (newsize > N_BBPINIT * BBPINIT) {
339 0 : GDKerror("trying to extend BAT pool beyond the "
340 : "limit (%d)\n", N_BBPINIT * BBPINIT);
341 0 : return GDK_FAIL;
342 : }
343 :
344 : /* make sure the new size is at least BBPsize large */
345 686 : while (BBPlimit < newsize) {
346 343 : BUN limit = BBPlimit >> BBPINITLOG;
347 343 : assert(BBP[limit] == NULL);
348 343 : BBP[limit] = GDKzalloc(BBPINIT * sizeof(BBPrec));
349 343 : if (BBP[limit] == NULL) {
350 0 : GDKerror("failed to extend BAT pool\n");
351 0 : return GDK_FAIL;
352 : }
353 5620055 : for (BUN i = 0; i < BBPINIT; i++) {
354 5619712 : ATOMIC_INIT(&BBP[limit][i].status, 0);
355 5619712 : BBP[limit][i].pid = ~(MT_Id)0;
356 : }
357 343 : BBPlimit += BBPINIT;
358 : }
359 :
360 : return GDK_SUCCEED;
361 : }
362 :
363 : static gdk_return
364 112 : recover_dir(int farmid, bool direxists)
365 : {
366 112 : if (direxists) {
367 : /* just try; don't care about these non-vital files */
368 0 : if (GDKunlink(farmid, BATDIR, "BBP", "bak") != GDK_SUCCEED)
369 0 : GDKwarning("unlink of BBP.bak failed\n");
370 0 : if (GDKmove(farmid, BATDIR, "BBP", "dir", BATDIR, "BBP", "bak", false) != GDK_SUCCEED)
371 0 : GDKwarning("rename of BBP.dir to BBP.bak failed\n");
372 : }
373 112 : return GDKmove(farmid, BAKDIR, "BBP", "dir", BATDIR, "BBP", "dir", true);
374 : }
375 :
376 : static gdk_return BBPrecover(int farmid);
377 : static gdk_return BBPrecover_subdir(void);
378 : static bool BBPdiskscan(const char *, size_t);
379 :
380 : static int
381 8078 : vheapinit(BAT *b, const char *buf, unsigned bbpversion, const char *filename, int lineno)
382 : {
383 8078 : int n = 0;
384 8078 : uint64_t free, size;
385 8078 : uint16_t storage;
386 :
387 8078 : (void) bbpversion; /* could be used to implement compatibility */
388 :
389 8078 : size = 0; /* for GDKLIBRARY_HSIZE case */
390 8078 : storage = STORE_INVALID; /* for GDKLIBRARY_HSIZE case */
391 16156 : if (bbpversion <= GDKLIBRARY_HSIZE ?
392 0 : sscanf(buf,
393 : " %" SCNu64 " %" SCNu64 " %" SCNu16
394 : "%n",
395 : &free, &size, &storage, &n) < 3 :
396 8078 : sscanf(buf,
397 : " %" SCNu64
398 : "%n",
399 : &free, &n) < 1) {
400 0 : TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d", lineno);
401 0 : return -1;
402 : }
403 8078 : if (b->batCount == 0)
404 2321 : free = 0;
405 8078 : if (b->ttype >= 0 &&
406 7864 : ATOMstorage(b->ttype) == TYPE_str &&
407 7810 : free < GDK_STRHASHTABLE * sizeof(stridx_t) + BATTINY * GDK_VARALIGN)
408 6664 : size = GDK_STRHASHTABLE * sizeof(stridx_t) + BATTINY * GDK_VARALIGN;
409 1414 : else if (free < 512)
410 132 : size = 512;
411 : else
412 1282 : size = free;
413 16156 : *b->tvheap = (Heap) {
414 8078 : .free = (size_t) free,
415 8078 : .size = (size_t) size,
416 : .base = NULL,
417 : .storage = STORE_INVALID,
418 : .cleanhash = true,
419 : .newstorage = STORE_INVALID,
420 : .dirty = false,
421 8078 : .parentid = b->batCacheid,
422 8078 : .farmid = BBPselectfarm(PERSISTENT, b->ttype, varheap),
423 8078 : .hasfile = free > 0,
424 : };
425 8078 : strconcat_len(b->tvheap->filename, sizeof(b->tvheap->filename),
426 : filename, ".theap", NULL);
427 8078 : return n;
428 : }
429 :
430 : static int
431 30944 : heapinit(BAT *b, const char *buf,
432 : #ifdef GDKLIBRARY_HASHASH
433 : int *hashash,
434 : #endif
435 : unsigned bbpversion, const char *filename, int lineno)
436 : {
437 30944 : int t;
438 30944 : char type[33];
439 30944 : uint16_t width;
440 30944 : uint16_t var;
441 30944 : uint16_t properties;
442 30944 : uint64_t nokey0;
443 30944 : uint64_t nokey1;
444 30944 : uint64_t nosorted;
445 30944 : uint64_t norevsorted;
446 30944 : uint64_t base;
447 30944 : uint64_t free;
448 30944 : uint64_t size;
449 30944 : uint16_t storage;
450 30944 : uint64_t minpos, maxpos;
451 30944 : int n;
452 :
453 30944 : (void) bbpversion; /* could be used to implement compatibility */
454 :
455 30944 : minpos = maxpos = (uint64_t) oid_nil; /* for GDKLIBRARY_MINMAX_POS case */
456 30944 : size = 0; /* for GDKLIBRARY_HSIZE case */
457 30944 : storage = STORE_INVALID; /* for GDKLIBRARY_HSIZE case */
458 61888 : if (bbpversion <= GDKLIBRARY_MINMAX_POS ?
459 0 : sscanf(buf,
460 : " %10s %" SCNu16 " %" SCNu16 " %" SCNu16 " %" SCNu64
461 : " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64
462 : " %" SCNu64 " %" SCNu64 " %" SCNu16
463 : "%n",
464 : type, &width, &var, &properties, &nokey0,
465 : &nokey1, &nosorted, &norevsorted, &base,
466 : &free, &size, &storage,
467 : &n) < 12 :
468 : bbpversion <= GDKLIBRARY_HSIZE ?
469 0 : sscanf(buf,
470 : " %10s %" SCNu16 " %" SCNu16 " %" SCNu16 " %" SCNu64
471 : " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64
472 : " %" SCNu64 " %" SCNu64 " %" SCNu16 " %" SCNu64 " %" SCNu64
473 : "%n",
474 : type, &width, &var, &properties, &nokey0,
475 : &nokey1, &nosorted, &norevsorted, &base,
476 : &free, &size, &storage, &minpos, &maxpos,
477 : &n) < 14 :
478 30944 : sscanf(buf,
479 : " %10s %" SCNu16 " %" SCNu16 " %" SCNu16 " %" SCNu64
480 : " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64
481 : " %" SCNu64 " %" SCNu64 " %" SCNu64
482 : "%n",
483 : type, &width, &var, &properties, &nokey0,
484 : &nokey1, &nosorted, &norevsorted, &base,
485 : &free, &minpos, &maxpos,
486 : &n) < 12) {
487 0 : TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d", lineno);
488 0 : return -1;
489 : }
490 :
491 30944 : if (strcmp(type, "wkba") == 0)
492 0 : GDKwarning("type wkba (SQL name: GeometryA) is deprecated\n");
493 :
494 30944 : if (properties & ~0x0F81) {
495 0 : TRC_CRITICAL(GDK, "unknown properties are set: incompatible database on line %d of BBP.dir\n", lineno);
496 0 : return -1;
497 : }
498 : #ifdef GDKLIBRARY_HASHASH
499 30944 : *hashash = var & 2;
500 : #endif
501 30944 : var &= ~2;
502 30944 : if ((t = ATOMindex(type)) < 0) {
503 241 : if ((t = ATOMunknown_find(type)) == 0) {
504 0 : TRC_CRITICAL(GDK, "no space for atom %s", type);
505 0 : return -1;
506 : }
507 38567 : } else if (var != (t == TYPE_void || BATatoms[t].atomPut != NULL)) {
508 0 : TRC_CRITICAL(GDK, "inconsistent entry in BBP.dir: tvarsized mismatch for BAT %d on line %d\n", (int) b->batCacheid, lineno);
509 0 : return -1;
510 30703 : } else if (var && t != 0 ?
511 7864 : ATOMsize(t) < width ||
512 7864 : (width != 1 && width != 2 && width != 4
513 : #if SIZEOF_VAR_T == 8
514 54 : && width != 8
515 : #endif
516 : ) :
517 22839 : ATOMsize(t) != width) {
518 0 : TRC_CRITICAL(GDK, "inconsistent entry in BBP.dir: tsize mismatch for BAT %d on line %d\n", (int) b->batCacheid, lineno);
519 0 : return -1;
520 : }
521 30944 : b->ttype = t;
522 30944 : b->twidth = width;
523 30944 : b->tshift = ATOMelmshift(width);
524 30944 : assert_shift_width(b->tshift,b->twidth);
525 30944 : b->tnokey[0] = (BUN) nokey0;
526 30944 : b->tnokey[1] = (BUN) nokey1;
527 30944 : b->tsorted = (bit) ((properties & 0x0001) != 0);
528 30944 : b->trevsorted = (bit) ((properties & 0x0080) != 0);
529 30944 : b->tkey = (properties & 0x0100) != 0;
530 30944 : b->tnonil = (properties & 0x0400) != 0;
531 30944 : b->tnil = (properties & 0x0800) != 0;
532 30944 : b->tnosorted = (BUN) nosorted;
533 30944 : b->tnorevsorted = (BUN) norevsorted;
534 30944 : b->tunique_est = 0.0;
535 : /* (properties & 0x0200) is the old tdense flag */
536 30944 : b->tseqbase = (properties & 0x0200) == 0 || base >= (uint64_t) oid_nil ? oid_nil : (oid) base;
537 30944 : b->theap->free = (size_t) free;
538 30944 : b->theap->hasfile = free > 0;
539 : /* set heap size to match capacity */
540 30944 : if (b->ttype == TYPE_msk) {
541 : /* round up capacity to multiple of 32 */
542 4716 : b->batCapacity = (b->batCapacity + 31) & ~((BUN) 31);
543 4716 : b->theap->size = b->batCapacity / 8;
544 : } else {
545 26228 : b->theap->size = (size_t) b->batCapacity << b->tshift;
546 : }
547 30944 : b->theap->base = NULL;
548 30944 : settailname(b->theap, filename, t, width);
549 30944 : b->theap->storage = STORE_INVALID;
550 30944 : b->theap->newstorage = STORE_INVALID;
551 30944 : b->theap->farmid = BBPselectfarm(PERSISTENT, b->ttype, offheap);
552 30944 : b->theap->dirty = false;
553 30944 : b->theap->parentid = b->batCacheid;
554 30944 : if (minpos < b->batCount)
555 11738 : b->tminpos = (BUN) minpos;
556 : else
557 19206 : b->tminpos = BUN_NONE;
558 30944 : if (maxpos < b->batCount)
559 11773 : b->tmaxpos = (BUN) maxpos;
560 : else
561 19171 : b->tmaxpos = BUN_NONE;
562 30944 : if (t && var) {
563 8078 : t = vheapinit(b, buf + n, bbpversion, filename, lineno);
564 8078 : if (t < 0)
565 : return t;
566 8078 : n += t;
567 : } else {
568 22866 : b->tvheap = NULL;
569 : }
570 30944 : return n;
571 : }
572 :
573 : /* read a single line from the BBP.dir file (file pointer fp) and fill
574 : * in the structure pointed to by bn and extra information through the
575 : * other pointers; this function does not allocate any memory; return 0
576 : * on end of file, 1 on success, and -1 on failure */
577 : /* set to true during initialization, else always false; if false, do
578 : * not return any options (set pointer to NULL as if there aren't any);
579 : * if true and there are options, return them in freshly allocated
580 : * memory through *options */
581 : static bool return_options = false;
582 : int
583 31284 : BBPreadBBPline(FILE *fp, unsigned bbpversion, int *lineno, BAT *bn,
584 : #ifdef GDKLIBRARY_HASHASH
585 : int *hashash,
586 : #endif
587 : char *batname, char *filename, char **options)
588 : {
589 31284 : char buf[4096];
590 31284 : uint64_t batid;
591 31284 : unsigned int status;
592 31284 : unsigned int properties;
593 31284 : int nread, n;
594 31284 : char *s;
595 31284 : uint64_t count, capacity = 0, base = 0;
596 :
597 31284 : if (fgets(buf, sizeof(buf), fp) == NULL) {
598 340 : if (ferror(fp)) {
599 0 : TRC_CRITICAL(GDK, "error reading BBP.dir on line %d\n", *lineno);
600 0 : return -1;
601 : }
602 : return 0; /* end of file */
603 : }
604 30944 : (*lineno)++;
605 30944 : if ((s = strpbrk(buf, "\r\n")) != NULL) {
606 30944 : if (s[0] == '\r' && s[1] != '\n') {
607 0 : TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d", *lineno);
608 0 : return -1;
609 : }
610 : /* zap the newline */
611 30944 : *s = '\0';
612 : } else {
613 0 : TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d: line too long\n", *lineno);
614 0 : return -1;
615 : }
616 :
617 61888 : if (bbpversion <= GDKLIBRARY_HSIZE ?
618 0 : sscanf(buf,
619 : "%" SCNu64 " %u %128s %23s %u %" SCNu64
620 : " %" SCNu64 " %" SCNu64
621 : "%n",
622 : &batid, &status, batname, filename,
623 : &properties, &count, &capacity, &base,
624 : &nread) < 8 :
625 30944 : sscanf(buf,
626 : "%" SCNu64 " %u %128s %23s %u %" SCNu64
627 : " %" SCNu64
628 : "%n",
629 : &batid, &status, batname, filename,
630 : &properties, &count, &base,
631 : &nread) < 7) {
632 0 : TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d", *lineno);
633 0 : return -1;
634 : }
635 :
636 30944 : if (batid >= N_BBPINIT * BBPINIT) {
637 0 : TRC_CRITICAL(GDK, "bat ID (%" PRIu64 ") too large to accomodate (max %d), on line %d.", batid, N_BBPINIT * BBPINIT - 1, *lineno);
638 0 : return -1;
639 : }
640 :
641 : /* convert both / and \ path separators to our own DIR_SEP */
642 : #if DIR_SEP != '/'
643 : s = filename;
644 : while ((s = strchr(s, '/')) != NULL)
645 : *s++ = DIR_SEP;
646 : #endif
647 : #if DIR_SEP != '\\'
648 : s = filename;
649 30944 : while ((s = strchr(s, '\\')) != NULL)
650 0 : *s++ = DIR_SEP;
651 : #endif
652 :
653 30944 : bn->batCacheid = (bat) batid;
654 30944 : bn->batTransient = false;
655 30944 : bn->batCopiedtodisk = true;
656 30944 : switch ((properties & 0x06) >> 1) {
657 928 : case 0:
658 928 : bn->batRestricted = BAT_WRITE;
659 928 : break;
660 30016 : case 1:
661 30016 : bn->batRestricted = BAT_READ;
662 30016 : break;
663 0 : case 2:
664 0 : bn->batRestricted = BAT_APPEND;
665 0 : break;
666 0 : default:
667 0 : TRC_CRITICAL(GDK, "incorrect batRestricted value");
668 0 : return -1;
669 : }
670 30944 : bn->batCount = (BUN) count;
671 30944 : bn->batInserted = bn->batCount;
672 : /* set capacity to at least count */
673 30944 : bn->batCapacity = (BUN) count <= BATTINY ? BATTINY : (BUN) count;
674 :
675 30944 : if (base > (uint64_t) GDK_oid_max) {
676 0 : TRC_CRITICAL(GDK, "head seqbase out of range (ID = %" PRIu64 ", seq = %" PRIu64 ") on line %d.", batid, base, *lineno);
677 0 : return -1;
678 : }
679 30944 : bn->hseqbase = (oid) base;
680 30944 : n = heapinit(bn, buf + nread,
681 : #ifdef GDKLIBRARY_HASHASH
682 : hashash,
683 : #endif
684 : bbpversion, filename, *lineno);
685 30944 : if (n < 0) {
686 : return -1;
687 : }
688 30944 : nread += n;
689 :
690 30944 : if (nread >= (int) sizeof(buf) || (buf[nread] != '\0' && buf[nread] != ' ')) {
691 0 : TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d", *lineno);
692 0 : return -1;
693 : }
694 30944 : if (options) {
695 30944 : if (return_options && buf[nread] == ' ') {
696 0 : if ((*options = GDKstrdup(buf + nread + 1)) == NULL) {
697 0 : TRC_CRITICAL(GDK, "GDKstrdup failed\n");
698 0 : return -1;
699 : }
700 : } else {
701 30944 : *options = NULL;
702 : }
703 : }
704 : return 1;
705 : }
706 :
707 : static gdk_return
708 335 : BBPreadEntries(FILE *fp, unsigned bbpversion, int lineno
709 : #ifdef GDKLIBRARY_HASHASH
710 : , bat **hashbats, bat *nhashbats
711 : #endif
712 : )
713 : {
714 : #ifdef GDKLIBRARY_HASHASH
715 335 : bat *hbats = NULL;
716 335 : bat nhbats = 0;
717 : #endif
718 :
719 : /* read the BBP.dir and insert the BATs into the BBP */
720 335 : return_options = true;
721 335 : MT_lock_set(&BBPnameLock);
722 29704 : for (;;) {
723 30039 : BAT b;
724 30039 : Heap h;
725 30039 : Heap vh;
726 30039 : vh = h = (Heap) {
727 : .free = 0,
728 : };
729 30039 : b = (BAT) {
730 : .theap = &h,
731 : .tvheap = &vh,
732 : };
733 30039 : char *options;
734 30039 : char headname[129];
735 30039 : char filename[sizeof(BBP_physical(0))];
736 30039 : char logical[1024];
737 : #ifdef GDKLIBRARY_HASHASH
738 30039 : int Thashash;
739 : #endif
740 :
741 30039 : switch (BBPreadBBPline(fp, bbpversion, &lineno, &b,
742 : #ifdef GDKLIBRARY_HASHASH
743 : &Thashash,
744 : #endif
745 : headname, filename, &options)) {
746 335 : case 0:
747 : /* end of file */
748 : #ifdef GDKLIBRARY_HASHASH
749 335 : *hashbats = hbats;
750 335 : *nhashbats = nhbats;
751 : #endif
752 335 : return_options = false;
753 335 : MT_lock_unset(&BBPnameLock);
754 335 : return GDK_SUCCEED;
755 : case 1:
756 : /* successfully read an entry */
757 29704 : break;
758 0 : default:
759 : /* error */
760 0 : goto bailout;
761 : }
762 :
763 29704 : if (b.batCacheid >= N_BBPINIT * BBPINIT) {
764 0 : GDKfree(options);
765 0 : TRC_CRITICAL(GDK, "bat ID (%d) too large to accommodate (max %d), on line %d.", b.batCacheid, N_BBPINIT * BBPINIT - 1, lineno);
766 0 : goto bailout;
767 : }
768 :
769 29704 : if (b.batCacheid >= (bat) ATOMIC_GET(&BBPsize)) {
770 0 : if ((bat) ATOMIC_GET(&BBPsize) + 1 >= BBPlimit &&
771 0 : BBPextend(b.batCacheid + 1) != GDK_SUCCEED) {
772 0 : GDKfree(options);
773 0 : goto bailout;
774 : }
775 0 : ATOMIC_SET(&BBPsize, b.batCacheid + 1);
776 : }
777 29704 : if (BBP_desc(b.batCacheid) != NULL) {
778 0 : GDKfree(options);
779 0 : TRC_CRITICAL(GDK, "duplicate entry in BBP.dir (ID = "
780 : "%d) on line %d.", b.batCacheid, lineno);
781 0 : goto bailout;
782 : }
783 :
784 : #ifdef GDKLIBRARY_HASHASH
785 29704 : if (Thashash) {
786 0 : assert(bbpversion <= GDKLIBRARY_HASHASH);
787 0 : bat *sb = GDKrealloc(hbats, ++nhbats * sizeof(bat));
788 0 : if (sb == NULL) {
789 0 : GDKfree(options);
790 0 : goto bailout;
791 : }
792 0 : hbats = sb;
793 0 : hbats[nhbats - 1] = b.batCacheid;
794 : }
795 : #endif
796 :
797 29704 : BAT *bn;
798 29704 : Heap *hn;
799 29704 : if ((bn = GDKmalloc(sizeof(BAT))) == NULL ||
800 29704 : (hn = GDKmalloc(sizeof(Heap))) == NULL) {
801 0 : GDKfree(bn);
802 0 : GDKfree(options);
803 0 : TRC_CRITICAL(GDK, "cannot allocate memory for BAT.");
804 0 : goto bailout;
805 : }
806 29704 : *bn = b;
807 29704 : *hn = h;
808 29704 : bn->theap = hn;
809 29704 : if (b.tvheap) {
810 7753 : Heap *vhn;
811 7753 : assert(b.tvheap == &vh);
812 7753 : if ((vhn = GDKmalloc(sizeof(Heap))) == NULL) {
813 0 : GDKfree(hn);
814 0 : GDKfree(bn);
815 0 : GDKfree(options);
816 0 : TRC_CRITICAL(GDK, "cannot allocate memory for BAT.");
817 0 : goto bailout;
818 : }
819 7753 : *vhn = vh;
820 7753 : bn->tvheap = vhn;
821 7753 : ATOMIC_INIT(&bn->tvheap->refs, 1);
822 : }
823 :
824 29704 : char name[MT_NAME_LEN];
825 29704 : snprintf(name, sizeof(name), "heaplock%d", bn->batCacheid); /* fits */
826 29704 : MT_lock_init(&bn->theaplock, name);
827 29704 : snprintf(name, sizeof(name), "BATlock%d", bn->batCacheid); /* fits */
828 29704 : MT_lock_init(&bn->batIdxLock, name);
829 29704 : snprintf(name, sizeof(name), "hashlock%d", bn->batCacheid); /* fits */
830 29704 : MT_rwlock_init(&bn->thashlock, name);
831 29704 : ATOMIC_INIT(&bn->theap->refs, 1);
832 :
833 29704 : if (snprintf(BBP_bak(b.batCacheid), sizeof(BBP_bak(b.batCacheid)), "tmp_%o", (unsigned) b.batCacheid) >= (int) sizeof(BBP_bak(b.batCacheid))) {
834 0 : BATdestroy(bn);
835 0 : GDKfree(options);
836 0 : TRC_CRITICAL(GDK, "BBP logical filename directory is too large, on line %d\n", lineno);
837 0 : goto bailout;
838 : }
839 29704 : char *s;
840 29704 : if ((s = strchr(headname, '~')) != NULL && s == headname) {
841 : /* sizeof(logical) > sizeof(BBP_bak(b.batCacheid)), so
842 : * this fits */
843 0 : strcpy(logical, BBP_bak(b.batCacheid));
844 : } else {
845 0 : if (s)
846 0 : *s = 0;
847 29704 : strcpy_len(logical, headname, sizeof(logical));
848 : }
849 29704 : if (strcmp(logical, BBP_bak(b.batCacheid)) == 0) {
850 28984 : BBP_logical(b.batCacheid) = BBP_bak(b.batCacheid);
851 : } else {
852 720 : BBP_logical(b.batCacheid) = GDKstrdup(logical);
853 720 : if (BBP_logical(b.batCacheid) == NULL) {
854 0 : BATdestroy(bn);
855 0 : GDKfree(options);
856 0 : TRC_CRITICAL(GDK, "GDKstrdup failed\n");
857 0 : goto bailout;
858 : }
859 : }
860 29704 : strcpy_len(BBP_physical(b.batCacheid), filename, sizeof(BBP_physical(b.batCacheid)));
861 : #ifdef __COVERITY__
862 : /* help coverity */
863 : BBP_physical(b.batCacheid)[sizeof(BBP_physical(b.batCacheid)) - 1] = 0;
864 : #endif
865 29704 : BBP_options(b.batCacheid) = options;
866 29704 : BBP_refs(b.batCacheid) = 0;
867 29704 : BBP_lrefs(b.batCacheid) = 1; /* any BAT we encounter here is persistent, so has a logical reference */
868 29704 : BBP_desc(b.batCacheid) = bn;
869 29704 : BBP_pid(b.batCacheid) = 0;
870 29704 : BBP_status_set(b.batCacheid, BBPEXISTING); /* do we need other status bits? */
871 29704 : if (BBPnamecheck(BBP_logical(b.batCacheid)) == 0)
872 720 : BBP_insert(b.batCacheid);
873 : }
874 :
875 0 : bailout:
876 0 : MT_lock_unset(&BBPnameLock);
877 0 : return_options = false;
878 : #ifdef GDKLIBRARY_HASHASH
879 0 : GDKfree(hbats);
880 : #endif
881 0 : return GDK_FAIL;
882 : }
883 :
884 : /* check that the necessary files for all BATs exist and are large
885 : * enough */
886 : static gdk_return
887 336 : BBPcheckbats(unsigned bbpversion)
888 : {
889 336 : (void) bbpversion;
890 68550 : for (bat bid = 1, size = (bat) ATOMIC_GET(&BBPsize); bid < size; bid++) {
891 68214 : struct stat statb;
892 68214 : BAT *b;
893 68214 : char *path;
894 :
895 68214 : if ((b = BBP_desc(bid)) == NULL) {
896 : /* not a valid BAT */
897 38510 : continue;
898 : }
899 29704 : if (b->ttype == TYPE_void) {
900 : /* no files needed */
901 0 : continue;
902 : }
903 29704 : if (b->theap->free > 0) {
904 19661 : path = GDKfilepath(0, BATDIR, b->theap->filename, NULL);
905 19661 : if (path == NULL)
906 0 : return GDK_FAIL;
907 : /* first check string offset heap with width,
908 : * then without */
909 19661 : if (MT_stat(path, &statb) < 0) {
910 : #ifdef GDKLIBRARY_TAILN
911 0 : if (b->ttype == TYPE_str &&
912 0 : b->twidth < SIZEOF_VAR_T) {
913 0 : size_t taillen = strlen(path) - 1;
914 0 : char tailsave = path[taillen];
915 0 : path[taillen] = 0;
916 0 : if (MT_stat(path, &statb) < 0) {
917 0 : GDKsyserror("cannot stat file %s%c or %s (expected size %zu)\n",
918 : path, tailsave, path, b->theap->free);
919 0 : GDKfree(path);
920 0 : return GDK_FAIL;
921 : }
922 : } else
923 : #endif
924 : {
925 0 : GDKsyserror("cannot stat file %s (expected size %zu)\n",
926 : path, b->theap->free);
927 0 : GDKfree(path);
928 0 : return GDK_FAIL;
929 : }
930 : }
931 19661 : if ((size_t) statb.st_size < b->theap->free) {
932 0 : GDKerror("file %s too small (expected %zu, actual %zu)\n", path, b->theap->free, (size_t) statb.st_size);
933 0 : GDKfree(path);
934 0 : return GDK_FAIL;
935 : }
936 19661 : size_t hfree = b->theap->free;
937 19661 : hfree = (hfree + GDK_mmap_pagesize - 1) & ~(GDK_mmap_pagesize - 1);
938 19661 : if (hfree == 0)
939 0 : hfree = GDK_mmap_pagesize;
940 19661 : if (statb.st_size > (off_t) hfree) {
941 9 : int fd;
942 9 : if ((fd = MT_open(path, O_RDWR | O_CLOEXEC | O_BINARY)) >= 0) {
943 9 : if (ftruncate(fd, hfree) == -1)
944 0 : perror("ftruncate");
945 9 : (void) close(fd);
946 : }
947 : }
948 19661 : GDKfree(path);
949 : }
950 29704 : if (b->tvheap != NULL && b->tvheap->free > 0) {
951 5537 : path = GDKfilepath(0, BATDIR, BBP_physical(b->batCacheid), "theap");
952 5537 : if (path == NULL)
953 : return GDK_FAIL;
954 5537 : if (MT_stat(path, &statb) < 0) {
955 0 : GDKsyserror("cannot stat file %s\n",
956 : path);
957 0 : GDKfree(path);
958 0 : return GDK_FAIL;
959 : }
960 5537 : if ((size_t) statb.st_size < b->tvheap->free) {
961 0 : GDKerror("file %s too small (expected %zu, actual %zu)\n", path, b->tvheap->free, (size_t) statb.st_size);
962 0 : GDKfree(path);
963 0 : return GDK_FAIL;
964 : }
965 5537 : size_t hfree = b->tvheap->free;
966 5537 : hfree = (hfree + GDK_mmap_pagesize - 1) & ~(GDK_mmap_pagesize - 1);
967 5537 : if (hfree == 0)
968 0 : hfree = GDK_mmap_pagesize;
969 5537 : if (statb.st_size > (off_t) hfree) {
970 9 : int fd;
971 9 : if ((fd = MT_open(path, O_RDWR | O_CLOEXEC | O_BINARY)) >= 0) {
972 9 : if (ftruncate(fd, hfree) == -1)
973 0 : perror("ftruncate");
974 9 : (void) close(fd);
975 : }
976 : }
977 5537 : GDKfree(path);
978 : }
979 : }
980 : return GDK_SUCCEED;
981 : }
982 :
983 : #ifdef HAVE_HGE
984 : #define SIZEOF_MAX_INT SIZEOF_HGE
985 : #else
986 : #define SIZEOF_MAX_INT SIZEOF_LNG
987 : #endif
988 :
989 : unsigned
990 340 : BBPheader(FILE *fp, int *lineno, bat *bbpsize, lng *logno, lng *transid, bool allow_hge_upgrade)
991 : {
992 340 : char buf[BUFSIZ];
993 340 : int sz, ptrsize, oidsize, intsize;
994 340 : unsigned bbpversion;
995 :
996 340 : if (fgets(buf, sizeof(buf), fp) == NULL) {
997 0 : TRC_CRITICAL(GDK, "BBP.dir is empty");
998 0 : return 0;
999 : }
1000 340 : ++*lineno;
1001 340 : if (sscanf(buf, "BBP.dir, GDKversion %u\n", &bbpversion) != 1) {
1002 0 : GDKerror("old BBP without version number; "
1003 : "dump the database using a compatible version, "
1004 : "then restore into new database using this version.\n");
1005 0 : return 0;
1006 : }
1007 340 : if (bbpversion != GDKLIBRARY &&
1008 : bbpversion != GDKLIBRARY_JSON &&
1009 : bbpversion != GDKLIBRARY_HSIZE &&
1010 : bbpversion != GDKLIBRARY_HASHASH &&
1011 340 : bbpversion != GDKLIBRARY_TAILN &&
1012 : bbpversion != GDKLIBRARY_MINMAX_POS) {
1013 0 : TRC_CRITICAL(GDK, "incompatible BBP version: expected 0%o, got 0%o. "
1014 : "This database was probably created by a %s version of MonetDB.",
1015 : GDKLIBRARY, bbpversion,
1016 : bbpversion > GDKLIBRARY ? "newer" : "too old");
1017 0 : return 0;
1018 : }
1019 340 : if (fgets(buf, sizeof(buf), fp) == NULL) {
1020 0 : TRC_CRITICAL(GDK, "short BBP");
1021 0 : return 0;
1022 : }
1023 340 : ++*lineno;
1024 340 : if (sscanf(buf, "%d %d %d", &ptrsize, &oidsize, &intsize) != 3) {
1025 0 : TRC_CRITICAL(GDK, "BBP.dir has incompatible format: pointer, OID, and max. integer sizes are missing on line %d", *lineno);
1026 0 : return 0;
1027 : }
1028 340 : if (ptrsize != SIZEOF_SIZE_T || oidsize != SIZEOF_OID) {
1029 0 : TRC_CRITICAL(GDK, "database created with incompatible server: "
1030 : "expected pointer size %d, got %d, expected OID size %d, got %d.",
1031 : SIZEOF_SIZE_T, ptrsize, SIZEOF_OID, oidsize);
1032 0 : return 0;
1033 : }
1034 340 : if (intsize > SIZEOF_MAX_INT) {
1035 0 : TRC_CRITICAL(GDK, "database created with incompatible server: "
1036 : "expected max. integer size %d, got %d.",
1037 : SIZEOF_MAX_INT, intsize);
1038 0 : return 0;
1039 : }
1040 340 : if (intsize < SIZEOF_MAX_INT && !allow_hge_upgrade) {
1041 0 : TRC_CRITICAL(GDK, "database created with incompatible server: "
1042 : "expected max. integer size %d, got %d; "
1043 : "use --set allow_hge_upgrade=yes to upgrade.",
1044 : SIZEOF_MAX_INT, intsize);
1045 0 : return 0;
1046 : }
1047 340 : if (fgets(buf, sizeof(buf), fp) == NULL) {
1048 0 : TRC_CRITICAL(GDK, "short BBP");
1049 0 : return 0;
1050 : }
1051 340 : ++*lineno;
1052 340 : if (sscanf(buf, "BBPsize=%d", &sz) != 1) {
1053 0 : TRC_CRITICAL(GDK, "no BBPsize value found\n");
1054 0 : return 0;
1055 : }
1056 340 : if (sz > *bbpsize)
1057 117 : *bbpsize = sz;
1058 340 : if (bbpversion > GDKLIBRARY_MINMAX_POS) {
1059 340 : if (fgets(buf, sizeof(buf), fp) == NULL) {
1060 0 : TRC_CRITICAL(GDK, "short BBP");
1061 0 : return 0;
1062 : }
1063 340 : if (sscanf(buf, "BBPinfo=" LLSCN " " LLSCN, logno, transid) != 2) {
1064 0 : TRC_CRITICAL(GDK, "no info value found\n");
1065 0 : return 0;
1066 : }
1067 : } else {
1068 0 : *logno = *transid = 0;
1069 : }
1070 340 : return bbpversion;
1071 : }
1072 :
1073 : bool
1074 66994625 : GDKinmemory(int farmid)
1075 : {
1076 66994625 : if (farmid == NOFARM)
1077 : farmid = 0;
1078 65891305 : assert(farmid >= 0 && farmid < MAXFARMS);
1079 66994625 : return BBPfarms[farmid].dirname == NULL;
1080 : }
1081 :
1082 : /* all errors are fatal */
1083 : gdk_return
1084 1002 : BBPaddfarm(const char *dirname, uint32_t rolemask, bool logerror)
1085 : {
1086 1002 : struct stat st;
1087 1002 : int i;
1088 :
1089 1002 : if (dirname == NULL) {
1090 1 : assert(BBPfarms[0].dirname == NULL);
1091 1 : assert(rolemask & 1);
1092 1 : assert(BBPfarms[0].roles == 0);
1093 1 : BBPfarms[0].roles = rolemask;
1094 1 : return GDK_SUCCEED;
1095 : }
1096 1001 : if (strchr(dirname, '\n') != NULL) {
1097 0 : if (logerror)
1098 0 : GDKerror("no newline allowed in directory name\n");
1099 0 : return GDK_FAIL;
1100 : }
1101 1001 : if (rolemask == 0 || (rolemask & 1 && BBPfarms[0].roles != 0)) {
1102 0 : if (logerror)
1103 0 : GDKerror("bad rolemask\n");
1104 0 : return GDK_FAIL;
1105 : }
1106 1001 : if (strcmp(dirname, "in-memory") == 0 ||
1107 1000 : /* backward compatibility: */ strcmp(dirname, ":memory:") == 0) {
1108 : dirname = NULL;
1109 1000 : } else if (MT_mkdir(dirname) < 0) {
1110 913 : if (errno == EEXIST) {
1111 913 : if (MT_stat(dirname, &st) == -1 || !S_ISDIR(st.st_mode)) {
1112 0 : if (logerror)
1113 0 : GDKerror("%s: not a directory\n", dirname);
1114 0 : return GDK_FAIL;
1115 : }
1116 : } else {
1117 0 : if (logerror)
1118 0 : GDKsyserror("%s: cannot create directory\n", dirname);
1119 0 : return GDK_FAIL;
1120 : }
1121 : }
1122 1991 : for (i = 0; i < MAXFARMS; i++) {
1123 1991 : if (BBPfarms[i].roles == 0) {
1124 1001 : if (dirname) {
1125 1000 : BBPfarms[i].dirname = GDKstrdup(dirname);
1126 1000 : if (BBPfarms[i].dirname == NULL)
1127 : return GDK_FAIL;
1128 : }
1129 1001 : BBPfarms[i].roles = rolemask;
1130 1001 : if ((rolemask & 1) == 0 && dirname != NULL) {
1131 : char *bbpdir;
1132 : int j;
1133 :
1134 1009 : for (j = 0; j < i; j++)
1135 836 : if (BBPfarms[j].dirname != NULL &&
1136 836 : strcmp(BBPfarms[i].dirname,
1137 : BBPfarms[j].dirname) == 0)
1138 : return GDK_SUCCEED;
1139 : /* if an extra farm, make sure we
1140 : * don't find a BBP.dir there that
1141 : * might belong to an existing
1142 : * database */
1143 173 : bbpdir = GDKfilepath(i, BATDIR, "BBP", "dir");
1144 173 : if (bbpdir == NULL) {
1145 : return GDK_FAIL;
1146 : }
1147 173 : if (MT_stat(bbpdir, &st) != -1 || errno != ENOENT) {
1148 0 : GDKfree(bbpdir);
1149 0 : if (logerror)
1150 0 : GDKerror("%s is a database\n", dirname);
1151 0 : return GDK_FAIL;
1152 : }
1153 173 : GDKfree(bbpdir);
1154 173 : bbpdir = GDKfilepath(i, BAKDIR, "BBP", "dir");
1155 173 : if (bbpdir == NULL) {
1156 : return GDK_FAIL;
1157 : }
1158 173 : if (MT_stat(bbpdir, &st) != -1 || errno != ENOENT) {
1159 0 : GDKfree(bbpdir);
1160 0 : if (logerror)
1161 0 : GDKerror("%s is a database\n", dirname);
1162 0 : return GDK_FAIL;
1163 : }
1164 173 : GDKfree(bbpdir);
1165 : }
1166 511 : return GDK_SUCCEED;
1167 : }
1168 : }
1169 0 : if (logerror)
1170 0 : GDKerror("too many farms\n");
1171 : return GDK_FAIL;
1172 : }
1173 :
1174 : gdk_return
1175 338 : BBPchkfarms(void)
1176 : {
1177 338 : const char *dir = NULL;
1178 338 : uint32_t rolemask = 0;
1179 338 : if ((BBPfarms[0].roles & 1) == 0) {
1180 0 : GDKerror("Must call BBPaddfarms at least once for persistent data\n");
1181 0 : return GDK_FAIL;
1182 : }
1183 11154 : for (int i = 0; i < MAXFARMS; i++) {
1184 10816 : if (BBPfarms[i].roles != 0) {
1185 664 : dir = BBPfarms[i].dirname;
1186 664 : rolemask |= BBPfarms[i].roles;
1187 : }
1188 : }
1189 338 : if (dir == NULL)
1190 1 : dir = "in-memory";
1191 338 : if ((rolemask & (1U << TRANSIENT)) == 0) {
1192 0 : gdk_return rc = BBPaddfarm(dir, 1U << TRANSIENT, true);
1193 0 : if (rc != GDK_SUCCEED)
1194 : return rc;
1195 : }
1196 338 : if ((rolemask & (1U << SYSTRANS)) == 0) {
1197 338 : gdk_return rc = BBPaddfarm(dir, 1U << SYSTRANS, true);
1198 338 : if (rc != GDK_SUCCEED)
1199 : return rc;
1200 : }
1201 : return GDK_SUCCEED;
1202 : }
1203 :
1204 : #ifdef GDKLIBRARY_HASHASH
1205 : static gdk_return
1206 0 : fixhashashbat(BAT *b)
1207 : {
1208 0 : const char *nme = BBP_physical(b->batCacheid);
1209 0 : char *srcdir = GDKfilepath(NOFARM, BATDIR, nme, NULL);
1210 0 : if (srcdir == NULL) {
1211 0 : TRC_CRITICAL(GDK, "GDKfilepath failed\n");
1212 0 : return GDK_FAIL;
1213 : }
1214 0 : char *s;
1215 0 : if ((s = strrchr(srcdir, DIR_SEP)) != NULL)
1216 0 : *s = 0;
1217 0 : const char *bnme;
1218 0 : if ((bnme = strrchr(nme, DIR_SEP)) != NULL)
1219 0 : bnme++;
1220 : else
1221 : bnme = nme;
1222 0 : long_str filename;
1223 0 : snprintf(filename, sizeof(filename), "BACKUP%c%s", DIR_SEP, bnme);
1224 :
1225 : /* we don't maintain index structures */
1226 0 : HASHdestroy(b);
1227 0 : IMPSdestroy(b);
1228 0 : OIDXdestroy(b);
1229 0 : PROPdestroy(b);
1230 0 : STRMPdestroy(b);
1231 0 : RTREEdestroy(b);
1232 :
1233 : /* make backup of heaps */
1234 0 : const char *t;
1235 0 : if (GDKmove(b->theap->farmid, srcdir, bnme, "tail1",
1236 : BAKDIR, bnme, "tail1", false) == GDK_SUCCEED)
1237 : t = "tail1";
1238 0 : else if (GDKmove(b->theap->farmid, srcdir, bnme, "tail2",
1239 : BAKDIR, bnme, "tail2", false) == GDK_SUCCEED)
1240 : t = "tail2";
1241 : #if SIZEOF_VAR_T == 8
1242 0 : else if (GDKmove(b->theap->farmid, srcdir, bnme, "tail4",
1243 : BAKDIR, bnme, "tail4", false) == GDK_SUCCEED)
1244 : t = "tail4";
1245 : #endif
1246 0 : else if (GDKmove(b->theap->farmid, srcdir, bnme, "tail",
1247 : BAKDIR, bnme, "tail", true) == GDK_SUCCEED)
1248 : t = "tail";
1249 : else {
1250 0 : GDKfree(srcdir);
1251 0 : TRC_CRITICAL(GDK, "cannot make backup of %s.tail\n", nme);
1252 0 : return GDK_FAIL;
1253 : }
1254 0 : GDKclrerr();
1255 0 : if (GDKmove(b->theap->farmid, srcdir, bnme, "theap",
1256 : BAKDIR, bnme, "theap", true) != GDK_SUCCEED) {
1257 0 : GDKfree(srcdir);
1258 0 : TRC_CRITICAL(GDK, "cannot make backup of %s.theap\n", nme);
1259 0 : return GDK_FAIL;
1260 : }
1261 : /* load old heaps */
1262 0 : Heap h1 = *b->theap; /* old heap */
1263 0 : h1.base = NULL;
1264 0 : h1.dirty = false;
1265 0 : strconcat_len(h1.filename, sizeof(h1.filename), filename, ".", t, NULL);
1266 0 : if (HEAPload(&h1, filename, t, false) != GDK_SUCCEED) {
1267 0 : GDKfree(srcdir);
1268 0 : TRC_CRITICAL(GDK, "loading old tail heap "
1269 : "for BAT %d failed\n", b->batCacheid);
1270 0 : return GDK_FAIL;
1271 : }
1272 0 : Heap vh1 = *b->tvheap; /* old heap */
1273 0 : vh1.base = NULL;
1274 0 : vh1.dirty = false;
1275 0 : strconcat_len(vh1.filename, sizeof(vh1.filename), filename, ".theap", NULL);
1276 0 : if (HEAPload(&vh1, filename, "theap", false) != GDK_SUCCEED) {
1277 0 : GDKfree(srcdir);
1278 0 : HEAPfree(&h1, false);
1279 0 : TRC_CRITICAL(GDK, "loading old string heap "
1280 : "for BAT %d failed\n", b->batCacheid);
1281 0 : return GDK_FAIL;
1282 : }
1283 :
1284 : /* create new heaps */
1285 0 : Heap *h2 = GDKmalloc(sizeof(Heap));
1286 0 : Heap *vh2 = GDKmalloc(sizeof(Heap));
1287 0 : if (h2 == NULL || vh2 == NULL) {
1288 0 : GDKfree(h2);
1289 0 : GDKfree(vh2);
1290 0 : GDKfree(srcdir);
1291 0 : HEAPfree(&h1, false);
1292 0 : HEAPfree(&vh1, false);
1293 0 : TRC_CRITICAL(GDK, "allocating new heaps "
1294 : "for BAT %d failed\n", b->batCacheid);
1295 0 : return GDK_FAIL;
1296 : }
1297 0 : *h2 = *b->theap;
1298 0 : h2->base = NULL;
1299 0 : if (HEAPalloc(h2, b->batCapacity, b->twidth) != GDK_SUCCEED) {
1300 0 : GDKfree(h2);
1301 0 : GDKfree(vh2);
1302 0 : GDKfree(srcdir);
1303 0 : HEAPfree(&h1, false);
1304 0 : HEAPfree(&vh1, false);
1305 0 : TRC_CRITICAL(GDK, "allocating new tail heap "
1306 : "for BAT %d failed\n", b->batCacheid);
1307 0 : return GDK_FAIL;
1308 : }
1309 0 : h2->dirty = true;
1310 0 : h2->free = h1.free;
1311 :
1312 0 : *vh2 = *b->tvheap;
1313 0 : strconcat_len(vh2->filename, sizeof(vh2->filename), nme, ".theap", NULL);
1314 0 : strHeap(vh2, b->batCapacity);
1315 0 : if (vh2->base == NULL) {
1316 0 : GDKfree(srcdir);
1317 0 : HEAPfree(&h1, false);
1318 0 : HEAPfree(&vh1, false);
1319 0 : HEAPfree(h2, false);
1320 0 : GDKfree(h2);
1321 0 : GDKfree(vh2);
1322 0 : TRC_CRITICAL(GDK, "allocating new string heap "
1323 : "for BAT %d failed\n", b->batCacheid);
1324 0 : return GDK_FAIL;
1325 : }
1326 0 : vh2->dirty = true;
1327 0 : ATOMIC_INIT(&h2->refs, 1);
1328 0 : ATOMIC_INIT(&vh2->refs, 1);
1329 0 : Heap *ovh = b->tvheap;
1330 0 : b->tvheap = vh2;
1331 0 : vh2 = NULL; /* no longer needed */
1332 0 : for (BUN i = 0; i < b->batCount; i++) {
1333 0 : var_t o;
1334 0 : switch (b->twidth) {
1335 0 : case 1:
1336 0 : o = (var_t) ((uint8_t *) h1.base)[i] + GDK_VAROFFSET;
1337 0 : break;
1338 0 : case 2:
1339 0 : o = (var_t) ((uint16_t *) h1.base)[i] + GDK_VAROFFSET;
1340 0 : break;
1341 : #if SIZEOF_VAR_T == 8
1342 0 : case 4:
1343 0 : o = (var_t) ((uint32_t *) h1.base)[i];
1344 0 : break;
1345 : #endif
1346 0 : default:
1347 0 : o = ((var_t *) h1.base)[i];
1348 0 : break;
1349 : }
1350 0 : const char *s = vh1.base + o;
1351 0 : var_t no = strPut(b, &o, s);
1352 0 : if (no == 0) {
1353 0 : HEAPfree(&h1, false);
1354 0 : HEAPfree(&vh1, false);
1355 0 : HEAPdecref(h2, false);
1356 0 : HEAPdecref(b->tvheap, false);
1357 0 : b->tvheap = ovh;
1358 0 : GDKfree(srcdir);
1359 0 : TRC_CRITICAL(GDK, "storing string value "
1360 : "for BAT %d failed\n", b->batCacheid);
1361 0 : return GDK_FAIL;
1362 : }
1363 0 : assert(no >= GDK_VAROFFSET);
1364 0 : switch (b->twidth) {
1365 0 : case 1:
1366 0 : no -= GDK_VAROFFSET;
1367 0 : assert(no <= 0xFF);
1368 0 : ((uint8_t *) h2->base)[i] = (uint8_t) no;
1369 0 : break;
1370 0 : case 2:
1371 0 : no -= GDK_VAROFFSET;
1372 0 : assert(no <= 0xFFFF);
1373 0 : ((uint16_t *) h2->base)[i] = (uint16_t) no;
1374 0 : break;
1375 : #if SIZEOF_VAR_T == 8
1376 0 : case 4:
1377 0 : assert(no <= 0xFFFFFFFF);
1378 0 : ((uint32_t *) h2->base)[i] = (uint32_t) no;
1379 0 : break;
1380 : #endif
1381 0 : default:
1382 0 : ((var_t *) h2->base)[i] = no;
1383 0 : break;
1384 : }
1385 : }
1386 :
1387 : /* cleanup */
1388 0 : HEAPfree(&h1, false);
1389 0 : HEAPfree(&vh1, false);
1390 0 : if (HEAPsave(h2, nme, BATtailname(b), true, h2->free, NULL) != GDK_SUCCEED) {
1391 0 : HEAPdecref(h2, false);
1392 0 : HEAPdecref(b->tvheap, false);
1393 0 : b->tvheap = ovh;
1394 0 : GDKfree(srcdir);
1395 0 : TRC_CRITICAL(GDK, "saving heap failed\n");
1396 0 : return GDK_FAIL;
1397 : }
1398 0 : if (HEAPsave(b->tvheap, nme, "theap", true, b->tvheap->free, &b->theaplock) != GDK_SUCCEED) {
1399 0 : HEAPfree(b->tvheap, false);
1400 0 : b->tvheap = ovh;
1401 0 : GDKfree(srcdir);
1402 0 : TRC_CRITICAL(GDK, "saving string heap failed\n");
1403 0 : return GDK_FAIL;
1404 : }
1405 0 : HEAPdecref(b->theap, false);
1406 0 : b->theap = h2;
1407 0 : HEAPfree(h2, false);
1408 0 : HEAPdecref(ovh, false);
1409 0 : HEAPfree(b->tvheap, false);
1410 0 : GDKfree(srcdir);
1411 0 : return GDK_SUCCEED;
1412 : }
1413 :
1414 : static gdk_return
1415 0 : fixhashash(bat *hashbats, bat nhashbats)
1416 : {
1417 0 : for (bat i = 0; i < nhashbats; i++) {
1418 0 : bat bid = hashbats[i];
1419 0 : BAT *b;
1420 0 : if ((b = BBP_desc(bid)) == NULL) {
1421 : /* not a valid BAT (shouldn't happen) */
1422 0 : continue;
1423 : }
1424 0 : if (fixhashashbat(b) != GDK_SUCCEED)
1425 : return GDK_FAIL;
1426 : }
1427 : return GDK_SUCCEED;
1428 : }
1429 : #endif
1430 :
1431 : #ifdef GDKLIBRARY_TAILN
1432 : static gdk_return
1433 0 : movestrbats(void)
1434 : {
1435 0 : for (bat bid = 1, nbat = (bat) ATOMIC_GET(&BBPsize); bid < nbat; bid++) {
1436 0 : BAT *b = BBP_desc(bid);
1437 0 : if (b == NULL) {
1438 : /* not a valid BAT */
1439 0 : continue;
1440 : }
1441 0 : if (b->ttype != TYPE_str || b->twidth == SIZEOF_VAR_T || b->batCount == 0)
1442 0 : continue;
1443 0 : char *oldpath = GDKfilepath(0, BATDIR, BBP_physical(b->batCacheid), "tail");
1444 0 : char *newpath = GDKfilepath(0, BATDIR, b->theap->filename, NULL);
1445 0 : int ret = -1;
1446 0 : if (oldpath != NULL && newpath != NULL) {
1447 0 : struct stat oldst, newst;
1448 0 : bool oldexist = MT_stat(oldpath, &oldst) == 0;
1449 0 : bool newexist = MT_stat(newpath, &newst) == 0;
1450 0 : if (newexist) {
1451 0 : if (oldexist) {
1452 0 : if (oldst.st_mtime > newst.st_mtime) {
1453 0 : GDKerror("both %s and %s exist with %s unexpectedly newer: manual intervention required\n", oldpath, newpath, oldpath);
1454 0 : ret = -1;
1455 : } else {
1456 0 : GDKwarning("both %s and %s exist, removing %s\n", oldpath, newpath, oldpath);
1457 0 : ret = MT_remove(oldpath);
1458 : }
1459 : } else {
1460 : /* already good */
1461 : ret = 0;
1462 : }
1463 0 : } else if (oldexist) {
1464 0 : TRC_DEBUG(IO_, "rename %s to %s\n", oldpath, newpath);
1465 0 : ret = MT_rename(oldpath, newpath);
1466 : } else {
1467 : /* neither file exists: may be ok, but
1468 : * will be checked later */
1469 : ret = 0;
1470 : }
1471 : }
1472 0 : GDKfree(oldpath);
1473 0 : GDKfree(newpath);
1474 0 : if (ret == -1)
1475 : return GDK_FAIL;
1476 : }
1477 : return GDK_SUCCEED;
1478 : }
1479 : #endif
1480 :
1481 : #ifdef GDKLIBRARY_JSON
1482 : static gdk_return
1483 2 : jsonupgradebat(BAT *b, json_storage_conversion fixJSONStorage)
1484 : {
1485 2 : const char *nme = BBP_physical(b->batCacheid);
1486 2 : char *srcdir = GDKfilepath(NOFARM, BATDIR, nme, NULL);
1487 :
1488 2 : if (srcdir == NULL) {
1489 0 : TRC_CRITICAL(GDK, "GDKfilepath failed\n");
1490 0 : return GDK_FAIL;
1491 : }
1492 :
1493 2 : char *s;
1494 2 : if ((s = strrchr(srcdir, DIR_SEP)) != NULL)
1495 2 : *s = 0;
1496 2 : const char *bnme;
1497 2 : if ((bnme = strrchr(nme, DIR_SEP)) != NULL) {
1498 2 : bnme++;
1499 : } else {
1500 : bnme = nme;
1501 : }
1502 :
1503 2 : long_str filename;
1504 2 : snprintf(filename, sizeof(filename), "BACKUP%c%s", DIR_SEP, bnme);
1505 :
1506 : /* A json column should not normally have any index structures */
1507 2 : HASHdestroy(b);
1508 2 : IMPSdestroy(b);
1509 2 : OIDXdestroy(b);
1510 2 : PROPdestroy(b);
1511 2 : STRMPdestroy(b);
1512 2 : RTREEdestroy(b);
1513 :
1514 : /* backup the current heaps */
1515 2 : if (GDKmove(b->theap->farmid, srcdir, bnme, "tail",
1516 : BAKDIR, bnme, "tail", false) != GDK_SUCCEED) {
1517 0 : GDKfree(srcdir);
1518 0 : TRC_CRITICAL(GDK, "cannot make backup of %s.tail\n", nme);
1519 0 : return GDK_FAIL;
1520 : }
1521 2 : if (GDKmove(b->theap->farmid, srcdir, bnme, "theap",
1522 : BAKDIR, bnme, "theap", true) != GDK_SUCCEED) {
1523 0 : GDKfree(srcdir);
1524 0 : TRC_CRITICAL(GDK, "cannot make backup of %s.theap\n", nme);
1525 0 : return GDK_FAIL;
1526 : }
1527 :
1528 : /* load the old heaps */
1529 2 : Heap h1 = *b->theap;
1530 2 : h1.base = NULL;
1531 2 : h1.dirty = false;
1532 2 : strconcat_len(h1.filename, sizeof(h1.filename), filename, ".tail", NULL);
1533 2 : if (HEAPload(&h1, filename, "tail", false) != GDK_SUCCEED) {
1534 0 : GDKfree(srcdir);
1535 0 : TRC_CRITICAL(GDK, "loading old tail heap "
1536 : "for BAT %d failed\n", b->batCacheid);
1537 0 : return GDK_FAIL;
1538 : }
1539 :
1540 2 : Heap vh1 = *b->tvheap;
1541 2 : vh1.base = NULL;
1542 2 : vh1.dirty = false;
1543 2 : strconcat_len(vh1.filename, sizeof(vh1.filename), filename, ".theap", NULL);
1544 2 : if (HEAPload(&vh1, filename, "theap", false) != GDK_SUCCEED) {
1545 0 : GDKfree(srcdir);
1546 0 : HEAPfree(&h1, false);
1547 0 : TRC_CRITICAL(GDK, "loading old string heap "
1548 : "for BAT %d failed\n", b->batCacheid);
1549 0 : return GDK_FAIL;
1550 : }
1551 :
1552 : /* create the new heaps */
1553 2 : Heap *h2 = GDKmalloc(sizeof(Heap));
1554 2 : Heap *vh2 = GDKmalloc(sizeof(Heap));
1555 2 : if (h2 == NULL || vh2 == NULL) {
1556 0 : GDKfree(h2);
1557 0 : GDKfree(vh2);
1558 0 : GDKfree(srcdir);
1559 0 : HEAPfree(&h1, false);
1560 0 : HEAPfree(&vh1, false);
1561 0 : TRC_CRITICAL(GDK, "allocating new heaps "
1562 : "for BAT %d failed\n", b->batCacheid);
1563 0 : return GDK_FAIL;
1564 : }
1565 2 : *h2 = *b->theap;
1566 2 : h2->base = NULL;
1567 2 : if (HEAPalloc(h2, b->batCapacity, b->twidth) != GDK_SUCCEED) {
1568 0 : GDKfree(h2);
1569 0 : GDKfree(vh2);
1570 0 : GDKfree(srcdir);
1571 0 : HEAPfree(&h1, false);
1572 0 : HEAPfree(&vh1, false);
1573 0 : TRC_CRITICAL(GDK, "allocating new tail heap "
1574 : "for BAT %d failed\n", b->batCacheid);
1575 0 : return GDK_FAIL;
1576 :
1577 : }
1578 2 : h2->dirty = true;
1579 2 : h2->free = h1.free;
1580 :
1581 2 : *vh2 = *b->tvheap;
1582 2 : strconcat_len(vh2->filename, sizeof(vh2->filename), nme, ".theap", NULL);
1583 2 : strHeap(vh2, b->batCapacity);
1584 2 : if (vh2->base == NULL) {
1585 0 : GDKfree(srcdir);
1586 0 : HEAPfree(&h1, false);
1587 0 : HEAPfree(&vh1, false);
1588 0 : HEAPfree(h2, false);
1589 0 : GDKfree(h2);
1590 0 : GDKfree(vh2);
1591 0 : TRC_CRITICAL(GDK, "allocating new string heap "
1592 : "for BAT %d failed\n", b->batCacheid);
1593 0 : return GDK_FAIL;
1594 : }
1595 2 : vh2->dirty = true;
1596 2 : ATOMIC_INIT(&h2->refs, 1);
1597 2 : ATOMIC_INIT(&vh2->refs, 1);
1598 2 : Heap *ovh = b->tvheap;
1599 2 : b->tvheap = vh2;
1600 2 : vh2 = NULL;
1601 :
1602 6 : for (BUN i = 0; i < b->batCount; i++) {
1603 4 : var_t o = ((var_t *) h1.base)[i];
1604 4 : const char *s = vh1.base + o;
1605 4 : char *ns;
1606 4 : if (fixJSONStorage(&ns, &s) != GDK_SUCCEED) {
1607 0 : GDKfree(srcdir);
1608 0 : HEAPfree(&h1, false);
1609 0 : HEAPfree(&vh1, false);
1610 0 : HEAPdecref(h2, false);
1611 0 : HEAPdecref(b->tvheap, false);
1612 0 : b->tvheap = ovh;
1613 0 : TRC_CRITICAL(GDK, "converting value "
1614 : "in BAT %d failed\n", b->batCacheid);
1615 0 : return GDK_FAIL;
1616 : }
1617 4 : var_t no = strPut(b, &o, ns);
1618 4 : GDKfree(ns);
1619 4 : if (no == 0) {
1620 0 : GDKfree(srcdir);
1621 0 : HEAPfree(&h1, false);
1622 0 : HEAPfree(&vh1, false);
1623 0 : HEAPdecref(h2, false);
1624 0 : HEAPdecref(b->tvheap, false);
1625 0 : b->tvheap = ovh;
1626 0 : TRC_CRITICAL(GDK, "storing new value "
1627 : "in BAT %d failed\n", b->batCacheid);
1628 0 : return GDK_FAIL;
1629 :
1630 : }
1631 4 : ((var_t *)h2->base)[i] = no;
1632 : }
1633 :
1634 : /* cleanup */
1635 2 : HEAPfree(&h1, false);
1636 2 : HEAPfree(&vh1, false);
1637 2 : if (HEAPsave(h2, nme, BATtailname(b), true, h2->free, NULL) !=
1638 : GDK_SUCCEED) {
1639 0 : HEAPdecref(h2, false);
1640 0 : HEAPdecref(b->tvheap, false);
1641 0 : b->tvheap = ovh;
1642 0 : GDKfree(srcdir);
1643 0 : TRC_CRITICAL(GDK, "saving heap failed\n");
1644 0 : return GDK_FAIL;
1645 : }
1646 :
1647 2 : if (HEAPsave(b->tvheap, nme, "theap", true, b->tvheap->free,
1648 : &b->theaplock) != GDK_SUCCEED) {
1649 0 : HEAPfree(b->tvheap, false);
1650 0 : b->tvheap = ovh;
1651 0 : GDKfree(srcdir);
1652 0 : TRC_CRITICAL(GDK, "saving string failed\n");
1653 0 : return GDK_FAIL;
1654 : }
1655 :
1656 2 : HEAPdecref(b->theap, false);
1657 2 : b->theap = h2;
1658 2 : HEAPfree(h2, false);
1659 2 : HEAPdecref(ovh, false);
1660 2 : HEAPfree(b->tvheap, false);
1661 2 : GDKfree(srcdir);
1662 :
1663 2 : return GDK_SUCCEED;
1664 : }
1665 :
1666 : gdk_return
1667 8 : BBPjson_upgrade(json_storage_conversion fixJSONStorage)
1668 : {
1669 8 : bat bid;
1670 8 : BAT *b;
1671 8 : int JSON_type = ATOMindex("json");
1672 8 : bat nbat = (bat) ATOMIC_GET(&BBPsize);
1673 8 : bat *upd = GDKmalloc(sizeof(bat) * (size_t) nbat);
1674 8 : int nupd = 0;
1675 :
1676 8 : if (upd == NULL) {
1677 0 : TRC_CRITICAL(GDK, "could not create bat\n");
1678 0 : return GDK_FAIL;
1679 : }
1680 8 : upd[nupd++] = 0; /* first entry unused */
1681 :
1682 8 : BBPlock();
1683 :
1684 10544 : for (bid = 1; bid < nbat; bid++) {
1685 10528 : if ((b = BBP_desc(bid)) == NULL) {
1686 : /* not a valid BAT */
1687 7954 : continue;
1688 : }
1689 :
1690 2574 : if (b->ttype < 0) {
1691 56 : const char *nme;
1692 :
1693 56 : nme = ATOMunknown_name(b->ttype);
1694 56 : if (strcmp(nme, "json") != 0)
1695 54 : continue;
1696 2518 : } else if (b->ttype != JSON_type) {
1697 2518 : continue;
1698 : }
1699 2 : fprintf(stderr, "Upgrading json bat %d\n", bid);
1700 2 : if (jsonupgradebat(b, fixJSONStorage) != GDK_SUCCEED) {
1701 0 : BBPunlock();
1702 0 : GDKfree(upd);
1703 0 : return GDK_FAIL;
1704 : }
1705 2 : upd[nupd++] = bid;
1706 : }
1707 8 : BBPunlock();
1708 10 : if (nupd > 1 &&
1709 2 : TMsubcommit_list(upd, NULL, nupd, -1, -1) != GDK_SUCCEED) {
1710 0 : TRC_CRITICAL(GDK, "failed to commit changes\n");
1711 0 : GDKfree(upd);
1712 0 : return GDK_FAIL;
1713 : }
1714 8 : GDKfree(upd);
1715 8 : return GDK_SUCCEED;
1716 : }
1717 : #endif
1718 :
1719 : static bool
1720 113 : BBPtrim(bool aggressive, bat nbat)
1721 : {
1722 113 : int n = 0;
1723 113 : int waitctr = 0;
1724 113 : bool changed = false;
1725 113 : unsigned flag = BBPUNLOADING | BBPSYNCING | BBPSAVING;
1726 113 : if (!aggressive)
1727 113 : flag |= BBPHOT;
1728 113 : lng t0 = GDKusec();
1729 139823 : for (bat bid = 1; bid < nbat && !GDKexiting(); bid++) {
1730 : /* quick check to see if we might possibly have to do
1731 : * work (includes free bats) */
1732 139710 : if ((BBP_status(bid) & BBPLOADED) == 0)
1733 37271 : continue;
1734 : /* don't do this during a (sub)commit */
1735 102439 : BBPtmlock();
1736 102439 : MT_lock_set(&GDKswapLock(bid));
1737 102439 : BAT *b = NULL;
1738 102439 : bool swap = false;
1739 102439 : if (!(BBP_status(bid) & flag) &&
1740 13064 : BBP_refs(bid) == 0 &&
1741 13064 : BBP_lrefs(bid) != 0 &&
1742 13062 : (b = BBP_cache(bid)) != NULL) {
1743 13062 : MT_lock_set(&b->theaplock);
1744 13062 : if (!BATshared(b) &&
1745 12969 : !isVIEW(b) &&
1746 12794 : (!BATdirty(b) ||
1747 0 : (aggressive &&
1748 0 : b->theap->storage == STORE_MMAP &&
1749 0 : (b->tvheap == NULL ||
1750 0 : b->tvheap->storage == STORE_MMAP)) ||
1751 9431 : (b->batRole == PERSISTENT &&
1752 8993 : BBP_lrefs(bid) <= 2))) {
1753 4866 : BBP_status_on(bid, BBPUNLOADING);
1754 4866 : swap = true;
1755 6369 : waitctr += BATdirty(b) ? 9 : 1;
1756 : }
1757 13062 : MT_lock_unset(&b->theaplock);
1758 : }
1759 102439 : MT_lock_unset(&GDKswapLock(bid));
1760 102439 : if (swap) {
1761 4866 : TRC_DEBUG(BAT_, "unload and free bat %d\n", bid);
1762 4866 : if (BBPfree(b) != GDK_SUCCEED)
1763 0 : GDKerror("unload failed for bat %d", bid);
1764 4866 : n++;
1765 4866 : changed = true;
1766 : }
1767 102439 : BBPtmunlock();
1768 : /* every once in a while, give others a chance */
1769 102439 : if (++waitctr >= 1000) {
1770 85 : waitctr = 0;
1771 85 : MT_sleep_ms(2);
1772 : }
1773 : }
1774 113 : if (n > 0)
1775 42 : TRC_INFO(BAT_, "unloaded %d bats in "LLFMT" usec%s\n", n, GDKusec() - t0, aggressive ? " (also hot)" : "");
1776 113 : return changed;
1777 : }
1778 :
1779 : static void
1780 336 : BBPmanager(void *dummy)
1781 : {
1782 336 : (void) dummy;
1783 336 : bool changed = true;
1784 :
1785 449 : for (;;) {
1786 449 : int n = 0;
1787 449 : bat nbat = (bat) ATOMIC_GET(&BBPsize);
1788 449 : MT_thread_setworking("clearing HOT bits");
1789 430833 : for (bat bid = 1; bid < nbat; bid++) {
1790 430384 : MT_lock_set(&GDKswapLock(bid));
1791 430384 : if (BBP_refs(bid) == 0 && BBP_lrefs(bid) != 0) {
1792 204507 : n += (BBP_status(bid) & BBPHOT) != 0;
1793 204507 : BBP_status_off(bid, BBPHOT);
1794 : }
1795 430384 : MT_lock_unset(&GDKswapLock(bid));
1796 : }
1797 449 : TRC_DEBUG(BAT_, "cleared HOT bit from %d bats\n", n);
1798 449 : size_t cur = GDKvm_cursize();
1799 449 : MT_thread_setworking("sleeping");
1800 20237 : for (int i = 0, n = changed && cur > GDK_vm_maxsize / 2 ? 1 : cur > GDK_vm_maxsize / 4 ? 10 : 100; i < n; i++) {
1801 19683 : MT_sleep_ms(100);
1802 19681 : if (GDKexiting())
1803 : return;
1804 : }
1805 113 : MT_thread_setworking("BBPtrim");
1806 113 : changed = BBPtrim(false, nbat);
1807 113 : MT_thread_setworking("BBPcallbacks");
1808 113 : BBPcallbacks();
1809 113 : if (GDKexiting())
1810 : return;
1811 : }
1812 : }
1813 :
1814 : static MT_Id manager;
1815 :
1816 : gdk_return
1817 336 : BBPinit(bool allow_hge_upgrade)
1818 : {
1819 336 : FILE *fp = NULL;
1820 336 : struct stat st;
1821 336 : unsigned bbpversion = 0;
1822 336 : int i;
1823 336 : int lineno = 0;
1824 : #ifdef GDKLIBRARY_HASHASH
1825 336 : bat *hashbats = NULL;
1826 336 : bat nhashbats = 0;
1827 336 : gdk_return res = GDK_SUCCEED;
1828 : #endif
1829 336 : ATOMIC_BASE_TYPE dbg = ATOMIC_GET(&GDKdebug);
1830 :
1831 336 : ATOMIC_AND(&GDKdebug, ~TAILCHKMASK);
1832 :
1833 : /* the maximum number of BATs allowed in the system and the
1834 : * size of the "physical" array are linked in a complicated
1835 : * manner. The expression below shows the relationship */
1836 336 : static_assert((uint64_t) N_BBPINIT * BBPINIT < (UINT64_C(1) << (3 * ((sizeof(BBP[0][0].physical) + 2) * 2 / 5))), "\"physical\" array in BBPrec is too small");
1837 : /* similarly, the maximum number of BATs allowed also has a
1838 : * (somewhat simpler) relation with the size of the "bak"
1839 : * array */
1840 336 : static_assert((uint64_t) N_BBPINIT * BBPINIT < (UINT64_C(1) << (3 * (sizeof(BBP[0][0].bak) - 5))), "\"bak\" array in BBPrec is too small");
1841 :
1842 336 : if (!GDKinmemory(0)) {
1843 335 : str bbpdirstr, backupbbpdirstr;
1844 :
1845 335 : BBPtmlock();
1846 :
1847 335 : if ((bbpdirstr = GDKfilepath(0, BATDIR, "BBP", "dir")) == NULL) {
1848 0 : TRC_CRITICAL(GDK, "GDKmalloc failed\n");
1849 0 : BBPtmunlock();
1850 0 : ATOMIC_SET(&GDKdebug, dbg);
1851 0 : return GDK_FAIL;
1852 : }
1853 :
1854 335 : if ((backupbbpdirstr = GDKfilepath(0, BAKDIR, "BBP", "dir")) == NULL) {
1855 0 : GDKfree(bbpdirstr);
1856 0 : TRC_CRITICAL(GDK, "GDKmalloc failed\n");
1857 0 : BBPtmunlock();
1858 0 : ATOMIC_SET(&GDKdebug, dbg);
1859 0 : return GDK_FAIL;
1860 : }
1861 :
1862 335 : if (GDKremovedir(0, TEMPDIR) != GDK_SUCCEED) {
1863 0 : GDKfree(bbpdirstr);
1864 0 : GDKfree(backupbbpdirstr);
1865 0 : TRC_CRITICAL(GDK, "cannot remove directory %s\n", TEMPDIR);
1866 0 : BBPtmunlock();
1867 0 : ATOMIC_SET(&GDKdebug, dbg);
1868 0 : return GDK_FAIL;
1869 : }
1870 :
1871 335 : if (GDKremovedir(0, DELDIR) != GDK_SUCCEED) {
1872 0 : GDKfree(bbpdirstr);
1873 0 : GDKfree(backupbbpdirstr);
1874 0 : TRC_CRITICAL(GDK, "cannot remove directory %s\n", DELDIR);
1875 0 : BBPtmunlock();
1876 0 : ATOMIC_SET(&GDKdebug, dbg);
1877 0 : return GDK_FAIL;
1878 : }
1879 :
1880 : /* first move everything from SUBDIR to BAKDIR (its parent) */
1881 335 : if (BBPrecover_subdir() != GDK_SUCCEED) {
1882 0 : GDKfree(bbpdirstr);
1883 0 : GDKfree(backupbbpdirstr);
1884 0 : TRC_CRITICAL(GDK, "cannot properly recover_subdir process %s.", SUBDIR);
1885 0 : BBPtmunlock();
1886 0 : ATOMIC_SET(&GDKdebug, dbg);
1887 0 : return GDK_FAIL;
1888 : }
1889 :
1890 : /* try to obtain a BBP.dir from bakdir */
1891 335 : if (MT_stat(backupbbpdirstr, &st) == 0) {
1892 : /* backup exists; *must* use it */
1893 112 : if (recover_dir(0, MT_stat(bbpdirstr, &st) == 0) != GDK_SUCCEED) {
1894 0 : GDKfree(bbpdirstr);
1895 0 : GDKfree(backupbbpdirstr);
1896 0 : BBPtmunlock();
1897 0 : goto bailout;
1898 : }
1899 112 : if ((fp = GDKfilelocate(0, "BBP", "r", "dir")) == NULL) {
1900 0 : GDKfree(bbpdirstr);
1901 0 : GDKfree(backupbbpdirstr);
1902 0 : TRC_CRITICAL(GDK, "cannot open recovered BBP.dir.");
1903 0 : BBPtmunlock();
1904 0 : ATOMIC_SET(&GDKdebug, dbg);
1905 0 : return GDK_FAIL;
1906 : }
1907 223 : } else if ((fp = GDKfilelocate(0, "BBP", "r", "dir")) == NULL) {
1908 : /* there was no BBP.dir either. Panic! try to use a
1909 : * BBP.bak */
1910 223 : if (MT_stat(backupbbpdirstr, &st) < 0) {
1911 : /* no BBP.bak (nor BBP.dir or BACKUP/BBP.dir):
1912 : * create a new one */
1913 223 : TRC_DEBUG(IO_, "initializing BBP.\n");
1914 223 : if (BBPdir_init() != GDK_SUCCEED) {
1915 0 : GDKfree(bbpdirstr);
1916 0 : GDKfree(backupbbpdirstr);
1917 0 : BBPtmunlock();
1918 0 : goto bailout;
1919 : }
1920 0 : } else if (GDKmove(0, BATDIR, "BBP", "bak", BATDIR, "BBP", "dir", true) == GDK_SUCCEED)
1921 0 : TRC_DEBUG(IO_, "reverting to dir saved in BBP.bak.\n");
1922 :
1923 223 : if ((fp = GDKfilelocate(0, "BBP", "r", "dir")) == NULL) {
1924 0 : GDKsyserror("cannot open BBP.dir");
1925 0 : GDKfree(bbpdirstr);
1926 0 : GDKfree(backupbbpdirstr);
1927 0 : BBPtmunlock();
1928 0 : goto bailout;
1929 : }
1930 : }
1931 : assert(fp != NULL);
1932 335 : GDKfree(bbpdirstr);
1933 335 : GDKfree(backupbbpdirstr);
1934 335 : BBPtmunlock();
1935 : }
1936 :
1937 : /* scan the BBP.dir to obtain current size */
1938 336 : BBPlimit = 0;
1939 336 : memset(BBP, 0, sizeof(BBP));
1940 :
1941 336 : bat bbpsize;
1942 336 : bbpsize = 1;
1943 336 : if (GDKinmemory(0)) {
1944 : bbpversion = GDKLIBRARY;
1945 : } else {
1946 335 : lng logno, transid;
1947 335 : bbpversion = BBPheader(fp, &lineno, &bbpsize, &logno, &transid, allow_hge_upgrade);
1948 335 : if (bbpversion == 0) {
1949 0 : ATOMIC_SET(&GDKdebug, dbg);
1950 0 : return GDK_FAIL;
1951 : }
1952 335 : assert(bbpversion > GDKLIBRARY_MINMAX_POS || logno == 0);
1953 0 : assert(bbpversion > GDKLIBRARY_MINMAX_POS || transid == 0);
1954 335 : ATOMIC_SET(&BBPlogno, logno);
1955 335 : ATOMIC_SET(&BBPtransid, transid);
1956 : }
1957 :
1958 : /* allocate BBP records */
1959 336 : if (BBPextend(bbpsize) != GDK_SUCCEED) {
1960 0 : ATOMIC_SET(&GDKdebug, dbg);
1961 0 : return GDK_FAIL;
1962 : }
1963 336 : ATOMIC_SET(&BBPsize, bbpsize);
1964 :
1965 336 : if (!GDKinmemory(0)) {
1966 335 : if (BBPreadEntries(fp, bbpversion, lineno
1967 : #ifdef GDKLIBRARY_HASHASH
1968 : , &hashbats, &nhashbats
1969 : #endif
1970 : ) != GDK_SUCCEED) {
1971 0 : ATOMIC_SET(&GDKdebug, dbg);
1972 0 : return GDK_FAIL;
1973 : }
1974 335 : fclose(fp);
1975 : }
1976 :
1977 : /* remove trailing free bats from potential free list (they will
1978 : * get added when needed) */
1979 8507 : for (bat i = (bat) ATOMIC_GET(&BBPsize) - 1; i > 0; i--) {
1980 8283 : if (BBP_desc(i) != NULL)
1981 : break;
1982 8171 : bbpsize--;
1983 : }
1984 336 : ATOMIC_SET(&BBPsize, bbpsize);
1985 :
1986 : /* add free bats to free list in such a way that low numbered
1987 : * ones are at the head of the list */
1988 68550 : for (bat i = (bat) ATOMIC_GET(&BBPsize) - 1; i > 0; i--) {
1989 68214 : if (BBP_desc(i) == NULL) {
1990 38510 : BBP_next(i) = BBP_free;
1991 38510 : BBP_free = i;
1992 38510 : BBP_nfree++;
1993 : }
1994 : }
1995 :
1996 : /* will call BBPrecover if needed */
1997 336 : if (!GDKinmemory(0)) {
1998 335 : BBPtmlock();
1999 335 : gdk_return rc = BBPprepare(false);
2000 335 : BBPtmunlock();
2001 335 : if (rc != GDK_SUCCEED) {
2002 : #ifdef GDKLIBRARY_HASHASH
2003 0 : GDKfree(hashbats);
2004 : #endif
2005 0 : TRC_CRITICAL(GDK, "cannot properly prepare process %s.", BAKDIR);
2006 0 : ATOMIC_SET(&GDKdebug, dbg);
2007 0 : return rc;
2008 : }
2009 : }
2010 :
2011 336 : if (BBPcheckbats(bbpversion) != GDK_SUCCEED) {
2012 : #ifdef GDKLIBRARY_HASHASH
2013 0 : GDKfree(hashbats);
2014 : #endif
2015 0 : ATOMIC_SET(&GDKdebug, dbg);
2016 0 : return GDK_FAIL;
2017 : }
2018 :
2019 : #ifdef GDKLIBRARY_TAILN
2020 336 : char *needstrbatmove;
2021 336 : if (GDKinmemory(0)) {
2022 : needstrbatmove = NULL;
2023 : } else {
2024 335 : if ((needstrbatmove = GDKfilepath(0, BATDIR, "needstrbatmove", NULL)) == NULL) {
2025 : #ifdef GDKLIBRARY_HASHASH
2026 0 : GDKfree(hashbats);
2027 : #endif
2028 0 : ATOMIC_SET(&GDKdebug, dbg);
2029 0 : return GDK_FAIL;
2030 : }
2031 335 : if (bbpversion <= GDKLIBRARY_TAILN) {
2032 : /* create signal file that we need to rename string
2033 : * offset heaps */
2034 0 : int fd = MT_open(needstrbatmove, O_WRONLY | O_CREAT);
2035 0 : if (fd < 0) {
2036 0 : TRC_CRITICAL(GDK, "cannot create signal file needstrbatmove.\n");
2037 0 : GDKfree(needstrbatmove);
2038 : #ifdef GDKLIBRARY_HASHASH
2039 0 : GDKfree(hashbats);
2040 : #endif
2041 0 : ATOMIC_SET(&GDKdebug, dbg);
2042 0 : return GDK_FAIL;
2043 : }
2044 0 : close(fd);
2045 : } else {
2046 : /* check signal file whether we need to rename string
2047 : * offset heaps */
2048 335 : int fd = MT_open(needstrbatmove, O_RDONLY);
2049 335 : if (fd >= 0) {
2050 : /* yes, we do */
2051 0 : close(fd);
2052 335 : } else if (errno == ENOENT) {
2053 : /* no, we don't: set var to NULL */
2054 335 : GDKfree(needstrbatmove);
2055 335 : needstrbatmove = NULL;
2056 : } else {
2057 0 : GDKsyserror("unexpected error opening %s\n", needstrbatmove);
2058 0 : GDKfree(needstrbatmove);
2059 : #ifdef GDKLIBRARY_HASHASH
2060 0 : GDKfree(hashbats);
2061 : #endif
2062 0 : ATOMIC_SET(&GDKdebug, dbg);
2063 0 : return GDK_FAIL;
2064 : }
2065 : }
2066 : }
2067 : #endif
2068 :
2069 : #ifdef GDKLIBRARY_HASHASH
2070 336 : if (nhashbats > 0)
2071 0 : res = fixhashash(hashbats, nhashbats);
2072 336 : GDKfree(hashbats);
2073 336 : if (res != GDK_SUCCEED)
2074 : return res;
2075 : #endif
2076 :
2077 : #ifdef GDKLIBRARY_JSON
2078 336 : if (bbpversion <= GDKLIBRARY_JSON) {
2079 8 : char *jsonupgradestr;
2080 8 : if (GDKinmemory(0)) {
2081 344 : jsonupgradestr = NULL;
2082 : } else {
2083 8 : if ((jsonupgradestr = GDKfilepath(0, BATDIR, "jsonupgradeneeded", NULL)) == NULL) {
2084 0 : TRC_CRITICAL(GDK, "GDKfilepath failed\n");
2085 0 : ATOMIC_SET(&GDKdebug, dbg);
2086 0 : return GDK_FAIL;
2087 : }
2088 :
2089 : /* create signal file that we need to upgrade
2090 : * stored json strings. This will be performed
2091 : * by an upgrade function in the GDK that will
2092 : * be called at the end of the json module
2093 : * initialzation with a callback that actually
2094 : * knows how to perform the upgrade. */
2095 8 : int fd = MT_open(jsonupgradestr, O_WRONLY | O_CREAT);
2096 8 : GDKfree(jsonupgradestr);
2097 8 : if (fd < 0) {
2098 0 : TRC_CRITICAL(GDK, "cannot create signal file jsonupgradeneeded");
2099 0 : ATOMIC_SET(&GDKdebug, dbg);
2100 0 : return GDK_FAIL;
2101 : }
2102 :
2103 8 : close(fd);
2104 : }
2105 : }
2106 : #endif
2107 :
2108 8 : if (bbpversion < GDKLIBRARY && TMcommit() != GDK_SUCCEED) {
2109 0 : TRC_CRITICAL(GDK, "TMcommit failed\n");
2110 0 : ATOMIC_SET(&GDKdebug, dbg);
2111 0 : return GDK_FAIL;
2112 : }
2113 :
2114 : #ifdef GDKLIBRARY_TAILN
2115 : /* we rename the offset heaps after the above commit: in this
2116 : * version we accept both the old and new names, but we want to
2117 : * convert so that future versions only have the new name */
2118 336 : if (needstrbatmove) {
2119 : /* note, if renaming fails, nothing is lost: a next
2120 : * invocation will just try again; an older version of
2121 : * mserver will not work because of the TMcommit
2122 : * above */
2123 0 : if (movestrbats() != GDK_SUCCEED) {
2124 0 : GDKfree(needstrbatmove);
2125 0 : ATOMIC_SET(&GDKdebug, dbg);
2126 0 : return GDK_FAIL;
2127 : }
2128 0 : MT_remove(needstrbatmove);
2129 0 : GDKfree(needstrbatmove);
2130 0 : needstrbatmove = NULL;
2131 : }
2132 : #endif
2133 336 : ATOMIC_SET(&GDKdebug, dbg);
2134 :
2135 : /* cleanup any leftovers (must be done after BBPrecover) */
2136 1330 : for (i = 0; i < MAXFARMS && BBPfarms[i].dirname != NULL; i++) {
2137 : int j;
2138 1340 : for (j = 0; j < i; j++) {
2139 : /* don't clean a directory twice */
2140 832 : if (BBPfarms[j].dirname &&
2141 832 : strcmp(BBPfarms[i].dirname,
2142 : BBPfarms[j].dirname) == 0)
2143 : break;
2144 : }
2145 994 : if (j == i) {
2146 508 : char *d = GDKfilepath(i, NULL, BATDIR, NULL);
2147 508 : if (d == NULL) {
2148 : return GDK_FAIL;
2149 : }
2150 508 : BBPdiskscan(d, strlen(d) - strlen(BATDIR));
2151 508 : GDKfree(d);
2152 : }
2153 : }
2154 :
2155 336 : if (MT_create_thread(&manager, BBPmanager, NULL, MT_THR_DETACHED, "BBPmanager") < 0) {
2156 0 : TRC_CRITICAL(GDK, "Could not start BBPmanager thread.");
2157 0 : return GDK_FAIL;
2158 : }
2159 : return GDK_SUCCEED;
2160 :
2161 0 : bailout:
2162 : /* now it is time for real panic */
2163 0 : TRC_CRITICAL(GDK, "could not write %s%cBBP.dir.", BATDIR, DIR_SEP);
2164 0 : return GDK_FAIL;
2165 : }
2166 :
2167 : /*
2168 : * During the exit phase all non-persistent BATs are removed. Upon
2169 : * exit the status of the BBP tables is saved on disk. This function
2170 : * is called once and during the shutdown of the server. Since
2171 : * shutdown may be issued from any thread (dangerous) it may lead to
2172 : * interference in a parallel session.
2173 : */
2174 :
/* File-level bookkeeping.  BBPexit() resets the three backup_*
 * counters to 0 and frees/clears lockfile.
 * NOTE(review): presumably these record which backup directories
 * have already been prepared and the path of the database lock file;
 * their writers are outside this part of the file -- confirm. */
static int backup_files = 0, backup_dir = 0, backup_subdir = 0;
static char *lockfile = NULL;
2177 :
/* Tear down the in-memory BBP.
 *
 * Every valid BBP slot is visited; for slots that still have a
 * descriptor the view-parent references and a pending "oldtail" heap
 * are released, the properties are destroyed and the BAT is freed.
 * Afterwards the slot is uncached (descriptor included), a separately
 * allocated logical name is freed, and the name pointer is cleared.
 *
 * BATs for which BATshared() is true are skipped; as long as at least
 * one BAT was skipped the whole scan is repeated.
 *
 * Finally BBP_hash is zeroed, the backup_* counters are reset and
 * lockfile is freed. */
void
BBPexit(void)
{
	bat i;
	bool skipped;

	//BBPlock(); /* stop all threads ever touching more descriptors */

	/* free all memory (just for leak-checking in Purify) */
	do {
		skipped = false;
		for (i = 0; i < (bat) ATOMIC_GET(&BBPsize); i++) {
			if (BBPvalid(i)) {
				BAT *b = BBP_desc(i);

				if (b) {
					if (BATshared(b)) {
						/* cannot free yet: retry
						 * in the next pass */
						skipped = true;
						continue;
					}
					MT_lock_set(&b->theaplock);
					/* if the tail heap belongs to a
					 * parent, drop our logical ref on
					 * the parent and our ref on the
					 * heap */
					bat tp = VIEWtparent(b);
					if (tp != 0) {
						--BBP_lrefs(tp);
						HEAPdecref(b->theap, false);
						b->theap = NULL;
					}
					/* same for the var heap */
					tp = VIEWvtparent(b);
					if (tp != 0) {
						--BBP_lrefs(tp);
						HEAPdecref(b->tvheap, false);
						b->tvheap = NULL;
					}
					if (b->oldtail) {
						Heap *h = b->oldtail;
						b->oldtail = NULL;
						/* clear the DELAYEDREMOVE bit
						 * before dropping the ref */
						ATOMIC_AND(&h->refs, ~DELAYEDREMOVE);
						HEAPdecref(h, false);
					}
					PROPdestroy_nolock(b);
					MT_lock_unset(&b->theaplock);
					BATfree(b);
				}
				BBP_pid(i) = 0;
				BBPuncacheit(i, true);
				/* the logical name is only separately
				 * allocated when it is not the built-in
				 * "bak" name buffer */
				if (BBP_logical(i) != BBP_bak(i))
					GDKfree(BBP_logical(i));
				BBP_logical(i) = NULL;
			}
		}
	} while (skipped);
	/* these need to be NULL, otherwise no new ones get created */
	memset(BBP_hash, 0, sizeof(BBP_hash));
	backup_files = 0;
	backup_dir = 0;
	backup_subdir = 0;
	if (lockfile) {
		GDKfree(lockfile);
		lockfile = NULL;
	}
}
2239 :
2240 : /*
2241 : * The routine BBPdir creates the BAT pool dictionary file. It
2242 : * includes some information about the current state of affairs in the
2243 : * pool. The location in the buffer pool is saved for later use as
2244 : * well. This is merely done for ease of debugging and of no
2245 : * importance to front-ends. The tail of non-used entries is
2246 : * reclaimed as well.
2247 : */
2248 : static inline int
2249 1625486 : heap_entry(FILE *fp, BATiter *bi, BUN size)
2250 : {
2251 1625486 : size_t free = bi->hfree;
2252 1625486 : if (size < BUN_NONE) {
2253 1625486 : if ((bi->type >= 0 && ATOMstorage(bi->type) == TYPE_msk))
2254 254952 : free = ((size + 31) / 32) * 4;
2255 1370534 : else if (bi->width > 0)
2256 1370534 : free = size << bi->shift;
2257 : else
2258 : free = 0;
2259 : }
2260 :
2261 5196101 : return fprintf(fp, " %s %d %d %d " BUNFMT " " BUNFMT " " BUNFMT " "
2262 : BUNFMT " " OIDFMT " %zu %" PRIu64" %" PRIu64,
2263 1625486 : bi->type >= 0 ? BATatoms[bi->type].name : ATOMunknown_name(bi->type),
2264 1625486 : bi->width,
2265 1625486 : bi->type == TYPE_void || bi->vh != NULL,
2266 1625486 : (unsigned short) bi->sorted |
2267 1625486 : ((unsigned short) bi->revsorted << 7) |
2268 3250972 : ((unsigned short) bi->key << 8) |
2269 1625486 : ((unsigned short) BATtdensebi(bi) << 9) |
2270 1625486 : ((unsigned short) bi->nonil << 10) |
2271 1625486 : ((unsigned short) bi->nil << 11),
2272 971940 : bi->nokey[0] >= size || bi->nokey[1] >= size ? 0 : bi->nokey[0],
2273 1625486 : bi->nokey[0] >= size || bi->nokey[1] >= size ? 0 : bi->nokey[1],
2274 1625486 : bi->nosorted >= size ? 0 : bi->nosorted,
2275 1625486 : bi->norevsorted >= size ? 0 : bi->norevsorted,
2276 : bi->tseq,
2277 : free,
2278 1625486 : bi->minpos < size ? (uint64_t) bi->minpos : (uint64_t) oid_nil,
2279 1625486 : bi->maxpos < size ? (uint64_t) bi->maxpos : (uint64_t) oid_nil);
2280 : }
2281 :
2282 : static inline int
2283 1625486 : vheap_entry(FILE *fp, BATiter *bi, BUN size)
2284 : {
2285 1625486 : (void) size;
2286 1625486 : if (bi->vh == NULL)
2287 : return 0;
2288 360971 : return fprintf(fp, " %zu", size == 0 ? 0 : bi->vhfree);
2289 : }
2290 :
2291 : static gdk_return
2292 1625486 : new_bbpentry(FILE *fp, bat i, BUN size, BATiter *bi)
2293 : {
2294 : #ifndef NDEBUG
2295 1625486 : assert(i > 0);
2296 1625486 : assert(i < (bat) ATOMIC_GET(&BBPsize));
2297 1625486 : assert(bi->b);
2298 1625486 : assert(bi->b->batCacheid == i);
2299 1625486 : assert(bi->b->batRole == PERSISTENT);
2300 1625486 : assert(0 <= bi->h->farmid && bi->h->farmid < MAXFARMS);
2301 1625486 : assert(BBPfarms[bi->h->farmid].roles & (1U << PERSISTENT));
2302 1625486 : if (bi->vh) {
2303 360971 : assert(0 <= bi->vh->farmid && bi->vh->farmid < MAXFARMS);
2304 360971 : assert(BBPfarms[bi->vh->farmid].roles & (1U << PERSISTENT));
2305 : }
2306 1625486 : assert(size <= bi->count || size == BUN_NONE);
2307 1625486 : assert(BBP_options(i) == NULL || strpbrk(BBP_options(i), "\r\n") == NULL);
2308 : #endif
2309 :
2310 1625486 : if (BBP_options(i) != NULL && strpbrk(BBP_options(i), "\r\n") != NULL) {
2311 0 : GDKerror("options for bat %d contains a newline\n", i);
2312 0 : return GDK_FAIL;
2313 : }
2314 1625486 : if (size > bi->count)
2315 : size = bi->count;
2316 1625486 : if (fprintf(fp, "%d %u %s %s %d " BUNFMT " " OIDFMT,
2317 : /* BAT info */
2318 : (int) i,
2319 1625486 : BBP_status(i) & BBPPERSISTENT,
2320 : BBP_logical(i),
2321 1625486 : BBP_physical(i),
2322 1625486 : (unsigned) bi->restricted << 1,
2323 : size,
2324 1625486 : bi->b->hseqbase) < 0 ||
2325 3250972 : heap_entry(fp, bi, size) < 0 ||
2326 1625486 : vheap_entry(fp, bi, size) < 0 ||
2327 3250972 : (BBP_options(i) && fprintf(fp, " %s", BBP_options(i)) < 0) ||
2328 1625486 : fprintf(fp, "\n") < 0) {
2329 0 : GDKsyserror("new_bbpentry: Writing BBP.dir entry failed\n");
2330 0 : return GDK_FAIL;
2331 : }
2332 :
2333 : return GDK_SUCCEED;
2334 : }
2335 :
2336 : static gdk_return
2337 12156 : BBPdir_header(FILE *f, int n, lng logno, lng transid)
2338 : {
2339 12156 : if (fprintf(f, "BBP.dir, GDKversion %u\n%d %d %d\nBBPsize=%d\nBBPinfo=" LLFMT " " LLFMT "\n",
2340 : GDKLIBRARY, SIZEOF_SIZE_T, SIZEOF_OID,
2341 : #ifdef HAVE_HGE
2342 : SIZEOF_HGE
2343 : #else
2344 : SIZEOF_LNG
2345 : #endif
2346 12156 : , n, logno, transid) < 0 ||
2347 12156 : ferror(f)) {
2348 0 : GDKsyserror("Writing BBP.dir header failed\n");
2349 0 : return GDK_FAIL;
2350 : }
2351 : return GDK_SUCCEED;
2352 : }
2353 :
2354 : static gdk_return
2355 12156 : BBPdir_first(bool subcommit, lng logno, lng transid,
2356 : FILE **obbpfp, FILE **nbbpfp)
2357 : {
2358 12156 : FILE *obbpf = NULL, *nbbpf = NULL;
2359 12156 : int n = 0;
2360 12156 : lng ologno, otransid;
2361 :
2362 12156 : if (obbpfp)
2363 11933 : *obbpfp = NULL;
2364 12156 : *nbbpfp = NULL;
2365 :
2366 12156 : if ((nbbpf = GDKfilelocate(0, "BBP", "w", "dir")) == NULL) {
2367 : return GDK_FAIL;
2368 : }
2369 :
2370 12156 : if (subcommit) {
2371 11925 : char buf[512];
2372 :
2373 11925 : assert(obbpfp != NULL);
2374 : /* we need to copy the backup BBP.dir to the new, but
2375 : * replacing the entries for the subcommitted bats */
2376 11925 : if ((obbpf = GDKfileopen(0, SUBDIR, "BBP", "dir", "r")) == NULL &&
2377 0 : (obbpf = GDKfileopen(0, BAKDIR, "BBP", "dir", "r")) == NULL) {
2378 0 : GDKsyserror("subcommit attempted without backup BBP.dir");
2379 0 : goto bailout;
2380 : }
2381 : /* read first three lines */
2382 23850 : if (fgets(buf, sizeof(buf), obbpf) == NULL || /* BBP.dir, GDKversion %d */
2383 23850 : fgets(buf, sizeof(buf), obbpf) == NULL || /* SIZEOF_SIZE_T SIZEOF_OID SIZEOF_MAX_INT */
2384 11925 : fgets(buf, sizeof(buf), obbpf) == NULL) { /* BBPsize=%d */
2385 0 : GDKerror("subcommit attempted with invalid backup BBP.dir.");
2386 0 : goto bailout;
2387 : }
2388 : /* third line contains BBPsize */
2389 11925 : if (sscanf(buf, "BBPsize=%d", &n) != 1) {
2390 0 : GDKerror("cannot read BBPsize in backup BBP.dir.");
2391 0 : goto bailout;
2392 : }
2393 : /* fourth line contains BBPinfo */
2394 11925 : if (fgets(buf, sizeof(buf), obbpf) == NULL ||
2395 11925 : sscanf(buf, "BBPinfo=" LLSCN " " LLSCN, &ologno, &otransid) != 2) {
2396 0 : GDKerror("cannot read BBPinfo in backup BBP.dir.");
2397 0 : goto bailout;
2398 : }
2399 : }
2400 :
2401 12156 : if (n < (bat) ATOMIC_GET(&BBPsize))
2402 3193 : n = (bat) ATOMIC_GET(&BBPsize);
2403 :
2404 12156 : TRC_DEBUG(IO_, "writing BBP.dir (%d bats).\n", n);
2405 :
2406 12156 : if (BBPdir_header(nbbpf, n, logno, transid) != GDK_SUCCEED) {
2407 0 : goto bailout;
2408 : }
2409 :
2410 12156 : if (obbpfp)
2411 11933 : *obbpfp = obbpf;
2412 12156 : *nbbpfp = nbbpf;
2413 :
2414 12156 : return GDK_SUCCEED;
2415 :
2416 0 : bailout:
2417 0 : if (obbpf != NULL)
2418 0 : fclose(obbpf);
2419 0 : if (nbbpf != NULL)
2420 0 : fclose(nbbpf);
2421 0 : return GDK_FAIL;
2422 : }
2423 :
2424 : static bat
2425 1816975 : BBPdir_step(bat bid, BUN size, int n, char *buf, size_t bufsize,
2426 : FILE **obbpfp, FILE *nbbpf, BATiter *bi)
2427 : {
2428 1816975 : if (n < -1) /* safety catch */
2429 : return n;
2430 4842570 : while (n >= 0 && n < bid) {
2431 3025595 : if (n > 0) {
2432 1458797 : if (fputs(buf, nbbpf) == EOF) {
2433 0 : GDKerror("Writing BBP.dir file failed.\n");
2434 0 : goto bailout;
2435 : }
2436 : }
2437 3025595 : if (fgets(buf, (int) bufsize, *obbpfp) == NULL) {
2438 3280 : if (ferror(*obbpfp)) {
2439 0 : GDKerror("error reading backup BBP.dir.");
2440 0 : goto bailout;
2441 : }
2442 3280 : n = -1;
2443 3280 : if (fclose(*obbpfp) == EOF) {
2444 0 : GDKsyserror("Closing backup BBP.dir file failed\n");
2445 0 : GDKclrerr(); /* ignore error */
2446 : }
2447 3280 : *obbpfp = NULL;
2448 : } else {
2449 3022315 : if (sscanf(buf, "%d", &n) != 1 || n <= 0 || n >= N_BBPINIT * BBPINIT) {
2450 0 : GDKerror("subcommit attempted with invalid backup BBP.dir.");
2451 0 : goto bailout;
2452 : }
2453 : }
2454 : }
2455 1816975 : if (BBP_status(bid) & BBPPERSISTENT) {
2456 1625486 : if (new_bbpentry(nbbpf, bid, size, bi) != GDK_SUCCEED)
2457 0 : goto bailout;
2458 : }
2459 1816975 : return n == -1 ? -1 : n == bid ? 0 : n;
2460 :
2461 0 : bailout:
2462 0 : if (*obbpfp)
2463 0 : fclose(*obbpfp);
2464 0 : fclose(nbbpf);
2465 0 : return -2;
2466 : }
2467 :
2468 : static gdk_return
2469 12156 : BBPdir_last(int n, char *buf, size_t bufsize, FILE *obbpf, FILE *nbbpf)
2470 : {
2471 12156 : if (n > 0 && fputs(buf, nbbpf) == EOF) {
2472 0 : GDKerror("Writing BBP.dir file failed.\n");
2473 0 : goto bailout;
2474 : }
2475 278764 : while (obbpf) {
2476 275253 : if (fgets(buf, (int) bufsize, obbpf) == NULL) {
2477 8645 : if (ferror(obbpf)) {
2478 0 : GDKerror("error reading backup BBP.dir.");
2479 0 : goto bailout;
2480 : }
2481 8645 : if (fclose(obbpf) == EOF) {
2482 0 : GDKsyserror("Closing backup BBP.dir file failed\n");
2483 0 : GDKclrerr(); /* ignore error */
2484 : }
2485 : obbpf = NULL;
2486 : } else {
2487 266608 : if (fputs(buf, nbbpf) == EOF) {
2488 0 : GDKerror("Writing BBP.dir file failed.\n");
2489 0 : goto bailout;
2490 : }
2491 : }
2492 : }
2493 12156 : if (fflush(nbbpf) == EOF ||
2494 12156 : (!(ATOMIC_GET(&GDKdebug) & NOSYNCMASK)
2495 : #if defined(NATIVE_WIN32)
2496 : && _commit(_fileno(nbbpf)) < 0
2497 : #elif defined(HAVE_FDATASYNC)
2498 10 : && fdatasync(fileno(nbbpf)) < 0
2499 : #elif defined(HAVE_FSYNC)
2500 : && fsync(fileno(nbbpf)) < 0
2501 : #endif
2502 : )) {
2503 0 : GDKsyserror("Syncing BBP.dir file failed\n");
2504 0 : goto bailout;
2505 : }
2506 12156 : if (fclose(nbbpf) == EOF) {
2507 0 : GDKsyserror("Closing BBP.dir file failed\n");
2508 0 : nbbpf = NULL; /* can't close again */
2509 0 : goto bailout;
2510 : }
2511 :
2512 12156 : TRC_DEBUG(IO_, "end\n");
2513 :
2514 : return GDK_SUCCEED;
2515 :
2516 0 : bailout:
2517 0 : if (obbpf != NULL)
2518 0 : fclose(obbpf);
2519 0 : if (nbbpf != NULL)
2520 0 : fclose(nbbpf);
2521 : return GDK_FAIL;
2522 : }
2523 :
2524 : gdk_return
2525 223 : BBPdir_init(void)
2526 : {
2527 223 : FILE *fp;
2528 223 : gdk_return rc;
2529 :
2530 223 : rc = BBPdir_first(false, 0, 0, NULL, &fp);
2531 223 : if (rc == GDK_SUCCEED)
2532 223 : rc = BBPdir_last(-1, NULL, 0, NULL, fp);
2533 223 : return rc;
2534 : }
2535 :
2536 : /* function used for debugging */
/* Print a one-line summary to stdout for every BBP slot that has a
 * physical or logical reference: reference counts, status bits and
 * heap usage.  A totals line follows; its bat count only includes
 * BATs that own their tail heap, and the memory totals only include
 * heaps owned by the BAT itself plus any hash heaps. */
void
BBPdump(void)
{
	size_t mem = 0, vm = 0;	/* running totals over all dumped BATs */
	int n = 0;		/* number of BATs owning their tail heap */

	for (bat i = 0; i < (bat) ATOMIC_GET(&BBPsize); i++) {
		/* unreferenced slots are not interesting */
		if (BBP_refs(i) == 0 && BBP_lrefs(i) == 0)
			continue;
		BAT *b = BBP_desc(i);
		unsigned status = BBP_status(i);
		printf("# %d: " ALGOOPTBATFMT " refs=%d lrefs=%d status=%u%s",
		       i,
		       ALGOOPTBATPAR(b),
		       BBP_refs(i),
		       BBP_lrefs(i),
		       status,
		       BBP_cache(i) ? "" : " not cached");
		if (b == NULL) {
			printf(", no descriptor\n");
			continue;
		}
		if (b->theap) {
			if (b->theap->parentid != b->batCacheid) {
				/* heap belongs to another bat: only
				 * show the owner */
				printf(" Theap -> %d", b->theap->parentid);
			} else {
				/* free, size, farm; "X" = no base
				 * pointer, "M" = memory mapped */
				printf(" Theap=[%zu,%zu,f=%d]%s%s",
				       b->theap->free,
				       b->theap->size,
				       b->theap->farmid,
				       b->theap->base == NULL ? "X" : b->theap->storage == STORE_MMAP ? "M" : "",
				       status & BBPSWAPPED ? "(Swapped)" : b->theap->dirty ? "(Dirty)" : "");
				mem += HEAPmemsize(b->theap);
				vm += HEAPvmsize(b->theap);
				n++;
			}
		}
		if (b->tvheap) {
			if (b->tvheap->parentid != b->batCacheid) {
				printf(" Tvheap -> %d",
				       b->tvheap->parentid);
			} else {
				printf(" Tvheap=[%zu,%zu,f=%d]%s%s",
				       b->tvheap->free,
				       b->tvheap->size,
				       b->tvheap->farmid,
				       b->tvheap->base == NULL ? "X" : b->tvheap->storage == STORE_MMAP ? "M" : "",
				       b->tvheap->dirty ? "(Dirty)" : "");
				mem += HEAPmemsize(b->tvheap);
				vm += HEAPvmsize(b->tvheap);
			}
		}
		/* hash info is only reported if the read lock can be
		 * obtained without waiting */
		if (MT_rwlock_rdtry(&b->thashlock)) {
			/* (Hash *) 1 is a marker value, not a real hash */
			if (b->thash && b->thash != (Hash *) 1) {
				size_t m = HEAPmemsize(&b->thash->heaplink) + HEAPmemsize(&b->thash->heapbckt);
				size_t v = HEAPvmsize(&b->thash->heaplink) + HEAPvmsize(&b->thash->heapbckt);
				printf(" Thash=[%zu,%zu,f=%d/%d]", m, v,
				       b->thash->heaplink.farmid,
				       b->thash->heapbckt.farmid);
				mem += m;
				vm += v;
			}
			MT_rwlock_rdunlock(&b->thashlock);
		}
		printf(" role: %s\n",
		       b->batRole == PERSISTENT ? "persistent" : "transient");
	}
	printf("# %d bats: mem=%zu, vm=%zu\n", n, mem, vm);
	fflush(stdout);
}
2607 :
2608 : /*
2609 : * @+ BBP Readonly Interface
2610 : *
2611 : * These interface functions do not change the BBP tables. If they
2612 : * only access one specific BAT, the caller must have ensured that no
2613 : * other thread is modifying that BAT, therefore such functions do not
2614 : * need locking.
2615 : *
2616 : * BBP index lookup by BAT name:
2617 : */
2618 : static inline bat
2619 39965 : BBP_find(const char *nme, bool lock)
2620 : {
2621 39965 : bat i = BBPnamecheck(nme);
2622 :
2623 12278 : if (i != 0) {
2624 : /* for tmp_X BATs, we already know X */
2625 12278 : const char *s;
2626 :
2627 12278 : if (i >= (bat) ATOMIC_GET(&BBPsize) || (s = BBP_logical(i)) == NULL || strcmp(s, nme)) {
2628 12278 : i = 0;
2629 : }
2630 27687 : } else if (*nme != '.') {
2631 : /* must lock since hash-lookup traverses other BATs */
2632 27687 : if (lock)
2633 1490 : MT_lock_set(&BBPnameLock);
2634 28043 : for (i = BBP_hash[strHash(nme) & BBP_mask]; i; i = BBP_next(i)) {
2635 1144 : if (strcmp(BBP_logical(i), nme) == 0)
2636 : break;
2637 : }
2638 27687 : if (lock)
2639 1490 : MT_lock_unset(&BBPnameLock);
2640 : }
2641 39965 : return i;
2642 : }
2643 :
2644 : bat
2645 1490 : BBPindex(const char *nme)
2646 : {
2647 1490 : return BBP_find(nme, true);
2648 : }
2649 :
2650 : /*
2651 : * @+ BBP Update Interface
2652 : * Operations to insert, delete, clear, and modify BBP entries.
2653 : * Our policy for the BBP is to provide unlocked BBP access for
2654 : * speed, but still write operations have to be locked.
2655 : * #ifdef DEBUG_THREADLOCAL_BATS
2656 : * Create the shadow version (reversed) of a bat.
2657 : *
2658 : * An existing BAT is inserted into the BBP
2659 : */
2660 : static inline str
2661 691551 : BBPsubdir_recursive(str s, bat i)
2662 : {
2663 691551 : i >>= 6;
2664 691551 : if (i >= 0100) {
2665 185170 : s = BBPsubdir_recursive(s, i);
2666 185165 : *s++ = DIR_SEP;
2667 : }
2668 691546 : i &= 077;
2669 691546 : *s++ = '0' + (i >> 3);
2670 691546 : *s++ = '0' + (i & 7);
2671 691546 : return s;
2672 : }
2673 :
2674 : static inline void
2675 540143 : BBPgetsubdir(str s, bat i)
2676 : {
2677 540143 : if (i >= 0100) {
2678 506388 : s = BBPsubdir_recursive(s, i);
2679 : }
2680 540143 : *s = 0;
2681 540143 : }
2682 :
2683 : /* The free list is empty. We create a new entry by either just
2684 : * increasing BBPsize (up to BBPlimit) or extending the BBP (which
2685 : * increases BBPlimit).
2686 : *
2687 : * Note that this is the only place in normal, multi-threaded operation
2688 : * where BBPsize is assigned a value (never decreasing) and that the
2689 : * assignment happens after any necessary memory was allocated and
2690 : * initialized. */
2691 : static gdk_return
2692 47814 : maybeextend(void)
2693 : {
2694 47814 : bat size = (bat) ATOMIC_GET(&BBPsize);
2695 47821 : if (size + BBP_FREE_LOWATER > BBPlimit &&
2696 7 : BBPextend(size + BBP_FREE_LOWATER) != GDK_SUCCEED) {
2697 : /* nothing available */
2698 : return GDK_FAIL;
2699 : }
2700 47814 : ATOMIC_SET(&BBPsize, size + BBP_FREE_LOWATER);
2701 47814 : assert(BBP_free == 0);
2702 47814 : BBP_free = size;
2703 478140 : for (int i = 1; i < BBP_FREE_LOWATER; i++) {
2704 430326 : bat sz = size;
2705 430326 : BBP_next(sz) = ++size;
2706 : }
2707 47814 : BBP_next(size) = 0;
2708 47814 : BBP_nfree += BBP_FREE_LOWATER;
2709 47814 : return GDK_SUCCEED;
2710 : }
2711 :
2712 : /* return new BAT id (> 0); return 0 on failure */
2713 : bat
2714 15695953 : BBPinsert(BAT *bn)
2715 : {
2716 15695953 : MT_Id pid = MT_getpid();
2717 15693928 : bool lock = locked_by == 0 || locked_by != pid;
2718 15693928 : char dirname[24];
2719 15693928 : bat i;
2720 15693928 : int len = 0;
2721 15693928 : struct freebats *t = MT_thread_getfreebats();
2722 :
2723 15692428 : if (t->freebats == 0) {
2724 : /* critical section: get a new BBP entry */
2725 192975 : assert(t->nfreebats == 0);
2726 192975 : if (lock) {
2727 192975 : MT_lock_set(&GDKcacheLock);
2728 : }
2729 :
2730 : /* get a global bat, perhaps extend */
2731 192983 : if (BBP_free <= 0) {
2732 : /* we need to extend the BBP */
2733 47814 : gdk_return r;
2734 47814 : r = maybeextend();
2735 47814 : if (r != GDK_SUCCEED) {
2736 0 : if (lock) {
2737 0 : MT_lock_unset(&GDKcacheLock);
2738 : }
2739 : /* failed */
2740 0 : return 0;
2741 : }
2742 : }
2743 192983 : t->freebats = i = BBP_free;
2744 192983 : bat l = 0;
2745 2089626 : for (int x = 0; x < BBP_FREE_LOWATER && i; x++) {
2746 1896643 : assert(BBP_next(i) == 0 || BBP_next(i) > i);
2747 1896643 : t->nfreebats++;
2748 1896643 : BBP_nfree--;
2749 1896643 : l = i;
2750 1896643 : i = BBP_next(i);
2751 : }
2752 192983 : BBP_next(l) = 0;
2753 192983 : BBP_free = i;
2754 :
2755 192983 : if (lock) {
2756 192983 : MT_lock_unset(&GDKcacheLock);
2757 : }
2758 : /* rest of the work outside the lock */
2759 : }
2760 15692436 : if (t->nfreebats > 0) {
2761 15692436 : assert(t->freebats > 0);
2762 15692436 : i = t->freebats;
2763 15692436 : t->freebats = BBP_next(i);
2764 15692436 : assert(t->freebats == 0 || t->freebats > i);
2765 15692436 : BBP_next(i) = 0;
2766 15692436 : t->nfreebats--;
2767 : } else {
2768 0 : assert(t->nfreebats == 0);
2769 0 : assert(t->freebats == 0);
2770 : return 0;
2771 : }
2772 :
2773 : /* fill in basic BBP fields for the new bat */
2774 :
2775 15692436 : bn->batCacheid = i;
2776 15692436 : bn->creator_tid = pid;
2777 :
2778 15692436 : MT_lock_set(&GDKswapLock(i));
2779 15687069 : BBP_status_set(i, BBPDELETING|BBPHOT);
2780 15687069 : BBP_cache(i) = NULL;
2781 15687069 : BBP_desc(i) = bn;
2782 15687069 : BBP_refs(i) = 1; /* new bats have 1 pin */
2783 15687069 : BBP_lrefs(i) = 0; /* ie. no logical refs */
2784 15687069 : BBP_pid(i) = pid;
2785 15687069 : MT_lock_unset(&GDKswapLock(i));
2786 :
2787 15695252 : if (*BBP_bak(i) == 0)
2788 501990 : len = snprintf(BBP_bak(i), sizeof(BBP_bak(i)), "tmp_%o", (unsigned) i);
2789 15695252 : if (len == -1 || len >= FILENAME_MAX) {
2790 0 : GDKerror("impossible error\n");
2791 0 : return 0;
2792 : }
2793 15695252 : BBP_logical(i) = BBP_bak(i);
2794 :
2795 : /* Keep the physical location around forever */
2796 15695252 : if (!GDKinmemory(0) && *BBP_physical(i) == 0) {
2797 501612 : BBPgetsubdir(dirname, i);
2798 :
2799 501681 : if (*dirname) /* i.e., i >= 0100 */
2800 483544 : len = snprintf(BBP_physical(i), sizeof(BBP_physical(i)),
2801 : "%s%c%o", dirname, DIR_SEP, (unsigned) i);
2802 : else
2803 18137 : len = snprintf(BBP_physical(i), sizeof(BBP_physical(i)),
2804 : "%o", (unsigned) i);
2805 501681 : if (len == -1 || len >= FILENAME_MAX)
2806 : return 0;
2807 :
2808 501682 : TRC_DEBUG(BAT_, "%d = new %s(%s)\n", (int) i, BBP_logical(i), ATOMname(bn->ttype));
2809 : }
2810 :
2811 : return i;
2812 : }
2813 :
2814 : gdk_return
2815 15714239 : BBPcacheit(BAT *bn, bool lock)
2816 : {
2817 15714239 : bat i = bn->batCacheid;
2818 15714239 : unsigned mode;
2819 :
2820 15714239 : if (lock)
2821 31388150 : lock = locked_by == 0 || locked_by != MT_getpid();
2822 :
2823 15714239 : assert(i > 0);
2824 :
2825 15714239 : if (lock)
2826 15694086 : MT_lock_set(&GDKswapLock(i));
2827 15705620 : mode = (BBP_status(i) | BBPLOADED) & ~(BBPLOADING | BBPDELETING | BBPSWAPPED);
2828 :
2829 : /* cache it! */
2830 15705620 : BBP_cache(i) = bn;
2831 :
2832 15705620 : BBP_status_set(i, mode);
2833 :
2834 15705620 : if (lock)
2835 15690041 : MT_lock_unset(&GDKswapLock(i));
2836 15711459 : return GDK_SUCCEED;
2837 : }
2838 :
2839 : /*
2840 : * BBPuncacheit changes the BBP status to swapped out. Currently only
2841 : * used in BBPfree (bat swapped out) and BBPclear (bat destroyed
2842 : * forever).
2843 : */
2844 :
2845 : static void
2846 15734249 : BBPuncacheit(bat i, bool unloaddesc)
2847 : {
2848 15734249 : if (i < 0)
2849 : i = -i;
2850 15734249 : if (BBPcheck(i)) {
2851 15733276 : BAT *b = BBP_desc(i);
2852 :
2853 15733276 : assert(unloaddesc || BBP_refs(i) == 0);
2854 :
2855 15733276 : if (b) {
2856 15733276 : if (BBP_cache(i)) {
2857 15711448 : TRC_DEBUG(BAT_, "uncache %d (%s)\n", (int) i, BBP_logical(i));
2858 :
2859 : /* clearing bits can be done without the lock */
2860 15711448 : BBP_status_off(i, BBPLOADED);
2861 :
2862 15711448 : BBP_cache(i) = NULL;
2863 : }
2864 15733276 : if (unloaddesc) {
2865 15723298 : BBP_desc(i) = NULL;
2866 15723298 : BATdestroy(b);
2867 : }
2868 : }
2869 : }
2870 15736449 : }
2871 :
2872 : /*
2873 : * @- BBPclear
2874 : * BBPclear removes a BAT from the BBP directory forever.
2875 : */
2876 : static inline void
2877 68898 : BBPhandover(struct freebats *t, uint32_t n)
2878 : {
2879 68898 : bat *p, bid;
2880 : /* take one bat from our private free list and hand it over to
2881 : * the global free list */
2882 68898 : if (n >= t->nfreebats) {
2883 45387 : bid = t->freebats;
2884 45387 : t->freebats = 0;
2885 45387 : BBP_nfree += t->nfreebats;
2886 45387 : t->nfreebats = 0;
2887 : } else {
2888 23511 : p = &t->freebats;
2889 258621 : for (uint32_t i = n; i < t->nfreebats; i++)
2890 235110 : p = &BBP_next(*p);
2891 23511 : bid = *p;
2892 23511 : *p = 0;
2893 23511 : BBP_nfree += n;
2894 23511 : t->nfreebats -= n;
2895 : }
2896 : p = &BBP_free;
2897 1549523 : while (bid != 0) {
2898 7851921 : while (*p && *p < bid)
2899 6371296 : p = &BBP_next(*p);
2900 1480625 : bat i = BBP_next(bid);
2901 1480625 : BBP_next(bid) = *p;
2902 1480625 : *p = bid;
2903 1480625 : bid = i;
2904 : }
2905 68898 : }
2906 :
2907 : #ifndef NDEBUG
2908 : extern void printlist(bat bid) __attribute__((__cold__));
2909 : /* print a bat free list, pass start of free list as argument
2910 : * to be used from the debugger */
2911 : void
2912 0 : printlist(bat bid)
2913 : {
2914 0 : int n = 0;
2915 0 : while (bid) {
2916 0 : printf("%d ", bid);
2917 0 : bid = BBP_next(bid);
2918 0 : n++;
2919 : }
2920 0 : printf("(%d)\n", n);
2921 0 : }
2922 : #endif
2923 :
/*
 * bbpclear: take slot i out of use and put its id on the calling
 * thread's private free list (MT_thread_getfreebats()).
 *
 * Steps, in order:
 *  - BBPuncacheit(i, true) drops the cache pointer and the descriptor;
 *  - the status is set to BBPUNLOADING and both reference counts are
 *    zeroed, under GDKswapLock(i) when `lock' is true;
 *  - if the logical name is not a temporary name (BBPtmpcheck fails),
 *    BBP_delete(i) is called under BBPnameLock (presumably this drops
 *    the name from the name lookup structure -- confirm);
 *  - a logical name that is not the slot's BBP_bak buffer is freed;
 *  - status and logical name are cleared, and i is linked into the
 *    free list in front of the first id that is not smaller;
 *  - when the private list grows beyond BBP_FREE_HIWATER, all but
 *    BBP_FREE_LOWATER ids are handed to the global list, under
 *    GDKcacheLock when `lock' is true.
 */
2924 : static inline void
2925 15284740 : bbpclear(bat i, bool lock)
2926 : {
2927 15284740 : struct freebats *t = MT_thread_getfreebats();
2928 :
2929 15282929 : TRC_DEBUG(BAT_, "clear %d (%s)\n", (int) i, BBP_logical(i));
2930 15282929 : BBPuncacheit(i, true);
2931 15292605 : TRC_DEBUG(BAT_, "set to unloading %d\n", i);
2932 15292605 : if (lock) {
2933 15287945 : MT_lock_set(&GDKswapLock(i));
2934 : }
2935 :
2936 15288387 : BBP_status_set(i, BBPUNLOADING);
2937 15288387 : BBP_refs(i) = 0;
2938 15288387 : BBP_lrefs(i) = 0;
2939 15288387 : if (lock)
2940 15287466 : MT_lock_unset(&GDKswapLock(i));
2941 15287939 : if (!BBPtmpcheck(BBP_logical(i))) {
2942 2682 : MT_lock_set(&BBPnameLock);
2943 2682 : BBP_delete(i);
2944 2682 : MT_lock_unset(&BBPnameLock);
2945 : }
/* only separately allocated names are freed; BBP_bak(i) is kept */
2946 15287939 : if (BBP_logical(i) != BBP_bak(i))
2947 2682 : GDKfree(BBP_logical(i));
2948 15287775 : BBP_status_set(i, 0);
2949 15287775 : BBP_logical(i) = NULL;
2950 15287775 : bat *p;
/* find the insertion point: first entry that is 0 or >= i */
2951 47597817 : for (p = &t->freebats; *p && *p < i; p = &BBP_next(*p))
2952 : ;
2953 15287775 : BBP_next(i) = *p;
2954 15287775 : *p = i;
2955 15287775 : t->nfreebats++;
2956 15287775 : BBP_pid(i) = ~(MT_Id)0; /* not zero, not a valid thread id */
2957 15287775 : if (t->nfreebats > BBP_FREE_HIWATER) {
2958 23511 : if (lock)
2959 23511 : MT_lock_set(&GDKcacheLock);
2960 23511 : BBPhandover(t, t->nfreebats - BBP_FREE_LOWATER);
2961 23511 : if (lock)
2962 23511 : MT_lock_unset(&GDKcacheLock);
2963 : }
2964 15287775 : }
2965 :
2966 : void
2967 15286837 : BBPclear(bat i)
2968 : {
2969 15286837 : if (BBPcheck(i)) {
2970 15284812 : bool lock = locked_by == 0 || locked_by != MT_getpid();
2971 15284812 : bbpclear(i, lock);
2972 : }
2973 15286173 : }
2974 :
2975 : void
2976 49789 : BBPrelinquishbats(void)
2977 : {
2978 49789 : struct freebats *t = MT_thread_getfreebats();
2979 49789 : if (t == NULL || t->nfreebats == 0)
2980 : return;
2981 45387 : MT_lock_set(&GDKcacheLock);
2982 90774 : while (t->nfreebats > 0) {
2983 45387 : BBPhandover(t, t->nfreebats);
2984 : }
2985 45387 : MT_lock_unset(&GDKcacheLock);
2986 : }
2987 :
2988 : /*
2989 : * @- BBP rename
2990 : *
2991 : * Each BAT has a logical name that is globally unique.
2992 : * The batId is the same as the logical BAT name.
2993 : *
2994 : * The default logical name of a BAT is tmp_X, where X is the
2995 : * batCacheid. Apart from being globally unique, new logical bat
2996 : * names cannot be of the form tmp_X, unless X is the batCacheid.
2997 : *
2998 : * Physical names consist of a directory name followed by a logical
2999 : * name suffix. The directory name is derived from the batCacheid,
3000 : * and is currently organized in a hierarchy that puts max 64 bats in
3001 : * each directory (see BBPgetsubdir).
3002 : *
3003 : * Concerning the physical suffix: it is almost always bat_X. This
3004 : * saves us a whole lot of trouble, as bat_X is always unique and no
3005 : * conflicts can occur. Other suffixes are only supported in order
3006 : * just for backward compatibility with old repositories (you won't
3007 : * see them anymore in new repositories).
3008 : */
3009 : int
3010 38475 : BBPrename(BAT *b, const char *nme)
3011 : {
3012 38475 : if (b == NULL)
3013 : return 0;
3014 :
3015 38475 : char dirname[24];
3016 38475 : bat bid = b->batCacheid;
3017 38475 : bat tmpid = 0, i;
3018 :
3019 38475 : if (nme == NULL) {
3020 12278 : if (BBP_bak(bid)[0] == 0 &&
3021 0 : snprintf(BBP_bak(bid), sizeof(BBP_bak(bid)), "tmp_%o", (unsigned) bid) >= (int) sizeof(BBP_bak(bid))) {
3022 : /* cannot happen */
3023 0 : TRC_CRITICAL(GDK, "BBP default filename too long\n");
3024 0 : return BBPRENAME_LONG;
3025 : }
3026 12278 : nme = BBP_bak(bid);
3027 : }
3028 :
3029 : /* If name stays same, do nothing */
3030 38475 : if (BBP_logical(bid) && strcmp(BBP_logical(bid), nme) == 0)
3031 : return 0;
3032 :
3033 38475 : BBPgetsubdir(dirname, bid);
3034 :
3035 38475 : if ((tmpid = BBPnamecheck(nme)) && tmpid != bid) {
3036 0 : GDKerror("illegal temporary name: '%s'\n", nme);
3037 0 : return BBPRENAME_ILLEGAL;
3038 : }
3039 38475 : if (strlen(dirname) + strLen(nme) + 1 >= IDLENGTH) {
3040 0 : GDKerror("illegal temporary name: '%s'\n", nme);
3041 0 : return BBPRENAME_LONG;
3042 : }
3043 :
3044 38475 : MT_lock_set(&BBPnameLock);
3045 38475 : i = BBP_find(nme, false);
3046 38475 : if (i != 0) {
3047 1 : MT_lock_unset(&BBPnameLock);
3048 1 : GDKerror("name is in use: '%s'.\n", nme);
3049 1 : return BBPRENAME_ALREADY;
3050 : }
3051 :
3052 38474 : char *nnme;
3053 38474 : if (nme == BBP_bak(bid) || strcmp(nme, BBP_bak(bid)) == 0) {
3054 38474 : nnme = BBP_bak(bid);
3055 : } else {
3056 26196 : nnme = GDKstrdup(nme);
3057 26196 : if (nnme == NULL) {
3058 0 : MT_lock_unset(&BBPnameLock);
3059 0 : return BBPRENAME_MEMORY;
3060 : }
3061 : }
3062 :
3063 : /* carry through the name change */
3064 38474 : if (BBP_logical(bid) && !BBPtmpcheck(BBP_logical(bid))) {
3065 12278 : BBP_delete(bid);
3066 : }
3067 38474 : if (BBP_logical(bid) != BBP_bak(bid))
3068 12278 : GDKfree(BBP_logical(bid));
3069 38474 : BBP_logical(bid) = nnme;
3070 38474 : if (tmpid == 0) {
3071 26196 : BBP_insert(bid);
3072 : }
3073 38474 : MT_lock_set(&b->theaplock);
3074 38474 : bool transient = b->batTransient;
3075 38474 : MT_lock_unset(&b->theaplock);
3076 38474 : if (!transient) {
3077 8978 : bool lock = locked_by == 0 || locked_by != MT_getpid();
3078 :
3079 8978 : if (lock)
3080 8978 : MT_lock_set(&GDKswapLock(i));
3081 8978 : BBP_status_on(bid, BBPRENAMED);
3082 8978 : if (lock)
3083 8978 : MT_lock_unset(&GDKswapLock(i));
3084 : }
3085 38474 : MT_lock_unset(&BBPnameLock);
3086 38474 : return 0;
3087 : }
3088 :
3089 : /*
3090 : * @+ BBP swapping Policy
3091 : * The BAT can be moved back to disk using the routine BBPfree. It
3092 : * frees the storage for other BATs. After this call BAT* references
3093 : * maintained for the BAT are wrong. We should keep track of dirty
3094 : * unloaded BATs. They may have to be committed later on, which may
3095 : * include reading them in again.
3096 : *
3097 : * BBPswappable: may this bat be unloaded? Only real bats without
3098 : * memory references can be unloaded.
3099 : */
3100 : static inline void
3101 4618131 : BBPspin(bat i, const char *s, unsigned event)
3102 : {
3103 4618131 : if (BBPcheck(i) && (BBP_status(i) & event)) {
3104 : lng spin = LL_CONSTANT(0);
3105 :
3106 56942 : do {
3107 56942 : MT_sleep_ms(KITTENNAP);
3108 56941 : spin++;
3109 56941 : } while (BBP_status(i) & event);
3110 330 : TRC_DEBUG(BAT_, "%d,%s,%u: " LLFMT " loops\n", (int) i, s, event, spin);
3111 : }
3112 4617948 : }
3113 :
3114 : void
3115 6765129 : BBPcold(bat i)
3116 : {
3117 6765129 : if (!is_bat_nil(i)) {
3118 6765459 : BAT *b = BBP_desc(i);
3119 6765459 : if (b == NULL || b->batRole == PERSISTENT)
3120 695 : BBP_status_off(i, BBPHOT);
3121 : }
3122 6765129 : }
3123 :
3124 : /* This function can fail if the input parameter (i) is incorrect
3125 : * (unlikely). */
/*
 * incref: add one reference to bat i.
 *
 * logical  true:  increment the logical count (BBP_lrefs) and reset
 *                 BBP_pid(i) to 0;
 *          false: increment the physical count (BBP_refs) and turn
 *                 on the BBPHOT status bit.
 * lock     true:  take GDKswapLock(i) here, first waiting (with the
 *                 lock released, via BBPspin) for as long as the
 *                 status has BBPUNSTABLE or BBPLOADING set;
 *          false: no locking is done here (NOTE(review): presumably
 *                 the caller already holds the swap lock -- confirm).
 *
 * Returns the new value of the incremented counter, or 0 when i
 * fails BBPcheck or the slot has no descriptor.
 */
3126 : static inline int
3127 122277480 : incref(bat i, bool logical, bool lock)
3128 : {
3129 122277480 : int refs;
3130 122277480 : BAT *b;
3131 :
3132 122277480 : if (!BBPcheck(i))
3133 : return 0;
3134 :
3135 122202985 : if (lock) {
3136 32150022 : for (;;) {
3137 32150022 : MT_lock_set(&GDKswapLock(i));
3138 32249175 : if (!(BBP_status(i) & (BBPUNSTABLE|BBPLOADING)))
3139 : break;
3140 : /* the BATs is "unstable", try again */
3141 0 : MT_lock_unset(&GDKswapLock(i));
3142 0 : BBPspin(i, __func__, BBPUNSTABLE|BBPLOADING);
3143 : }
3144 : }
3145 : /* we have the lock */
3146 :
3147 122302138 : b = BBP_desc(i);
3148 122302138 : if (b == NULL) {
3149 : /* should not have happened */
3150 0 : if (lock)
3151 0 : MT_lock_unset(&GDKswapLock(i));
3152 0 : return 0;
3153 : }
3154 :
/* a bat without any reference is only expected here when it is
 * marked deleted or swapped */
3155 122302138 : assert(BBP_refs(i) + BBP_lrefs(i) ||
3156 : BBP_status(i) & (BBPDELETED | BBPSWAPPED));
3157 122302138 : if (logical) {
3158 32153869 : refs = ++BBP_lrefs(i);
3159 32153869 : BBP_pid(i) = 0;
3160 : } else {
3161 90148269 : refs = ++BBP_refs(i);
3162 90148269 : BBP_status_on(i, BBPHOT);
3163 : }
3164 122302138 : if (lock)
3165 32152930 : MT_lock_unset(&GDKswapLock(i));
3166 :
3167 : return refs;
3168 : }
3169 :
3170 : /* increment the physical reference counter for the given bat
3171 : * returns the new reference count
3172 : * also increments the physical reference count of the parent bat(s) (if
3173 : * any) */
3174 : int
3175 48431 : BBPfix(bat i)
3176 : {
3177 48431 : return BATdescriptor(i) ? 1 : 0;
3178 : }
3179 :
3180 : /* increment the logical reference count for the given bat
3181 : * returns the new reference count */
3182 : int
3183 27184042 : BBPretain(bat i)
3184 : {
3185 27184042 : bool lock = locked_by == 0 || locked_by != MT_getpid();
3186 :
3187 27184042 : return incref(i, true, lock);
3188 : }
3189 :
/*
 * decref: drop one reference of bat i and, when nothing keeps the
 * bat in memory any longer, unload or destroy it.
 *
 * logical  selects the counter: BBP_lrefs (true) or BBP_refs (false).
 * lock     when true, GDKswapLock(i) is held while the counters and
 *          the status are examined and changed.
 * func     name of the calling function, used in messages and passed
 *          on to BBPspin.
 *
 * Returns -1 when i is nil, fails BBPcheck, or when unloading with
 * BBPfree fails.  Otherwise returns the remaining value of the
 * counter that was decremented (0 if it was already 0, in which case
 * an error is reported and, in debug builds, an assertion fails).
 *
 * The decision to unload is taken under the lock(s) and recorded by
 * turning on BBPUNLOADING; the actual work (BBPdestroy, BBPfree,
 * BATdelete + BBPclear) is done after the locks are released.
 */
3190 : static inline int
3191 137200496 : decref(bat i, bool logical, bool lock, const char *func)
3192 : {
3193 137200496 : int refs = 0, lrefs;
3194 137200496 : bool swap = false;
3195 137200496 : bool locked = false;
3196 137200496 : int farmid = 0;
3197 137200496 : BAT *b;
3198 :
3199 137200496 : if (is_bat_nil(i))
3200 : return -1;
3201 137200471 : assert(i > 0);
3202 137200471 : if (BBPcheck(i) == 0)
3203 : return -1;
3204 :
3205 137100121 : if (lock)
3206 137100032 : MT_lock_set(&GDKswapLock(i));
3207 :
/* wait (without holding the swap lock) for an unload in progress */
3208 136955397 : while (BBP_status(i) & BBPUNLOADING) {
3209 0 : if (lock)
3210 0 : MT_lock_unset(&GDKswapLock(i));
3211 0 : BBPspin(i, func, BBPUNLOADING);
3212 0 : if (lock)
3213 136955397 : MT_lock_set(&GDKswapLock(i));
3214 : }
3215 :
3216 137081711 : b = BBP_cache(i);
3217 :
3218 : /* decrement references by one */
3219 137081711 : if (logical) {
3220 31954474 : if (BBP_lrefs(i) == 0) {
3221 0 : GDKerror("%s: %s does not have logical references.\n", func, BBP_logical(i));
3222 0 : assert(0);
3223 : } else {
3224 31954474 : refs = --BBP_lrefs(i);
3225 : }
3226 : /* cannot release last logical ref if still shared */
3227 : // but we could still have a bat iterator on it
3228 : //assert(!BATshared(BBP_desc(i)) || refs > 0);
3229 : } else {
3230 105127237 : if (BBP_refs(i) == 0) {
3231 0 : GDKerror("%s: %s does not have pointer fixes.\n", func, BBP_logical(i));
3232 0 : assert(0);
3233 : } else {
3234 105127237 : refs = --BBP_refs(i);
/* last pin gone: a bat that has a (tail or vheap) parent is marked hot */
3235 105127237 : if (b && refs == 0) {
3236 87493865 : MT_lock_set(&b->theaplock);
3237 87584816 : locked = true;
3238 87584816 : if (VIEWtparent(b) || VIEWvtparent(b))
3239 11000078 : BBP_status_on(i, BBPHOT);
3240 : }
3241 : }
3242 : }
/* from here on b->theaplock is held whenever the bat is cached */
3243 137172662 : if (b) {
3244 136972190 : if (!locked) {
3245 49616807 : MT_lock_set(&b->theaplock);
3246 49658181 : locked = true;
3247 : }
3248 : #if 0
3249 : if (b->batCount > b->batInserted && !isVIEW(b)) {
3250 : /* if batCount is larger than batInserted and
3251 : * the dirty bits are off, it may be that a
3252 : * (sub)commit happened in parallel to an
3253 : * update; we must undo the turning off of the
3254 : * dirty bits */
3255 : if (b->theap && b->theap->parentid == i)
3256 : b->theap->dirty = true;
3257 : if (b->tvheap && b->tvheap->parentid == i)
3258 : b->tvheap->dirty = true;
3259 : }
3260 : #endif
3261 136985315 : if (b->theap)
3262 136985315 : farmid = b->theap->farmid;
3263 : }
3264 :
3265 : /* we destroy transients asap and unload persistent bats only
3266 : * if they have been made cold or are not dirty */
/* chkflag: status bits that veto unloading.  BBPHOT is added to the
 * veto while memory use is below 75% of GDK_vm_maxsize and the bat's
 * heaps are small relative to the remaining room; above 85% even
 * dirty bats may be swapped out (swapdirty). */
3267 137185787 : unsigned chkflag = BBPSYNCING;
3268 137185787 : bool swapdirty = false;
3269 137185787 : if (b) {
3270 136990332 : size_t cursize;
3271 136990332 : if ((cursize = GDKvm_cursize()) < (size_t) (GDK_vm_maxsize * 0.75)) {
3272 136982571 : if (!locked) {
3273 0 : MT_lock_set(&b->theaplock);
3274 0 : locked = true;
3275 : }
3276 136982571 : if (((b->theap ? b->theap->size : 0) + (b->tvheap ? b->tvheap->size : 0)) < (GDK_vm_maxsize - cursize) / 32)
3277 136981045 : chkflag |= BBPHOT;
3278 0 : } else if (cursize > (size_t) (GDK_vm_maxsize * 0.85))
3279 137178026 : swapdirty = true;
3280 : }
3281 : /* only consider unloading if refs is 0; if, in addition, lrefs
3282 : * is 0, we can definitely unload, else only if some more
3283 : * conditions are met */
3284 240018881 : if (BBP_refs(i) == 0 &&
3285 118122499 : (BBP_lrefs(i) == 0 ||
3286 102807894 : (b != NULL && b->theap != NULL
3287 102810963 : ? ((swapdirty || !BATdirty(b)) &&
3288 10119324 : !(BBP_status(i) & chkflag) &&
3289 15527 : (BBP_status(i) & BBPPERSISTENT) &&
3290 : /* cannot unload in-memory data */
3291 7536 : !GDKinmemory(farmid) &&
3292 : /* do not unload views or parents of views */
3293 7536 : !BATshared(b) &&
3294 102815247 : b->batCacheid == b->theap->parentid &&
3295 7353 : (b->tvheap == NULL || b->batCacheid == b->tvheap->parentid))
3296 32961 : : (BBP_status(i) & BBPTMP)))) {
3297 : /* bat will be unloaded now. set the UNLOADING bit
3298 : * while locked so no other thread thinks it's
3299 : * available anymore */
3300 15293945 : assert((BBP_status(i) & BBPUNLOADING) == 0);
3301 15293945 : TRC_DEBUG(BAT_, "%s set to unloading BAT %d (status %u, lrefs %d)\n", func, i, BBP_status(i), BBP_lrefs(i));
3302 15293945 : BBP_status_on(i, BBPUNLOADING);
3303 15293945 : swap = true;
3304 : } /* else: bat cannot be swapped out */
/* remember lrefs while still locked; it is used after unlocking */
3305 137178026 : lrefs = BBP_lrefs(i);
3306 137178026 : if (locked)
3307 137014323 : MT_lock_unset(&b->theaplock);
3308 :
3309 : /* unlock before re-locking in unload; as saving a dirty
3310 : * persistent bat may take a long time */
3311 137207816 : if (lock)
3312 137107553 : MT_lock_unset(&GDKswapLock(i));
3313 :
3314 137240975 : if (swap) {
3315 15296375 : if (b != NULL) {
3316 15285533 : if (lrefs == 0 && (BBP_status(i) & BBPDELETED) == 0) {
3317 : /* free memory (if loaded) and delete from
3318 : * disk (if transient but saved) */
3319 15278366 : BBPdestroy(b);
3320 : } else {
3321 7167 : TRC_DEBUG(BAT_, "%s unload and free bat %d\n", func, i);
3322 : /* free memory of transient */
3323 7167 : if (BBPfree(b) != GDK_SUCCEED)
3324 : return -1; /* indicate failure */
3325 : }
3326 10842 : } else if (lrefs == 0 && (BBP_status(i) & BBPDELETED) == 0) {
/* not cached and unreferenced: delete via the descriptor, if any,
 * and release the slot */
3327 5894 : if ((b = BBP_desc(i)) != NULL)
3328 5894 : BATdelete(b);
3329 5894 : BBPclear(i);
3330 : } else {
/* nothing to unload after all: withdraw the UNLOADING mark */
3331 4948 : BBP_status_off(i, BBPUNLOADING);
3332 : }
3333 : }
3334 : return refs;
3335 : }
3336 :
3337 : int
3338 101373117 : BBPunfix(bat i)
3339 : {
3340 101373117 : return decref(i, false, true, __func__);
3341 : }
3342 :
3343 : int
3344 31969120 : BBPrelease(bat i)
3345 : {
3346 31969120 : return decref(i, true, true, __func__);
3347 : }
3348 :
3349 : void
3350 4980336 : BBPkeepref(BAT *b)
3351 : {
3352 4980336 : assert(b != NULL);
3353 4980336 : bool lock = locked_by == 0 || locked_by != MT_getpid();
3354 4980336 : int i = b->batCacheid;
3355 4980336 : int refs = incref(i, true, lock);
3356 4982927 : if (refs == 1) {
3357 4738835 : MT_lock_set(&b->theaplock);
3358 4736398 : BATsettrivprop(b);
3359 4734937 : MT_lock_unset(&b->theaplock);
3360 : }
3361 4979614 : if (ATOMIC_GET(&GDKdebug) & CHECKMASK)
3362 4935676 : BATassertProps(b);
3363 4982369 : if (BATsetaccess(b, BAT_READ) == NULL)
3364 : return; /* already decreffed */
3365 :
3366 3956441 : refs = decref(i, false, lock, __func__);
3367 3955721 : (void) refs;
3368 3955721 : assert(refs >= 0);
3369 : }
3370 :
/*
 * BATdescriptor: return the in-memory BAT for bat id i with one
 * extra physical reference, loading it through getBBPdescriptor
 * when the slot has no cached BAT.
 *
 * Returns NULL when i fails BBPcheck, when incref fails, or when
 * loading fails (in that last case the reference is given back).
 *
 * Unless the calling thread is the one recorded in locked_by,
 * GDKswapLock(i) is held from the status check until the end; it is
 * taken only once the status has neither BBPUNSTABLE nor BBPLOADING
 * set.  incref and decref are therefore called with lock == false.
 */
3371 : BAT *
3372 90286711 : BATdescriptor(bat i)
3373 : {
3374 90286711 : BAT *b = NULL;
3375 :
3376 90286711 : if (BBPcheck(i)) {
3377 90222545 : bool lock = locked_by == 0 || locked_by != MT_getpid();
3378 : if (lock) {
3379 90222545 : for (;;) {
3380 90222545 : MT_lock_set(&GDKswapLock(i));
3381 90177696 : if (!(BBP_status(i) & (BBPUNSTABLE|BBPLOADING)))
3382 : break;
3383 : /* the BATs is "unstable", try again */
3384 0 : MT_lock_unset(&GDKswapLock(i));
3385 0 : BBPspin(i, __func__, BBPUNSTABLE|BBPLOADING);
3386 : }
3387 : }
3388 90177696 : if (incref(i, false, false) > 0) {
3389 90312874 : b = BBP_cache(i);
3390 90312874 : if (b == NULL) {
3391 20140 : b = getBBPdescriptor(i);
3392 20139 : if (b == NULL) {
3393 : /* if loading failed, we need to
3394 : * compensate for the incref */
3395 0 : decref(i, false, false, __func__);
3396 : }
3397 : }
3398 : }
3399 90312873 : if (lock)
3400 90305670 : MT_lock_unset(&GDKswapLock(i));
3401 : }
3402 90224515 : return b;
3403 : }
3404 :
3405 : /*
3406 : * getBBPdescriptor checks whether BAT needs loading and does so if
3407 : * necessary. You must have at least one fix on the BAT before calling
3408 : * this.
3409 : */
/*
 * getBBPdescriptor: return the cached BAT of slot i, loading it with
 * BATload_intern(i, false) when there is none.
 *
 * The caller must hold at least one physical reference (asserted).
 * While the status has a BBPWAITING bit set the function waits for
 * it to clear.  If the slot is still valid (BBPvalid) and still not
 * cached afterwards, BBPLOADING is turned on, the bat is loaded, and
 * BBPLOADING is turned off again.
 *
 * Returns the BAT, or NULL when BBPcheck fails, the slot is not
 * valid, or loading fails.
 *
 * NOTE(review): the wait loop releases and re-acquires
 * GDKswapLock(i) unconditionally, so the caller is apparently
 * expected to hold that lock; BATdescriptor does not take it when
 * the thread is the one recorded in locked_by -- confirm this is
 * intended.
 */
3410 : static BAT *
3411 20139 : getBBPdescriptor(bat i)
3412 : {
3413 20139 : bool load = false;
3414 20139 : BAT *b = NULL;
3415 :
3416 20139 : assert(i > 0);
3417 20139 : if (!BBPcheck(i)) {
3418 0 : GDKerror("BBPcheck failed for bat id %d\n", i);
3419 0 : return NULL;
3420 : }
3421 20139 : assert(BBP_refs(i));
3422 20139 : if ((b = BBP_cache(i)) == NULL || BBP_status(i) & BBPWAITING) {
3423 :
3424 20140 : while (BBP_status(i) & BBPWAITING) { /* wait for bat to be loaded by other thread */
3425 1 : MT_lock_unset(&GDKswapLock(i));
3426 1 : BBPspin(i, __func__, BBPWAITING);
3427 20140 : MT_lock_set(&GDKswapLock(i));
3428 : }
/* re-read the cache: another thread may have loaded the bat meanwhile */
3429 20139 : if (BBPvalid(i)) {
3430 20139 : b = BBP_cache(i);
3431 20139 : if (b == NULL) {
3432 20139 : load = true;
3433 20139 : TRC_DEBUG(BAT_, "set to loading BAT %d\n", i);
3434 20139 : BBP_status_on(i, BBPLOADING);
3435 : }
3436 : }
3437 : }
3438 20139 : if (load) {
3439 20139 : TRC_DEBUG(IO_, "load %s\n", BBP_logical(i));
3440 :
3441 20139 : b = BATload_intern(i, false);
3442 :
3443 : /* clearing bits can be done without the lock */
3444 20139 : BBP_status_off(i, BBPLOADING);
3445 20139 : CHECKDEBUG if (b != NULL)
3446 16713 : BATassertProps(b);
3447 : }
3448 : return b;
3449 : }
3450 :
3451 : /*
3452 : * BBPsave executes unlocked; it just marks the BBP_status of the
3453 : * BAT to BBPsaving, so others that want to save or unload this BAT
3454 : * must spin lock on the BBP_status field.
3455 : */
/*
 * BBPsave: write bat b to disk if that is needed.
 *
 * Nothing is written for the bat itself when it has no logical
 * references, is a view, or is not dirty; in that case only a dirty
 * hash (if one exists) is saved with BAThashsave, and GDK_SUCCEED is
 * returned.
 *
 * Otherwise the status is examined under GDKswapLock(bid) (the lock
 * is skipped when the thread is the one recorded in locked_by):
 *  - if BBPSAVING is already set, another save is in progress; this
 *    call waits for it to finish and returns GDK_SUCCEED without
 *    saving anything itself;
 *  - else BBPSAVING is turned on (plus BBPSWAPPED when DELTAdirty(b),
 *    plus BBPTMP when the bat is transient), the lock is released,
 *    and the bat is first backed up (BBPbackup, only for BBPEXISTING
 *    bats with batInserted > 0) and then written with BATsave.
 *    BBPSAVING is turned off again afterwards.
 *
 * Returns the result of BBPbackup/BATsave in the latter case.
 */
3456 : gdk_return
3457 12353 : BBPsave(BAT *b)
3458 : {
3459 12353 : bool lock = locked_by == 0 || locked_by != MT_getpid();
3460 12353 : bat bid = b->batCacheid;
3461 12353 : gdk_return ret = GDK_SUCCEED;
3462 :
3463 12353 : MT_lock_set(&b->theaplock);
3464 12353 : if (BBP_lrefs(bid) == 0 || isVIEW(b) || !BATdirty(b)) {
3465 : /* do nothing */
3466 10850 : MT_lock_unset(&b->theaplock);
3467 10850 : MT_rwlock_rdlock(&b->thashlock);
/* (Hash *) 1 is a special marker value, not a real hash */
3468 10850 : if (b->thash && b->thash != (Hash *) 1 &&
3469 185 : (b->thash->heaplink.dirty || b->thash->heapbckt.dirty))
3470 112 : BAThashsave(b, (BBP_status(bid) & BBPPERSISTENT) != 0);
3471 10850 : MT_rwlock_rdunlock(&b->thashlock);
3472 10850 : return GDK_SUCCEED;
3473 : }
3474 1503 : MT_lock_unset(&b->theaplock);
3475 1503 : if (lock)
3476 1503 : MT_lock_set(&GDKswapLock(bid));
3477 :
3478 1503 : if (BBP_status(bid) & BBPSAVING) {
3479 : /* wait until save in other thread completes */
3480 0 : if (lock)
3481 0 : MT_lock_unset(&GDKswapLock(bid));
3482 0 : BBPspin(bid, __func__, BBPSAVING);
3483 : } else {
3484 : /* save it */
3485 1503 : unsigned flags = BBPSAVING;
3486 :
3487 1503 : MT_lock_set(&b->theaplock);
3488 1503 : if (DELTAdirty(b)) {
3489 441 : flags |= BBPSWAPPED;
3490 : }
3491 1503 : if (b->batTransient) {
3492 1493 : flags |= BBPTMP;
3493 : }
3494 1503 : MT_lock_unset(&b->theaplock);
3495 1503 : BBP_status_on(bid, flags);
3496 1503 : if (lock)
3497 1503 : MT_lock_unset(&GDKswapLock(bid));
3498 :
3499 1503 : TRC_DEBUG(IO_, "save " ALGOBATFMT "\n", ALGOBATPAR(b));
3500 :
3501 : /* do the time-consuming work unlocked */
3502 1503 : if (BBP_status(bid) & BBPEXISTING && b->batInserted > 0)
3503 2 : ret = BBPbackup(b, false);
3504 2 : if (ret == GDK_SUCCEED) {
3505 1503 : ret = BATsave(b);
3506 : }
3507 : /* clearing bits can be done without the lock */
3508 1503 : BBP_status_off(bid, BBPSAVING);
3509 : }
3510 : return ret;
3511 : }
3512 :
3513 : /*
3514 : * TODO merge BBPfree with BATfree? Its function is to prepare a BAT
3515 : * for being unloaded (or even destroyed, if the BAT is not
3516 : * persistent).
3517 : */
/*
 * BBPdestroy: release everything bat b holds and remove it from the
 * BBP.
 *
 *  - If the tail is not borrowed from a parent (VIEWtparent(b) == 0)
 *    and the tail's atom type has an atomUnfix function, that
 *    function is called on every value; errors are ignored.
 *  - The references on theap and tvheap are dropped with HEAPdecref;
 *    its second argument is true only when the heap has no parent
 *    bat.  For a heap that does have a parent, the logical reference
 *    on that parent is released with BBPrelease.
 *  - A remaining oldtail heap has its DELAYEDREMOVE bit cleared and
 *    is dropped as well.
 *  - Finally BATdelete(b) is called and the slot is released with
 *    BBPclear.
 *
 * The parent ids are read up front, before the heap pointers are
 * reset.
 */
3518 : static void
3519 15277867 : BBPdestroy(BAT *b)
3520 : {
3521 15277867 : bat tp = VIEWtparent(b);
3522 15277867 : bat vtp = VIEWvtparent(b);
3523 :
3524 15277867 : if (tp == 0) {
3525 : /* bats that get destroyed must unfix their atoms */
3526 8472868 : gdk_return (*tunfix) (const void *) = BATatoms[b->ttype].atomUnfix;
3527 8472868 : if (tunfix) {
3528 0 : BUN p, q;
3529 0 : BATiter bi = bat_iterator_nolock(b);
3530 :
3531 0 : BATloop(b, p, q) {
3532 : /* ignore errors */
3533 0 : (void) (*tunfix)(BUNtail(bi, p));
3534 : }
3535 : }
3536 : }
3537 15277867 : if (b->theap) {
3538 15278356 : HEAPdecref(b->theap, tp == 0);
3539 15280946 : b->theap = NULL;
3540 15280946 : if (tp != 0)
3541 6807827 : BBPrelease(tp);
3542 : }
3543 15280175 : if (b->tvheap) {
3544 2236250 : HEAPdecref(b->tvheap, vtp == 0);
3545 2236277 : b->tvheap = NULL;
3546 2236277 : if (vtp != 0)
3547 1544163 : BBPrelease(vtp);
3548 : }
3549 15280179 : if (b->oldtail) {
3550 2 : ATOMIC_AND(&b->oldtail->refs, ~DELAYEDREMOVE);
3551 2 : HEAPdecref(b->oldtail, true);
3552 2 : b->oldtail = NULL;
3553 : }
3554 15280179 : BATdelete(b);
3555 :
3556 15279535 : BBPclear(b->batCacheid); /* if destroyed; de-register from BBP */
3557 15280085 : }
3558 :
3559 : static gdk_return
3560 12353 : BBPfree(BAT *b)
3561 : {
3562 12353 : bat bid = b->batCacheid;
3563 12353 : gdk_return ret;
3564 :
3565 12353 : assert(bid > 0);
3566 12353 : assert(BBPswappable(b));
3567 12353 : assert(!isVIEW(b));
3568 :
3569 12353 : BBP_unload_inc();
3570 : /* write dirty BATs before unloading */
3571 12353 : ret = BBPsave(b);
3572 12353 : if (ret == GDK_SUCCEED) {
3573 12353 : if (BBP_cache(bid))
3574 12353 : BATfree(b); /* free memory */
3575 12353 : BBPuncacheit(bid, false);
3576 : }
3577 12353 : TRC_DEBUG(BAT_, "turn off unloading %d\n", bid);
3578 12353 : BBP_status_off(bid, BBPUNLOADING);
3579 12353 : BBP_unload_dec();
3580 12353 : return ret;
3581 : }
3582 :
3583 : /*
3584 : * BBPquickdesc loads a BAT descriptor without loading the entire BAT,
3585 : * of which the result be used only for a *limited* number of
3586 : * purposes. Specifically, during the global sync/commit, we do not
3587 : * want to load any BATs that are not already loaded, both because
3588 : * this costs performance, and because getting into memory shortage
3589 : * during a commit is extremely dangerous. Loading a BAT tends not to
3590 : * be required, since the commit actions mostly involve moving some
3591 : * pointers in the BAT descriptor.
3592 : */
3593 : BAT *
3594 1183955 : BBPquickdesc(bat bid)
3595 : {
3596 1183955 : BAT *b;
3597 :
3598 1183955 : if (!BBPcheck(bid)) {
3599 0 : if (!is_bat_nil(bid)) {
3600 0 : GDKerror("called with invalid batid.\n");
3601 0 : assert(0);
3602 : }
3603 : return NULL;
3604 : }
3605 1183733 : BBPspin(bid, __func__, BBPWAITING);
3606 1183611 : b = BBP_desc(bid);
3607 1183611 : if (b && b->ttype < 0) {
3608 241 : const char *aname = ATOMunknown_name(b->ttype);
3609 241 : int tt = ATOMindex(aname);
3610 241 : if (tt < 0) {
3611 0 : GDKwarning("atom '%s' unknown in bat '%s'.\n",
3612 : aname, BBP_physical(bid));
3613 : } else {
3614 241 : b->ttype = tt;
3615 : }
3616 : }
3617 : return b;
3618 : }
3619 :
3620 : /*
3621 : * @+ Global Commit
3622 : */
/*
 * dirty_bat: decide whether bat *i has to be dealt with during a
 * (sub)commit.
 *
 * After waiting for a save in progress (BBPSAVING) to finish:
 *  - if the bat is cached and has status BBPNEW, its mmap modes are
 *    checked with BATcheckmodes; on failure *i is negated to signal
 *    the error to the caller and NULL is returned;
 *  - otherwise, if the bat is cached, BBPPERSISTENT, and either this
 *    is a subcommit or the bat is dirty, the cached BAT is returned;
 *  - if the bat is not cached and this is a subcommit, the slot's
 *    descriptor is returned;
 *  - in all other cases, including an invalid *i, NULL is returned.
 *
 * b->theaplock is held while the cached bat is examined and is
 * released on every path out.
 */
3623 : static BAT *
3624 3442461 : dirty_bat(bat *i, bool subcommit)
3625 : {
3626 3442461 : if (BBPvalid(*i)) {
3627 3434459 : BAT *b;
3628 3434459 : BBPspin(*i, __func__, BBPSAVING);
3629 3434459 : b = BBP_cache(*i);
3630 3434459 : if (b != NULL) {
3631 3255343 : MT_lock_set(&b->theaplock);
3632 3493649 : if ((BBP_status(*i) & BBPNEW) &&
3633 238306 : BATcheckmodes(b, false) != GDK_SUCCEED) /* check mmap modes */
3634 0 : *i = -*i; /* error */
3635 3255343 : else if ((BBP_status(*i) & BBPPERSISTENT) &&
3636 0 : (subcommit || BATdirty(b))) {
3637 3076936 : MT_lock_unset(&b->theaplock);
3638 3076936 : return b; /* the bat is loaded, persistent and dirty */
3639 : }
/* reached for the error case and for bats that need no work */
3640 178407 : MT_lock_unset(&b->theaplock);
3641 179116 : } else if (subcommit)
3642 174064 : return BBP_desc(*i);
3643 : }
3644 : return NULL;
3645 : }
3646 :
3647 : /*
3648 : * @- backup-bat
3649 : * Backup-bat moves all files of a BAT to a backup directory. Only
3650 : * after this succeeds, it may be saved. If some failure occurs
3651 : * halfway saving, we can thus always roll back.
3652 : */
3653 : static gdk_return
3654 204720 : file_move(int farmid, const char *srcdir, const char *dstdir, const char *name, const char *ext)
3655 : {
3656 204720 : if (GDKmove(farmid, srcdir, name, ext, dstdir, name, ext, false) == GDK_SUCCEED) {
3657 : return GDK_SUCCEED;
3658 : } else {
3659 0 : char *path;
3660 0 : struct stat st;
3661 :
3662 0 : path = GDKfilepath(farmid, srcdir, name, ext);
3663 0 : if (path == NULL)
3664 0 : return GDK_FAIL;
3665 0 : if (MT_stat(path, &st)) {
3666 : /* source file does not exist; the best
3667 : * recovery is to give an error but continue
3668 : * by considering the BAT as not saved; making
3669 : * sure that this time it does get saved.
3670 : */
3671 0 : GDKsyserror("file_move: cannot stat %s\n", path);
3672 0 : GDKfree(path);
3673 0 : return GDK_FAIL; /* fishy, but not fatal */
3674 : }
3675 0 : GDKfree(path);
3676 : }
3677 0 : return GDK_FAIL;
3678 : }
3679 :
3680 : /* returns true if the file exists */
3681 : static bool
3682 2814554 : file_exists(int farmid, const char *dir, const char *name, const char *ext)
3683 : {
3684 2814554 : char *path;
3685 2814554 : struct stat st;
3686 2814554 : int ret = -1;
3687 :
3688 2814554 : path = GDKfilepath(farmid, dir, name, ext);
3689 2814554 : if (path) {
3690 2814554 : ret = MT_stat(path, &st);
3691 2814554 : TRC_DEBUG(IO_, "stat(%s) = %d\n", path, ret);
3692 2814554 : GDKfree(path);
3693 : }
3694 2814554 : return (ret == 0);
3695 : }
3696 :
3697 : static gdk_return
3698 204716 : heap_move(Heap *hp, const char *srcdir, const char *dstdir, const char *nme, const char *ext)
3699 : {
3700 : /* see doc at BATsetaccess()/gdk_bat.c for an expose on mmap
3701 : * heap modes */
3702 204716 : if (file_exists(hp->farmid, dstdir, nme, ext)) {
3703 : /* dont overwrite heap with the committed state
3704 : * already in dstdir */
3705 : return GDK_SUCCEED;
3706 204716 : } else if (hp->newstorage == STORE_PRIV &&
3707 0 : !file_exists(hp->farmid, srcdir, nme, ext)) {
3708 :
3709 : /* In order to prevent half-saved X.new files
3710 : * surviving a recover we create a dummy file in the
3711 : * BACKUP(dstdir) whose presence will trigger
3712 : * BBPrecover to remove them. Thus, X will prevail
3713 : * where it otherwise wouldn't have. If X already has
3714 : * a saved X.new, that one is backed up as normal.
3715 : */
3716 :
3717 0 : FILE *fp;
3718 0 : long_str kill_ext;
3719 0 : char *path;
3720 :
3721 0 : strconcat_len(kill_ext, sizeof(kill_ext), ext, ".kill", NULL);
3722 0 : path = GDKfilepath(hp->farmid, dstdir, nme, kill_ext);
3723 0 : if (path == NULL)
3724 : return GDK_FAIL;
3725 0 : fp = MT_fopen(path, "w");
3726 0 : if (fp == NULL)
3727 0 : GDKsyserror("heap_move: cannot open file %s\n", path);
3728 0 : TRC_DEBUG(IO_, "open %s = %d\n", path, fp ? 0 : -1);
3729 0 : GDKfree(path);
3730 :
3731 0 : if (fp != NULL) {
3732 0 : fclose(fp);
3733 0 : return GDK_SUCCEED;
3734 : } else {
3735 : return GDK_FAIL;
3736 : }
3737 : }
3738 204716 : return file_move(hp->farmid, srcdir, dstdir, nme, ext);
3739 : }
3740 :
3741 : /*
3742 : * @- BBPprepare
3743 : *
3744 : * this routine makes sure there is a BAKDIR/, and initiates one if
3745 : * not. For subcommits, it does the same with SUBDIR.
3746 : *
3747 : * It is now locked, to get proper file counters, and also to prevent
3748 : * concurrent BBPrecovers, etc.
3749 : *
3750 : * backup_dir == 0 => no backup BBP.dir
3751 : * backup_dir == 1 => BBP.dir saved in BACKUP/
3752 : * backup_dir == 2 => BBP.dir saved in SUBCOMMIT/
3753 : */
3754 :
/*
 * BBPprepare: get the backup directories ready for a commit
 * (subcommit == false) or subcommit (subcommit == true).
 *
 *  - When this is the first subcommit (backup_subdir == 0),
 *    BBPrecover_subdir is run first.
 *  - When no files have been backed up yet (backup_files == 0),
 *    backup_dir is reset, BBPrecover(0) is run and BAKDIR is created
 *    (an already existing directory is not an error).
 *  - When starting a subcommit, SUBDIR is created; here any mkdir
 *    failure is an error.
 *  - When backup_dir does not yet have the value for this kind of
 *    commit (1 for commit, 2 for subcommit), BBP.dir is moved into
 *    the matching directory -- from BAKDIR if it was backed up there
 *    before, else from BATDIR -- and backup_dir is updated.
 *  - Finally the counters backup_subdir and backup_files are bumped.
 *
 * Returns GDK_SUCCEED, or the failure of the first step that failed;
 * the counters are not changed in that case.
 */
3755 : static gdk_return
3756 24201 : BBPprepare(bool subcommit)
3757 : {
3758 24201 : bool start_subcommit;
/* the backup_dir value that belongs to this kind of commit */
3759 24201 : int set = 1 + subcommit;
3760 24201 : gdk_return ret = GDK_SUCCEED;
3761 :
3762 24201 : start_subcommit = (subcommit && backup_subdir == 0);
3763 11925 : if (start_subcommit) {
3764 : /* starting a subcommit. Make sure SUBDIR and DELDIR
3765 : * are clean */
3766 11925 : ret = BBPrecover_subdir();
3767 11925 : if (ret != GDK_SUCCEED)
3768 : return ret;
3769 : }
3770 24201 : if (backup_files == 0) {
3771 343 : backup_dir = 0;
3772 343 : ret = BBPrecover(0);
3773 343 : if (ret != GDK_SUCCEED)
3774 : return ret;
3775 343 : str bakdirpath = GDKfilepath(0, NULL, BAKDIR, NULL);
3776 343 : if (bakdirpath == NULL) {
3777 : return GDK_FAIL;
3778 : }
3779 :
3780 343 : if (MT_mkdir(bakdirpath) < 0 && errno != EEXIST) {
3781 0 : GDKsyserror("cannot create directory %s\n", bakdirpath);
3782 0 : GDKfree(bakdirpath);
3783 0 : return GDK_FAIL;
3784 : }
3785 : /* if BAKDIR already exists, don't signal error */
/* NOTE(review): the value printed below is ret as left by
 * BBPrecover, not the result of MT_mkdir */
3786 343 : TRC_DEBUG(IO_, "mkdir %s = %d\n", bakdirpath, (int) ret);
3787 343 : GDKfree(bakdirpath);
3788 : }
3789 24201 : if (start_subcommit) {
3790 : /* make a new SUBDIR (subdir of BAKDIR) */
3791 11925 : str subdirpath = GDKfilepath(0, NULL, SUBDIR, NULL);
3792 11925 : if (subdirpath == NULL) {
3793 : return GDK_FAIL;
3794 : }
3795 :
3796 11925 : if (MT_mkdir(subdirpath) < 0) {
3797 0 : GDKsyserror("cannot create directory %s\n", subdirpath);
3798 0 : GDKfree(subdirpath);
3799 0 : return GDK_FAIL;
3800 : }
3801 11925 : TRC_DEBUG(IO_, "mkdir %s\n", subdirpath);
3802 11925 : GDKfree(subdirpath);
3803 : }
3804 24201 : if (backup_dir != set) {
3805 : /* a valid backup dir *must* at least contain BBP.dir */
3806 48729 : if ((ret = GDKmove(0, backup_dir ? BAKDIR : BATDIR, "BBP", "dir", subcommit ? SUBDIR : BAKDIR, "BBP", "dir", true)) != GDK_SUCCEED)
3807 : return ret;
3808 24193 : backup_dir = set;
3809 : }
3810 : /* increase counters */
3811 24201 : backup_subdir += subcommit;
3812 24201 : backup_files++;
3813 :
3814 24201 : return ret;
3815 : }
3816 :
3817 : static gdk_return
3818 1109190 : do_backup(Heap *h, bool dirty, bool subcommit)
3819 : {
3820 1109190 : gdk_return ret = GDK_SUCCEED;
3821 1109190 : char extnew[16];
3822 :
3823 1109190 : if (h->wasempty) {
3824 : return GDK_SUCCEED;
3825 : }
3826 :
3827 : /* direct mmap is unprotected (readonly usage, or has WAL
3828 : * protection) */
3829 1109190 : if (h->storage != STORE_MMAP) {
3830 : /* STORE_PRIV saves into X.new files. Two cases could
3831 : * happen. The first is when a valid X.new exists
3832 : * because of an access change or a previous
3833 : * commit. This X.new should be backed up as
3834 : * usual. The second case is when X.new doesn't
3835 : * exist. In that case we could have half written
3836 : * X.new files (after a crash). To protect against
3837 : * these we write X.new.kill files in the backup
3838 : * directory (see heap_move). */
3839 1100203 : gdk_return mvret = GDK_SUCCEED;
3840 :
3841 1100203 : char *srcdir = GDKfilepath(NOFARM, BATDIR, h->filename, NULL);
3842 1100203 : if (srcdir == NULL)
3843 : return GDK_FAIL;
3844 1100203 : char *nme = strrchr(srcdir, DIR_SEP);
3845 1100203 : assert(nme != NULL);
3846 1100203 : *nme++ = '\0';
3847 1100203 : char *ext = strchr(nme, '.');
3848 1100203 : assert(ext != NULL);
3849 1100203 : *ext++ = '\0';
3850 :
3851 1100203 : strconcat_len(extnew, sizeof(extnew), ext, ".new", NULL);
3852 1304919 : if (dirty &&
3853 409432 : !file_exists(h->farmid, BAKDIR, nme, extnew) &&
3854 204716 : !file_exists(h->farmid, BAKDIR, nme, ext)) {
3855 : /* if the heap is dirty and there is no heap
3856 : * file (with or without .new extension) in
3857 : * the BAKDIR, move the heap (preferably with
3858 : * .new extension) to the correct backup
3859 : * directory */
3860 204716 : if (file_exists(h->farmid, srcdir, nme, extnew)) {
3861 0 : mvret = heap_move(h, srcdir,
3862 : subcommit ? SUBDIR : BAKDIR,
3863 : nme, extnew);
3864 204716 : } else if (file_exists(h->farmid, srcdir, nme, ext)) {
3865 204718 : mvret = heap_move(h, srcdir,
3866 : subcommit ? SUBDIR : BAKDIR,
3867 : nme, ext);
3868 204716 : if (mvret == GDK_SUCCEED) {
3869 : /* file no longer in "standard"
3870 : * location */
3871 204716 : h->hasfile = false;
3872 : }
3873 : }
3874 895487 : } else if (subcommit) {
3875 : /* if subcommit, we may need to move an
3876 : * already made backup from BAKDIR to
3877 : * SUBDIR */
3878 895487 : if (file_exists(h->farmid, BAKDIR, nme, extnew))
3879 0 : mvret = file_move(h->farmid, BAKDIR, SUBDIR, nme, extnew);
3880 895487 : else if (file_exists(h->farmid, BAKDIR, nme, ext))
3881 4 : mvret = file_move(h->farmid, BAKDIR, SUBDIR, nme, ext);
3882 : }
3883 : /* there is a situation where the move may fail,
3884 : * namely if this heap was not supposed to be existing
3885 : * before, i.e. after a BATmaterialize on a persistent
3886 : * bat; as a workaround, do not complain about move
3887 : * failure if the source file is nonexistent
3888 : */
3889 204720 : if (mvret != GDK_SUCCEED && file_exists(h->farmid, srcdir, nme, ext)) {
3890 1100203 : ret = GDK_FAIL;
3891 : }
3892 1100203 : if (subcommit &&
3893 1100201 : (h->storage == STORE_PRIV || h->newstorage == STORE_PRIV)) {
3894 0 : long_str kill_ext;
3895 :
3896 0 : strconcat_len(kill_ext, sizeof(kill_ext),
3897 : ext, ".new.kill", NULL);
3898 0 : if (file_exists(h->farmid, BAKDIR, nme, kill_ext) &&
3899 0 : file_move(h->farmid, BAKDIR, SUBDIR, nme, kill_ext) != GDK_SUCCEED) {
3900 0 : ret = GDK_FAIL;
3901 : }
3902 : }
3903 1100203 : GDKfree(srcdir);
3904 : }
3905 : return ret;
3906 : }
3907 :
3908 : static gdk_return
3909 883531 : BBPbackup(BAT *b, bool subcommit)
3910 : {
3911 883531 : gdk_return rc = GDK_SUCCEED;
3912 :
3913 883531 : MT_lock_set(&b->theaplock);
3914 883531 : BATiter bi = bat_iterator_nolock(b);
3915 883531 : if (!bi.copiedtodisk || bi.transient) {
3916 1 : MT_lock_unset(&b->theaplock);
3917 1 : return GDK_SUCCEED;
3918 : }
3919 883530 : assert(b->theap->parentid == b->batCacheid);
3920 883530 : if (b->oldtail && b->oldtail != (Heap *) 1) {
3921 1685 : bi.h = b->oldtail;
3922 1685 : bi.hdirty = b->oldtail->dirty;
3923 : }
3924 : #ifndef NDEBUG
3925 883530 : bi.locked = true;
3926 : #endif
3927 883530 : HEAPincref(bi.h);
3928 883530 : if (bi.vh)
3929 225660 : HEAPincref(bi.vh);
3930 883530 : MT_lock_unset(&b->theaplock);
3931 :
3932 : /* determine location dir and physical suffix */
3933 883530 : if (bi.type != TYPE_void) {
3934 883530 : rc = do_backup(bi.h, bi.hdirty, subcommit);
3935 883530 : if (rc == GDK_SUCCEED && bi.vh != NULL)
3936 225660 : rc = do_backup(bi.vh, bi.vhdirty, subcommit);
3937 : }
3938 883530 : bat_iterator_end(&bi);
3939 883530 : return rc;
3940 : }
3941 :
3942 : static inline void
3943 0 : BBPcheckHeap(Heap *h)
3944 : {
3945 0 : struct stat statb;
3946 0 : char *path;
3947 :
3948 0 : char *s = strrchr(h->filename, DIR_SEP);
3949 0 : if (s)
3950 0 : s++;
3951 : else
3952 : s = h->filename;
3953 0 : path = GDKfilepath(0, BAKDIR, s, NULL);
3954 0 : if (path == NULL)
3955 0 : return;
3956 0 : if (MT_stat(path, &statb) < 0) {
3957 0 : GDKfree(path);
3958 0 : path = GDKfilepath(0, BATDIR, h->filename, NULL);
3959 0 : if (path == NULL)
3960 : return;
3961 0 : if (MT_stat(path, &statb) < 0) {
3962 0 : GDKsyserror("cannot stat file %s (expected size %zu)\n",
3963 : path, h->free);
3964 0 : assert(0);
3965 : GDKfree(path);
3966 : return;
3967 : }
3968 : }
3969 0 : assert((statb.st_mode & S_IFMT) == S_IFREG);
3970 0 : assert((size_t) statb.st_size >= h->free);
3971 0 : if ((size_t) statb.st_size < h->free) {
3972 : GDKerror("file %s too small (expected %zu, actual %zu)\n", path, h->free, (size_t) statb.st_size);
3973 : GDKfree(path);
3974 : return;
3975 : }
3976 0 : GDKfree(path);
3977 : }
3978 :
3979 : static void
3980 0 : BBPcheckBBPdir(void)
3981 : {
3982 0 : FILE *fp;
3983 0 : int lineno = 0;
3984 0 : bat bbpsize = 0;
3985 0 : unsigned bbpversion;
3986 0 : lng logno, transid;
3987 :
3988 0 : fp = GDKfileopen(0, BAKDIR, "BBP", "dir", "r");
3989 0 : assert(fp != NULL);
3990 0 : if (fp == NULL) {
3991 : fp = GDKfileopen(0, BATDIR, "BBP", "dir", "r");
3992 : assert(fp != NULL);
3993 : if (fp == NULL)
3994 : return;
3995 : }
3996 0 : bbpversion = BBPheader(fp, &lineno, &bbpsize, &logno, &transid, false);
3997 0 : if (bbpversion == 0) {
3998 0 : fclose(fp);
3999 0 : return; /* error reading file */
4000 : }
4001 0 : assert(bbpversion == GDKLIBRARY);
4002 :
4003 0 : for (;;) {
4004 0 : BAT b;
4005 0 : Heap h;
4006 0 : Heap vh;
4007 0 : vh = h = (Heap) {
4008 : .free = 0,
4009 : };
4010 0 : b = (BAT) {
4011 : .theap = &h,
4012 : .tvheap = &vh,
4013 : };
4014 0 : char filename[sizeof(BBP_physical(0))];
4015 0 : char batname[129];
4016 : #ifdef GDKLIBRARY_HASHASH
4017 0 : int hashash;
4018 : #endif
4019 :
4020 0 : switch (BBPreadBBPline(fp, bbpversion, &lineno, &b,
4021 : #ifdef GDKLIBRARY_HASHASH
4022 : &hashash,
4023 : #endif
4024 : batname, filename, NULL)) {
4025 0 : case 0:
4026 : /* end of file */
4027 0 : fclose(fp);
4028 : /* don't leak errors, this is just debug code */
4029 0 : GDKclrerr();
4030 0 : return;
4031 : case 1:
4032 : /* successfully read an entry */
4033 0 : break;
4034 0 : default:
4035 : /* error */
4036 0 : fclose(fp);
4037 0 : return;
4038 : }
4039 : #ifdef GDKLIBRARY_HASHASH
4040 0 : assert(hashash == 0);
4041 : #endif
4042 0 : assert(b.batCacheid < (bat) ATOMIC_GET(&BBPsize));
4043 0 : assert(BBP_desc(b.batCacheid) != NULL);
4044 0 : assert(b.hseqbase <= GDK_oid_max);
4045 0 : if (b.ttype == TYPE_void) {
4046 : /* no files needed */
4047 0 : continue;
4048 : }
4049 0 : if (b.theap->free > 0)
4050 0 : BBPcheckHeap(b.theap);
4051 0 : if (b.tvheap != NULL && b.tvheap->free > 0)
4052 0 : BBPcheckHeap(b.tvheap);
4053 : }
4054 : }
4055 :
4056 : /*
4057 : * @+ Atomic Write
4058 : * The atomic BBPsync() function first safeguards the old images of
4059 : * all files to be written in BAKDIR. It then saves all files. If that
4060 : * succeeds fully, BAKDIR is renamed to DELDIR. The rename is
4061 : * considered an atomic action. If it succeeds, the DELDIR is removed.
4062 : * If something fails, the pre-sync status can be obtained by moving
4063 : * back all backed up files; this is done by BBPrecover().
4064 : *
4065 : * The BBP.dir is also moved into the BAKDIR.
4066 : */
4067 : gdk_return
4068 11933 : BBPsync(int cnt, bat *restrict subcommit, BUN *restrict sizes, lng logno, lng transid)
4069 : {
4070 11933 : gdk_return ret = GDK_SUCCEED;
4071 11933 : lng t0 = 0, t1 = 0;
4072 11933 : str bakdir, deldir;
4073 11933 : const bool lock = locked_by == 0 || locked_by != MT_getpid();
4074 11933 : char buf[3000];
4075 11933 : int n = subcommit ? 0 : -1;
4076 8 : FILE *obbpf, *nbbpf;
4077 :
4078 11933 : if ((bakdir = GDKfilepath(0, NULL, subcommit ? SUBDIR : BAKDIR, NULL)) == NULL)
4079 : return GDK_FAIL;
4080 11933 : if ((deldir = GDKfilepath(0, NULL, DELDIR, NULL)) == NULL) {
4081 0 : GDKfree(bakdir);
4082 0 : return GDK_FAIL;
4083 : }
4084 :
4085 11933 : TRC_DEBUG_IF(PERF) t0 = t1 = GDKusec();
4086 :
4087 11933 : if ((ATOMIC_GET(&GDKdebug) & TAILCHKMASK) && !GDKinmemory(0))
4088 0 : BBPcheckBBPdir();
4089 :
4090 11933 : ret = BBPprepare(subcommit != NULL);
4091 :
4092 : /* PHASE 1: safeguard everything in a backup-dir */
4093 1828908 : for (int idx = 1; ret == GDK_SUCCEED && idx < cnt; idx++) {
4094 1816975 : bat i = subcommit ? subcommit[idx] : idx;
4095 1816975 : const bat bid = i;
4096 1816975 : if (lock)
4097 1806447 : MT_lock_set(&GDKswapLock(bid));
4098 : /* set flag that we're syncing, i.e. that we'll
4099 : * be between moving heap to backup dir and
4100 : * saving the new version, in other words, the
4101 : * heap may not exist in the usual location */
4102 1816975 : BBP_status_on(bid, BBPSYNCING);
4103 : /* wait until unloading is finished before
4104 : * attempting to make a backup */
4105 1816975 : while (BBP_status(bid) & BBPUNLOADING) {
4106 0 : if (lock)
4107 0 : MT_lock_unset(&GDKswapLock(bid));
4108 0 : BBPspin(bid, __func__, BBPUNLOADING);
4109 0 : if (lock)
4110 1816975 : MT_lock_set(&GDKswapLock(bid));
4111 : }
4112 1816975 : BAT *b = dirty_bat(&i, subcommit != NULL);
4113 1816975 : if (i <= 0 ||
4114 1816975 : (BBP_status(bid) & BBPEXISTING &&
4115 1503329 : b != NULL &&
4116 2386858 : b->batInserted > 0 &&
4117 883529 : BBPbackup(b, subcommit != NULL) != GDK_SUCCEED)) {
4118 : ret = GDK_FAIL;
4119 : }
4120 1816975 : if (lock)
4121 1816975 : MT_lock_unset(&GDKswapLock(bid));
4122 : }
4123 11933 : TRC_DEBUG(PERF, "move time "LLFMT" usec, %d files\n", (t1 = GDKusec()) - t0, backup_files);
4124 :
4125 : /* PHASE 2: save the repository and write new BBP.dir file */
4126 11933 : if (ret == GDK_SUCCEED) {
4127 11933 : ret = BBPdir_first(subcommit != NULL, logno, transid,
4128 : &obbpf, &nbbpf);
4129 : }
4130 :
4131 1828908 : for (int idx = 1; ret == GDK_SUCCEED && idx < cnt; idx++) {
4132 1816975 : bat i = subcommit ? subcommit[idx] : idx;
4133 : /* BBP_desc(i) may be NULL */
4134 1816975 : BUN size = sizes ? sizes[idx] : BUN_NONE;
4135 1816975 : BATiter bi;
4136 :
4137 1816975 : if (BBP_status(i) & BBPPERSISTENT) {
4138 1625486 : BAT *b = dirty_bat(&i, subcommit != NULL);
4139 1625486 : if (i <= 0) {
4140 0 : ret = GDK_FAIL;
4141 0 : break;
4142 : }
4143 1625486 : bi = bat_iterator(BBP_desc(i));
4144 1625486 : assert(sizes == NULL || size <= bi.count);
4145 1621932 : assert(sizes == NULL || bi.width == 0 || (bi.type == TYPE_msk ? ((size + 31) / 32) * 4 : size << bi.shift) <= bi.hfree);
4146 1625486 : if (size > bi.count) /* includes sizes==NULL */
4147 : size = bi.count;
4148 1625486 : MT_lock_set(&bi.b->theaplock);
4149 1625486 : bi.b->batInserted = size;
4150 1625486 : if (bi.b->ttype >= 0 && ATOMvarsized(bi.b->ttype)) {
4151 : /* see epilogue() for other part of this */
4152 : /* remember the tail we're saving */
4153 360919 : if (BATsetprop_nolock(bi.b, (enum prop_t) 20, TYPE_ptr, &bi.h) == NULL) {
4154 0 : GDKerror("setprop failed\n");
4155 0 : ret = GDK_FAIL;
4156 : } else {
4157 360919 : if (bi.b->oldtail == NULL)
4158 358908 : bi.b->oldtail = (Heap *) 1;
4159 360919 : HEAPincref(bi.h);
4160 : }
4161 : }
4162 1625486 : MT_lock_unset(&bi.b->theaplock);
4163 1625486 : if (ret == GDK_SUCCEED && b && size != 0) {
4164 : /* wait for BBPSAVING so that we
4165 : * can set it, wait for
4166 : * BBPUNLOADING before
4167 : * attempting to save */
4168 970136 : for (;;) {
4169 970136 : if (lock)
4170 970136 : MT_lock_set(&GDKswapLock(i));
4171 970136 : if (!(BBP_status(i) & (BBPSAVING|BBPUNLOADING)))
4172 : break;
4173 0 : if (lock)
4174 0 : MT_lock_unset(&GDKswapLock(i));
4175 0 : BBPspin(i, __func__, BBPSAVING|BBPUNLOADING);
4176 : }
4177 970136 : BBP_status_on(i, BBPSAVING);
4178 970136 : if (lock)
4179 970136 : MT_lock_unset(&GDKswapLock(i));
4180 970136 : ret = BATsave_iter(b, &bi, size);
4181 970136 : BBP_status_off(i, BBPSAVING);
4182 : }
4183 : } else {
4184 191489 : bi = bat_iterator(NULL);
4185 : }
4186 1816975 : if (ret == GDK_SUCCEED) {
4187 1816975 : n = BBPdir_step(i, size, n, buf, sizeof(buf), &obbpf, nbbpf, &bi);
4188 1816975 : if (n < -1)
4189 0 : ret = GDK_FAIL;
4190 : }
4191 1816975 : bat_iterator_end(&bi);
4192 : /* we once again have a saved heap */
4193 : }
4194 :
4195 11933 : TRC_DEBUG(PERF, "write time "LLFMT" usec\n", (t0 = GDKusec()) - t1);
4196 :
4197 11933 : if (ret == GDK_SUCCEED) {
4198 11933 : ret = BBPdir_last(n, buf, sizeof(buf), obbpf, nbbpf);
4199 : }
4200 :
4201 11933 : TRC_DEBUG(PERF, "dir time "LLFMT" usec, %d bats\n", (t1 = GDKusec()) - t0, (bat) ATOMIC_GET(&BBPsize));
4202 :
4203 11933 : if (ret == GDK_SUCCEED) {
4204 : /* atomic switchover */
4205 : /* this is the big one: this call determines
4206 : * whether the operation of this function
4207 : * succeeded, so no changing of ret after this
4208 : * call anymore */
4209 :
4210 11933 : if (MT_rename(bakdir, deldir) < 0 &&
4211 : /* maybe there was an old deldir, so remove and try again */
4212 0 : (GDKremovedir(0, DELDIR) != GDK_SUCCEED ||
4213 0 : MT_rename(bakdir, deldir) < 0))
4214 0 : ret = GDK_FAIL;
4215 0 : if (ret != GDK_SUCCEED)
4216 0 : GDKsyserror("rename(%s,%s) failed\n", bakdir, deldir);
4217 11933 : TRC_DEBUG(IO_, "rename %s %s = %d\n", bakdir, deldir, (int) ret);
4218 : }
4219 :
4220 : /* AFTERMATH */
4221 11933 : if (ret == GDK_SUCCEED) {
4222 11933 : ATOMIC_SET(&BBPlogno, logno); /* the new value */
4223 11933 : ATOMIC_SET(&BBPtransid, transid);
4224 11933 : backup_files = subcommit ? (backup_files - backup_subdir) : 0;
4225 11933 : backup_dir = backup_subdir = 0;
4226 11933 : if (GDKremovedir(0, DELDIR) != GDK_SUCCEED)
4227 0 : fprintf(stderr, "#BBPsync: cannot remove directory %s\n", DELDIR);
4228 11933 : (void) BBPprepare(false); /* (try to) remove DELDIR and set up new BAKDIR */
4229 11933 : if (backup_files > 1) {
4230 11925 : TRC_DEBUG(PERF, "backup_files %d > 1\n", backup_files);
4231 11925 : backup_files = 1;
4232 : }
4233 : }
4234 11933 : TRC_DEBUG(PERF, "%s (ready time "LLFMT" usec)\n",
4235 : ret == GDK_SUCCEED ? "" : " failed",
4236 : (t0 = GDKusec()) - t1);
4237 :
4238 11933 : if (ret != GDK_SUCCEED) {
4239 : /* clean up extra refs we created */
4240 0 : for (int idx = 1; idx < cnt; idx++) {
4241 0 : bat i = subcommit ? subcommit[idx] : idx;
4242 0 : BAT *b = BBP_desc(i);
4243 0 : if (b && ATOMvarsized(b->ttype)) {
4244 0 : MT_lock_set(&b->theaplock);
4245 0 : ValPtr p = BATgetprop_nolock(b, (enum prop_t) 20);
4246 0 : if (p != NULL) {
4247 0 : HEAPdecref(p->val.pval, false);
4248 0 : BATrmprop_nolock(b, (enum prop_t) 20);
4249 : }
4250 0 : MT_lock_unset(&b->theaplock);
4251 : }
4252 : }
4253 : }
4254 :
4255 : /* turn off the BBPSYNCING bits for all bats, even when things
4256 : * didn't go according to plan (i.e., don't check for ret ==
4257 : * GDK_SUCCEED) */
4258 1828908 : for (int idx = 1; idx < cnt; idx++) {
4259 1816975 : bat i = subcommit ? subcommit[idx] : idx;
4260 1816975 : BBP_status_off(i, BBPSYNCING);
4261 : }
4262 :
4263 11933 : GDKfree(bakdir);
4264 11933 : GDKfree(deldir);
4265 11933 : return ret;
4266 : }
4267 :
4268 : /*
4269 : * Recovery just moves all files back to their original location. this
4270 : * is an incremental process: if something fails, just stop with still
4271 : * files left for moving in BACKUP/. The recovery process can resume
4272 : * later with the left over files.
4273 : */
4274 : static gdk_return
4275 0 : force_move(int farmid, const char *srcdir, const char *dstdir, const char *name)
4276 : {
4277 0 : const char *p;
4278 0 : char *dstpath, *killfile;
4279 0 : gdk_return ret = GDK_SUCCEED;
4280 :
4281 0 : if ((p = strrchr(name, '.')) != NULL && strcmp(p, ".kill") == 0) {
4282 : /* Found a X.new.kill file, ie remove the X.new file */
4283 0 : ptrdiff_t len = p - name;
4284 0 : long_str srcpath;
4285 :
4286 0 : strncpy(srcpath, name, len);
4287 0 : srcpath[len] = '\0';
4288 0 : if ((dstpath = GDKfilepath(farmid, dstdir, srcpath, NULL)) == NULL) {
4289 : return GDK_FAIL;
4290 : }
4291 :
4292 : /* step 1: remove the X.new file that is going to be
4293 : * overridden by X */
4294 0 : if (MT_remove(dstpath) != 0 && errno != ENOENT) {
4295 : /* if it exists and cannot be removed, all
4296 : * this is going to fail */
4297 0 : GDKsyserror("force_move: remove(%s)\n", dstpath);
4298 0 : GDKfree(dstpath);
4299 0 : return GDK_FAIL;
4300 : }
4301 0 : GDKfree(dstpath);
4302 :
4303 : /* step 2: now remove the .kill file. This one is
4304 : * crucial, otherwise we'll never finish recovering */
4305 0 : if ((killfile = GDKfilepath(farmid, srcdir, name, NULL)) == NULL) {
4306 : return GDK_FAIL;
4307 : }
4308 0 : if (MT_remove(killfile) != 0) {
4309 0 : ret = GDK_FAIL;
4310 0 : GDKsyserror("force_move: remove(%s)\n", killfile);
4311 : }
4312 0 : GDKfree(killfile);
4313 0 : return ret;
4314 : }
4315 : /* try to rename it */
4316 0 : ret = GDKmove(farmid, srcdir, name, NULL, dstdir, name, NULL, false);
4317 :
4318 0 : if (ret != GDK_SUCCEED) {
4319 0 : char *srcpath;
4320 :
4321 0 : GDKclrerr();
4322 : /* two legal possible causes: file exists or dir
4323 : * doesn't exist */
4324 0 : if ((dstpath = GDKfilepath(farmid, dstdir, name, NULL)) == NULL)
4325 : return GDK_FAIL;
4326 0 : if ((srcpath = GDKfilepath(farmid, srcdir, name, NULL)) == NULL) {
4327 0 : GDKfree(dstpath);
4328 0 : return GDK_FAIL;
4329 : }
4330 0 : if (MT_remove(dstpath) != 0) /* clear destination */
4331 0 : ret = GDK_FAIL;
4332 0 : TRC_DEBUG(IO_, "remove %s = %d\n", dstpath, (int) ret);
4333 :
4334 0 : (void) GDKcreatedir(dstdir); /* if fails, move will fail */
4335 0 : ret = GDKmove(farmid, srcdir, name, NULL, dstdir, name, NULL, true);
4336 0 : TRC_DEBUG(IO_, "link %s %s = %d\n", srcpath, dstpath, (int) ret);
4337 0 : GDKfree(dstpath);
4338 0 : GDKfree(srcpath);
4339 : }
4340 : return ret;
4341 : }
4342 :
4343 : gdk_return
4344 343 : BBPrecover(int farmid)
4345 : {
4346 343 : str bakdirpath;
4347 343 : str leftdirpath;
4348 343 : DIR *dirp;
4349 343 : struct dirent *dent;
4350 343 : long_str path, dstpath;
4351 343 : bat i;
4352 343 : size_t j = strlen(BATDIR);
4353 343 : gdk_return ret = GDK_SUCCEED;
4354 343 : bool dirseen = false;
4355 343 : str dstdir;
4356 :
4357 343 : bakdirpath = GDKfilepath(farmid, NULL, BAKDIR, NULL);
4358 343 : leftdirpath = GDKfilepath(farmid, NULL, LEFTDIR, NULL);
4359 343 : if (bakdirpath == NULL || leftdirpath == NULL) {
4360 0 : GDKfree(bakdirpath);
4361 0 : GDKfree(leftdirpath);
4362 0 : return GDK_FAIL;
4363 : }
4364 343 : dirp = opendir(bakdirpath);
4365 343 : if (dirp == NULL) {
4366 231 : if (errno != ENOENT)
4367 0 : GDKsyserror("cannot open directory %s\n", bakdirpath);
4368 231 : GDKfree(bakdirpath);
4369 231 : GDKfree(leftdirpath);
4370 231 : return GDK_SUCCEED; /* nothing to do */
4371 : }
4372 112 : memcpy(dstpath, BATDIR, j);
4373 112 : dstpath[j] = DIR_SEP;
4374 112 : dstpath[++j] = 0;
4375 112 : dstdir = dstpath + j;
4376 112 : TRC_DEBUG(IO_, "start\n");
4377 :
4378 112 : if (MT_mkdir(leftdirpath) < 0 && errno != EEXIST) {
4379 0 : GDKsyserror("cannot create directory %s\n", leftdirpath);
4380 0 : closedir(dirp);
4381 0 : GDKfree(bakdirpath);
4382 0 : GDKfree(leftdirpath);
4383 0 : return GDK_FAIL;
4384 : }
4385 :
4386 : /* move back all files */
4387 336 : while ((dent = readdir(dirp)) != NULL) {
4388 224 : const char *q = strchr(dent->d_name, '.');
4389 :
4390 224 : if (q == dent->d_name) {
4391 224 : char *fn;
4392 :
4393 224 : if (strcmp(dent->d_name, ".") == 0 ||
4394 112 : strcmp(dent->d_name, "..") == 0)
4395 224 : continue;
4396 0 : fn = GDKfilepath(farmid, BAKDIR, dent->d_name, NULL);
4397 0 : if (fn) {
4398 0 : int uret = MT_remove(fn);
4399 0 : TRC_DEBUG(IO_, "remove %s = %d\n",
4400 : fn, uret);
4401 0 : GDKfree(fn);
4402 : }
4403 0 : continue;
4404 0 : } else if (strcmp(dent->d_name, "BBP.dir") == 0) {
4405 0 : dirseen = true;
4406 0 : continue;
4407 : }
4408 0 : if (q == NULL)
4409 0 : q = dent->d_name + strlen(dent->d_name);
4410 0 : if ((j = q - dent->d_name) + 1 > sizeof(path)) {
4411 : /* name too long: ignore */
4412 0 : continue;
4413 : }
4414 0 : strncpy(path, dent->d_name, j);
4415 0 : path[j] = 0;
4416 0 : if (GDKisdigit(*path)) {
4417 0 : i = strtol(path, NULL, 8);
4418 : } else {
4419 0 : i = BBP_find(path, false);
4420 0 : if (i < 0)
4421 0 : i = -i;
4422 : }
4423 0 : if (i == 0 || i >= (bat) ATOMIC_GET(&BBPsize) || !BBPvalid(i)) {
4424 0 : force_move(farmid, BAKDIR, LEFTDIR, dent->d_name);
4425 : } else {
4426 0 : BBPgetsubdir(dstdir, i);
4427 0 : if (force_move(farmid, BAKDIR, dstpath, dent->d_name) != GDK_SUCCEED) {
4428 : ret = GDK_FAIL;
4429 : break;
4430 : }
4431 : /* don't trust index files after recovery */
4432 0 : GDKunlink(farmid, dstpath, path, "thashl");
4433 0 : GDKunlink(farmid, dstpath, path, "thashb");
4434 0 : GDKunlink(farmid, dstpath, path, "timprints");
4435 0 : GDKunlink(farmid, dstpath, path, "torderidx");
4436 0 : GDKunlink(farmid, dstpath, path, "tstrimps");
4437 : }
4438 : }
4439 112 : closedir(dirp);
4440 112 : if (dirseen && ret == GDK_SUCCEED) { /* we have a saved BBP.dir; it should be moved back!! */
4441 0 : struct stat st;
4442 0 : char *fn;
4443 :
4444 0 : fn = GDKfilepath(farmid, BATDIR, "BBP", "dir");
4445 0 : if (fn == NULL) {
4446 : ret = GDK_FAIL;
4447 : } else {
4448 0 : ret = recover_dir(farmid, MT_stat(fn, &st) == 0);
4449 0 : GDKfree(fn);
4450 : }
4451 : }
4452 :
4453 112 : if (ret == GDK_SUCCEED) {
4454 112 : if (MT_rmdir(bakdirpath) < 0) {
4455 0 : GDKsyserror("cannot remove directory %s\n", bakdirpath);
4456 0 : ret = GDK_FAIL;
4457 : }
4458 112 : TRC_DEBUG(IO_, "rmdir %s = %d\n", bakdirpath, (int) ret);
4459 : }
4460 112 : if (ret != GDK_SUCCEED)
4461 0 : GDKerror("recovery failed.\n");
4462 :
4463 112 : TRC_DEBUG(IO_, "end\n");
4464 112 : GDKfree(bakdirpath);
4465 112 : GDKfree(leftdirpath);
4466 112 : return ret;
4467 : }
4468 :
4469 : /*
4470 : * SUBDIR recovery is quite mindlessly moving all files back to the
4471 : * parent (BAKDIR). We do recognize moving back BBP.dir and set
4472 : * backed_up_subdir accordingly.
4473 : */
4474 : gdk_return
4475 12260 : BBPrecover_subdir(void)
4476 : {
4477 12260 : str subdirpath;
4478 12260 : DIR *dirp;
4479 12260 : struct dirent *dent;
4480 12260 : gdk_return ret = GDK_SUCCEED;
4481 :
4482 12260 : subdirpath = GDKfilepath(0, NULL, SUBDIR, NULL);
4483 12260 : if (subdirpath == NULL)
4484 : return GDK_FAIL;
4485 12260 : dirp = opendir(subdirpath);
4486 12260 : if (dirp == NULL && errno != ENOENT)
4487 0 : GDKsyserror("cannot open directory %s\n", subdirpath);
4488 12260 : GDKfree(subdirpath);
4489 12260 : if (dirp == NULL) {
4490 : return GDK_SUCCEED; /* nothing to do */
4491 : }
4492 0 : TRC_DEBUG(IO_, "start\n");
4493 :
4494 : /* move back all files */
4495 0 : while ((dent = readdir(dirp)) != NULL) {
4496 0 : if (dent->d_name[0] == '.')
4497 0 : continue;
4498 0 : ret = GDKmove(0, SUBDIR, dent->d_name, NULL, BAKDIR, dent->d_name, NULL, true);
4499 0 : if (ret != GDK_SUCCEED)
4500 : break;
4501 0 : if (strcmp(dent->d_name, "BBP.dir") == 0)
4502 0 : backup_dir = 1;
4503 : }
4504 0 : closedir(dirp);
4505 :
4506 : /* delete the directory */
4507 0 : if (ret == GDK_SUCCEED) {
4508 0 : ret = GDKremovedir(0, SUBDIR);
4509 0 : if (backup_dir == 2) {
4510 0 : TRC_DEBUG(IO_, "%s%cBBP.dir had disappeared!\n", SUBDIR, DIR_SEP);
4511 0 : backup_dir = 0;
4512 : }
4513 : }
4514 0 : TRC_DEBUG(IO_, "end = %d\n", (int) ret);
4515 :
4516 0 : if (ret != GDK_SUCCEED)
4517 0 : GDKerror("recovery failed.\n");
4518 : return ret;
4519 : }
4520 :
4521 : /*
4522 : * @- The diskscan
4523 : * The BBPdiskscan routine walks through the BAT dir, cleans up
4524 : * leftovers, and measures disk occupancy. Leftovers are files that
4525 : * cannot belong to a BAT. in order to establish this for [ht]heap
4526 : * files, the BAT descriptor is loaded in order to determine whether
4527 : * these files are still required.
4528 : *
4529 : * The routine gathers all bat sizes in a bat that contains bat-ids
4530 : * and bytesizes. The return value is the number of bytes of space
4531 : * freed.
4532 : */
4533 : static bool
4534 26120 : persistent_bat(bat bid)
4535 : {
4536 26120 : if (bid >= 0 && bid < (bat) ATOMIC_GET(&BBPsize) && BBPvalid(bid)) {
4537 26120 : BAT *b = BBP_cache(bid);
4538 :
4539 26120 : if (b == NULL || b->batCopiedtodisk) {
4540 : return true;
4541 : }
4542 : }
4543 : return false;
4544 : }
4545 :
4546 : static BAT *
4547 26120 : getdesc(bat bid)
4548 : {
4549 26120 : BAT *b = NULL;
4550 :
4551 26120 : if (is_bat_nil(bid))
4552 : return NULL;
4553 26120 : assert(bid > 0);
4554 26120 : if (bid < (bat) ATOMIC_GET(&BBPsize) && BBP_logical(bid))
4555 26120 : b = BBP_desc(bid);
4556 26120 : if (b == NULL)
4557 0 : BBPclear(bid);
4558 : return b;
4559 : }
4560 :
4561 : static bool
4562 1787 : BBPdiskscan(const char *parent, size_t baseoff)
4563 : {
4564 1787 : DIR *dirp = opendir(parent);
4565 1787 : struct dirent *dent;
4566 1787 : char fullname[FILENAME_MAX];
4567 1787 : str dst;
4568 1787 : size_t dstlen;
4569 1787 : const char *src = parent;
4570 :
4571 1787 : if (dirp == NULL) {
4572 173 : if (errno != ENOENT)
4573 0 : GDKsyserror("cannot open directory %s\n", parent);
4574 173 : return true; /* nothing to do */
4575 : }
4576 :
4577 1614 : dst = stpcpy(fullname, src);
4578 1614 : if (dst > fullname && dst[-1] != DIR_SEP)
4579 1614 : *dst++ = DIR_SEP;
4580 1614 : dstlen = sizeof(fullname) - (dst - fullname);
4581 :
4582 34198 : while ((dent = readdir(dirp)) != NULL) {
4583 30970 : const char *p;
4584 30970 : bat bid;
4585 30970 : bool ok, delete;
4586 :
4587 30970 : if (dent->d_name[0] == '.')
4588 3228 : continue; /* ignore .dot files and directories (. ..) */
4589 :
4590 : #ifdef GDKLIBRARY_JSON
4591 27742 : if (strcmp(dent->d_name, "jsonupgradeneeded") == 0) {
4592 8 : continue; /* ignore json upgrade signal file */
4593 : }
4594 : #endif
4595 :
4596 27734 : if (strncmp(dent->d_name, "BBP.", 4) == 0 &&
4597 335 : (strcmp(parent + baseoff, BATDIR) == 0 ||
4598 335 : strncmp(parent + baseoff, BAKDIR, strlen(BAKDIR)) == 0 ||
4599 0 : strncmp(parent + baseoff, SUBDIR, strlen(SUBDIR)) == 0))
4600 335 : continue;
4601 :
4602 27399 : p = strchr(dent->d_name, '.');
4603 :
4604 27399 : if (strlen(dent->d_name) >= dstlen) {
4605 : /* found a file with too long a name
4606 : (i.e. unknown); stop pruning in this
4607 : subdir */
4608 0 : fprintf(stderr, "unexpected file %s, leaving %s.\n", dent->d_name, parent);
4609 0 : break;
4610 : }
4611 27399 : strncpy(dst, dent->d_name, dstlen);
4612 27399 : fullname[sizeof(fullname) - 1] = 0;
4613 :
4614 27399 : if (p == NULL && !BBPdiskscan(fullname, baseoff)) {
4615 : /* it was a directory */
4616 1279 : continue;
4617 : }
4618 :
4619 26120 : if (p && strcmp(p + 1, "tmp") == 0) {
4620 : delete = true;
4621 : ok = true;
4622 26120 : bid = 0;
4623 : } else {
4624 26120 : bid = strtol(dent->d_name, NULL, 8);
4625 26120 : ok = p && bid;
4626 26120 : delete = false;
4627 :
4628 26120 : if (!ok || !persistent_bat(bid)) {
4629 : delete = true;
4630 26120 : } else if (strncmp(p + 1, "tail", 4) == 0) {
4631 19663 : BAT *b = getdesc(bid);
4632 19663 : delete = (b == NULL || !b->ttype || !b->batCopiedtodisk || b->batCount == 0);
4633 19663 : assert(b == NULL || b->batCount > 0 || b->theap->free == 0);
4634 19663 : if (!delete) {
4635 19661 : if (b->ttype == TYPE_str) {
4636 5269 : switch (b->twidth) {
4637 3068 : case 1:
4638 3068 : delete = strcmp(p + 1, "tail1") != 0;
4639 3068 : break;
4640 1824 : case 2:
4641 1824 : delete = strcmp(p + 1, "tail2") != 0;
4642 1824 : break;
4643 : #if SIZEOF_VAR_T == 8
4644 377 : case 4:
4645 377 : delete = strcmp(p + 1, "tail4") != 0;
4646 377 : break;
4647 : #endif
4648 0 : default:
4649 0 : delete = strcmp(p + 1, "tail") != 0;
4650 0 : break;
4651 : }
4652 : } else {
4653 14392 : delete = strcmp(p + 1, "tail") != 0;
4654 : }
4655 : }
4656 6457 : } else if (strncmp(p + 1, "theap", 5) == 0) {
4657 5537 : BAT *b = getdesc(bid);
4658 5537 : delete = (b == NULL || !b->tvheap || !b->batCopiedtodisk || b->tvheap->free == 0);
4659 920 : } else if (strncmp(p + 1, "thashl", 6) == 0 ||
4660 461 : strncmp(p + 1, "thashb", 6) == 0) {
4661 : #ifdef PERSISTENTHASH
4662 918 : BAT *b = getdesc(bid);
4663 918 : delete = b == NULL;
4664 918 : if (!delete)
4665 918 : b->thash = (Hash *) 1;
4666 : #else
4667 : delete = true;
4668 : #endif
4669 2 : } else if (strncmp(p + 1, "thash", 5) == 0) {
4670 : /* older versions used .thash which we
4671 : * can simply ignore */
4672 : delete = true;
4673 2 : } else if (strncmp(p + 1, "thsh", 4) == 0) {
4674 : /* temporary hash files which we can
4675 : * simply ignore */
4676 : delete = true;
4677 2 : } else if (strncmp(p + 1, "timprints", 9) == 0) {
4678 0 : BAT *b = getdesc(bid);
4679 0 : delete = b == NULL;
4680 0 : if (!delete)
4681 0 : b->timprints = (Imprints *) 1;
4682 2 : } else if (strncmp(p + 1, "torderidx", 9) == 0) {
4683 : #ifdef PERSISTENTIDX
4684 0 : BAT *b = getdesc(bid);
4685 0 : delete = b == NULL;
4686 0 : if (!delete)
4687 0 : b->torderidx = (Heap *) 1;
4688 : #else
4689 : delete = true;
4690 : #endif
4691 2 : } else if (strncmp(p + 1, "tstrimps", 8) == 0) {
4692 2 : BAT *b = getdesc(bid);
4693 2 : delete = b == NULL;
4694 2 : if (!delete)
4695 2 : b->tstrimps = (Strimps *)1;
4696 0 : } else if (strncmp(p + 1, "new", 3) != 0) {
4697 26120 : ok = false;
4698 : }
4699 : }
4700 26120 : if (!ok) {
4701 : /* found an unknown file; stop pruning in this
4702 : * subdir */
4703 0 : fprintf(stderr, "unexpected file %s, leaving %s.\n", dent->d_name, parent);
4704 0 : break;
4705 : }
4706 26120 : if (delete) {
4707 2 : if (MT_remove(fullname) != 0 && errno != ENOENT) {
4708 0 : GDKsyserror("remove(%s)", fullname);
4709 0 : continue;
4710 : }
4711 32586 : TRC_DEBUG(IO_, "remove(%s) = 0\n", fullname);
4712 : }
4713 : }
4714 1614 : closedir(dirp);
4715 1614 : return false;
4716 : }
4717 :
4718 : void
4719 334 : gdk_bbp_reset(void)
4720 : {
4721 334 : int i;
4722 :
4723 334 : BBP_free = 0;
4724 334 : BBP_nfree = 0;
4725 675 : while (BBPlimit > 0) {
4726 341 : BBPlimit -= BBPINIT;
4727 341 : assert(BBPlimit >= 0);
4728 341 : GDKfree(BBP[BBPlimit >> BBPINITLOG]);
4729 341 : BBP[BBPlimit >> BBPINITLOG] = NULL;
4730 : }
4731 334 : ATOMIC_SET(&BBPsize, 0);
4732 11022 : for (i = 0; i < MAXFARMS; i++)
4733 10688 : GDKfree((void *) BBPfarms[i].dirname); /* loose "const" */
4734 334 : memset(BBPfarms, 0, sizeof(BBPfarms));
4735 334 : memset(BBP_hash, 0, sizeof(BBP_hash));
4736 :
4737 334 : locked_by = 0;
4738 334 : BBPunloadCnt = 0;
4739 334 : backup_files = 0;
4740 334 : backup_dir = 0;
4741 334 : backup_subdir = 0;
4742 334 : }
4743 :
4744 : static MT_Lock GDKCallbackListLock = MT_LOCK_INITIALIZER(GDKCallbackListLock);
4745 :
4746 : static struct {
4747 : int cnt;
4748 : gdk_callback *head;
4749 : } callback_list = {
4750 : .cnt = 0,
4751 : .head = NULL,
4752 : };
4753 :
4754 : /*
4755 : * @- Add a callback
4756 : * Adds new callback to the callback list.
4757 : */
4758 : gdk_return
4759 0 : gdk_add_callback(char *name, gdk_callback_func *f, int argc, void *argv[], int
4760 : interval)
4761 : {
4762 :
4763 0 : gdk_callback *callback = NULL;
4764 :
4765 0 : if (!(callback = GDKmalloc(sizeof(gdk_callback) + sizeof(void *) * argc))) {
4766 0 : TRC_CRITICAL(GDK, "Failed to allocate memory!");
4767 0 : return GDK_FAIL;
4768 : }
4769 :
4770 0 : *callback = (gdk_callback) {
4771 : .name = name,
4772 : .argc = argc,
4773 : .interval = interval,
4774 : .func = f,
4775 : };
4776 :
4777 0 : for (int i=0; i < argc; i++) {
4778 0 : callback->argv[i] = argv[i];
4779 : }
4780 :
4781 0 : MT_lock_set(&GDKCallbackListLock);
4782 0 : gdk_callback *p = callback_list.head;
4783 0 : if (p) {
4784 : int cnt = 1;
4785 0 : do {
4786 : /* check if already added */
4787 0 : if (strcmp(callback->name, p->name) == 0) {
4788 0 : MT_lock_unset(&GDKCallbackListLock);
4789 0 : GDKfree(callback);
4790 0 : return GDK_FAIL;
4791 : }
4792 0 : if (p->next == NULL) {
4793 0 : p->next = callback;
4794 0 : p = callback->next;
4795 : } else {
4796 : p = p->next;
4797 : }
4798 0 : cnt += 1;
4799 0 : } while(p);
4800 0 : callback_list.cnt = cnt;
4801 : } else {
4802 0 : callback_list.cnt = 1;
4803 0 : callback_list.head = callback;
4804 : }
4805 0 : MT_lock_unset(&GDKCallbackListLock);
4806 0 : return GDK_SUCCEED;
4807 : }
4808 :
4809 : /*
4810 : * @- Remove a callback
4811 : * Removes a callback from the callback list with a given name as an argument.
4812 : */
4813 : gdk_return
4814 0 : gdk_remove_callback(char *cb_name, gdk_callback_func *argsfree)
4815 : {
4816 0 : gdk_callback *prev = NULL;
4817 0 : gdk_return res = GDK_FAIL;
4818 :
4819 0 : MT_lock_set(&GDKCallbackListLock);
4820 0 : gdk_callback *curr = callback_list.head;
4821 0 : while(curr) {
4822 0 : if (strcmp(cb_name, curr->name) == 0) {
4823 0 : if (curr == callback_list.head && prev == NULL) {
4824 0 : callback_list.head = curr->next;
4825 : } else {
4826 0 : prev->next = curr->next;
4827 : }
4828 0 : if (argsfree)
4829 0 : argsfree(curr->argc, curr->argv);
4830 0 : GDKfree(curr);
4831 0 : curr = NULL;
4832 0 : callback_list.cnt -=1;
4833 0 : res = GDK_SUCCEED;
4834 : } else {
4835 0 : prev = curr;
4836 0 : curr = curr->next;
4837 : }
4838 : }
4839 0 : MT_lock_unset(&GDKCallbackListLock);
4840 0 : return res;
4841 : }
4842 :
4843 : static gdk_return
4844 0 : do_callback(gdk_callback *cb)
4845 : {
4846 0 : cb->last_called = GDKusec();
4847 0 : return cb->func(cb->argc, cb->argv);
4848 : }
4849 :
4850 : static bool
4851 0 : should_call(gdk_callback *cb)
4852 : {
4853 0 : if (cb->last_called && cb->interval) {
4854 0 : return (cb->last_called + cb->interval * 1000 * 1000) <
4855 0 : GDKusec();
4856 : }
4857 : return true;
4858 : }
4859 :
4860 : static void
4861 113 : BBPcallbacks(void)
4862 : {
4863 113 : MT_lock_set(&GDKCallbackListLock);
4864 113 : gdk_callback *next = callback_list.head;
4865 :
4866 113 : while (next) {
4867 0 : if(should_call(next))
4868 0 : do_callback(next);
4869 0 : next = next->next;
4870 : }
4871 113 : MT_lock_unset(&GDKCallbackListLock);
4872 113 : }
4873 :
/* GDKtmLock protects all accesses and changes to BAKDIR and SUBDIR.
 * MUST use BBPtmlock()/BBPtmunlock() to set/unset the lock.
 *
 * This is at the end of the file on purpose: we don't want people to
 * accidentally use GDKtmLock directly. */
static MT_Lock GDKtmLock = MT_LOCK_INITIALIZER(GDKtmLock);
/* Result of the most recent MT_lockf(lockfile, F_LOCK) call made by
 * BBPtmlockFinish(); BBPtmunlock() closes it and resets it to -1 once
 * the file lock has been released. */
static int lockfd;
4881 :
4882 : static void
4883 115169 : BBPtmlockFinish(void)
4884 : {
4885 115169 : if (!GDKinmemory(0) &&
4886 : /* also use an external lock file to synchronize with
4887 : * external programs */
4888 115169 : (lockfile != NULL ||
4889 335 : (lockfile = GDKfilepath(0, NULL, ".tm_lock", NULL)) != NULL)) {
4890 115169 : lockfd = MT_lockf(lockfile, F_LOCK);
4891 : }
4892 115169 : }
4893 :
4894 : void
4895 115169 : BBPtmlock(void)
4896 : {
4897 115169 : MT_lock_set(&GDKtmLock);
4898 115169 : BBPtmlockFinish();
4899 115169 : }
4900 :
4901 : void
4902 115169 : BBPtmunlock(void)
4903 : {
4904 115169 : if (lockfile && lockfd >= 0) {
4905 115169 : assert(!GDKinmemory(0));
4906 115169 : MT_lockf(lockfile, F_ULOCK);
4907 115169 : close(lockfd);
4908 115169 : lockfd = -1;
4909 : }
4910 115169 : MT_lock_unset(&GDKtmLock);
4911 115169 : }
4912 :
4913 : void
4914 114 : BBPprintinfo(void)
4915 : {
4916 : /* 32 categories for the bats, not all are expected to be filled */
4917 114 : struct counters {
4918 : size_t sz;
4919 : size_t vmsz;
4920 : int nr;
4921 114 : } bats[2][2][2][2][2] = {0};
4922 114 : int nbats = 0;
4923 :
4924 114 : BBPtmlock();
4925 114 : bat sz = (bat) ATOMIC_GET(&BBPsize);
4926 351024 : for (bat i = 1; i < sz; i++) {
4927 350910 : MT_lock_set(&GDKswapLock(i));
4928 350910 : int r;
4929 350910 : if ((r = BBP_refs(i)) > 0 || BBP_lrefs(i) > 0) {
4930 310468 : BAT *b = BBP_desc(i);
4931 310468 : if (b != NULL) {
4932 310468 : nbats++;
4933 310468 : MT_lock_set(&b->theaplock);
4934 310468 : ATOMIC_BASE_TYPE status = BBP_status(i);
4935 310468 : struct counters *bt = &bats[r > 0][BATdirty(b)][(status & BBPPERSISTENT) != 0][(status & BBPLOADED) != 0][(status & BBPHOT) != 0];
4936 310468 : bt->nr++;
4937 310468 : if (b->theap && b->batCacheid == b->theap->parentid) {
4938 310468 : bt->sz += HEAPmemsize(b->theap);
4939 310468 : bt->vmsz += HEAPvmsize(b->theap);
4940 : }
4941 310468 : if (b->tvheap && b->batCacheid == b->tvheap->parentid) {
4942 10838 : bt->sz += HEAPmemsize(b->tvheap);
4943 10838 : bt->vmsz += HEAPvmsize(b->tvheap);
4944 : }
4945 310468 : MT_lock_unset(&b->theaplock);
4946 : }
4947 : }
4948 350910 : MT_lock_unset(&GDKswapLock(i));
4949 : }
4950 114 : uint32_t nfree = BBP_nfree;
4951 114 : BBPtmunlock();
4952 114 : if (bats[1][1][1][1][1].nr > 0)
4953 114 : printf("fix, dirty, persistent, loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[1][1][1][1][1].nr, bats[1][1][1][1][1].vmsz, bats[1][1][1][1][1].sz);
4954 114 : if (bats[1][1][1][1][0].nr > 0)
4955 0 : printf("fix, dirty, persistent, not loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[1][1][1][1][0].nr, bats[1][1][1][1][0].vmsz, bats[1][1][1][1][0].sz);
4956 114 : if (bats[1][1][1][0][1].nr > 0)
4957 0 : printf("fix, dirty, persistent, loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[1][1][1][0][1].nr, bats[1][1][1][0][1].vmsz, bats[1][1][1][0][1].sz);
4958 114 : if (bats[1][1][1][0][0].nr > 0)
4959 0 : printf("fix, dirty, persistent, not loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[1][1][1][0][0].nr, bats[1][1][1][0][0].vmsz, bats[1][1][1][0][0].sz);
4960 114 : if (bats[1][1][0][1][1].nr > 0)
4961 114 : printf("fix, dirty, transient, loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[1][1][0][1][1].nr, bats[1][1][0][1][1].vmsz, bats[1][1][0][1][1].sz);
4962 114 : if (bats[1][1][0][1][0].nr > 0)
4963 1 : printf("fix, dirty, transient, not loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[1][1][0][1][0].nr, bats[1][1][0][1][0].vmsz, bats[1][1][0][1][0].sz);
4964 114 : if (bats[1][1][0][0][1].nr > 0)
4965 0 : printf("fix, dirty, transient, loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[1][1][0][0][1].nr, bats[1][1][0][0][1].vmsz, bats[1][1][0][0][1].sz);
4966 114 : if (bats[1][1][0][0][0].nr > 0)
4967 0 : printf("fix, dirty, transient, not loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[1][1][0][0][0].nr, bats[1][1][0][0][0].vmsz, bats[1][1][0][0][0].sz);
4968 114 : if (bats[1][0][1][1][1].nr > 0)
4969 106 : printf("fix, clean, persistent, loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[1][0][1][1][1].nr, bats[1][0][1][1][1].vmsz, bats[1][0][1][1][1].sz);
4970 114 : if (bats[1][0][1][1][0].nr > 0)
4971 0 : printf("fix, clean, persistent, not loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[1][0][1][1][0].nr, bats[1][0][1][1][0].vmsz, bats[1][0][1][1][0].sz);
4972 114 : if (bats[1][0][1][0][1].nr > 0)
4973 0 : printf("fix, clean, persistent, loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[1][0][1][0][1].nr, bats[1][0][1][0][1].vmsz, bats[1][0][1][0][1].sz);
4974 114 : if (bats[1][0][1][0][0].nr > 0)
4975 0 : printf("fix, clean, persistent, not loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[1][0][1][0][0].nr, bats[1][0][1][0][0].vmsz, bats[1][0][1][0][0].sz);
4976 114 : if (bats[1][0][0][1][1].nr > 0)
4977 0 : printf("fix, clean, transient, loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[1][0][0][1][1].nr, bats[1][0][0][1][1].vmsz, bats[1][0][0][1][1].sz);
4978 114 : if (bats[1][0][0][1][0].nr > 0)
4979 0 : printf("fix, clean, transient, not loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[1][0][0][1][0].nr, bats[1][0][0][1][0].vmsz, bats[1][0][0][1][0].sz);
4980 114 : if (bats[1][0][0][0][1].nr > 0)
4981 0 : printf("fix, clean, transient, loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[1][0][0][0][1].nr, bats[1][0][0][0][1].vmsz, bats[1][0][0][0][1].sz);
4982 114 : if (bats[1][0][0][0][0].nr > 0)
4983 0 : printf("fix, clean, transient, not loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[1][0][0][0][0].nr, bats[1][0][0][0][0].vmsz, bats[1][0][0][0][0].sz);
4984 114 : if (bats[0][1][1][1][1].nr > 0)
4985 107 : printf("no fix, dirty, persistent, loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[0][1][1][1][1].nr, bats[0][1][1][1][1].vmsz, bats[0][1][1][1][1].sz);
4986 114 : if (bats[0][1][1][1][0].nr > 0)
4987 33 : printf("no fix, dirty, persistent, not loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[0][1][1][1][0].nr, bats[0][1][1][1][0].vmsz, bats[0][1][1][1][0].sz);
4988 114 : if (bats[0][1][1][0][1].nr > 0)
4989 0 : printf("no fix, dirty, persistent, loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[0][1][1][0][1].nr, bats[0][1][1][0][1].vmsz, bats[0][1][1][0][1].sz);
4990 114 : if (bats[0][1][1][0][0].nr > 0)
4991 2 : printf("no fix, dirty, persistent, not loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[0][1][1][0][0].nr, bats[0][1][1][0][0].vmsz, bats[0][1][1][0][0].sz);
4992 114 : if (bats[0][1][0][1][1].nr > 0)
4993 87 : printf("no fix, dirty, transient, loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[0][1][0][1][1].nr, bats[0][1][0][1][1].vmsz, bats[0][1][0][1][1].sz);
4994 114 : if (bats[0][1][0][1][0].nr > 0)
4995 19 : printf("no fix, dirty, transient, not loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[0][1][0][1][0].nr, bats[0][1][0][1][0].vmsz, bats[0][1][0][1][0].sz);
4996 114 : if (bats[0][1][0][0][1].nr > 0)
4997 0 : printf("no fix, dirty, transient, loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[0][1][0][0][1].nr, bats[0][1][0][0][1].vmsz, bats[0][1][0][0][1].sz);
4998 114 : if (bats[0][1][0][0][0].nr > 0)
4999 16 : printf("no fix, dirty, transient, not loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[0][1][0][0][0].nr, bats[0][1][0][0][0].vmsz, bats[0][1][0][0][0].sz);
5000 114 : if (bats[0][0][1][1][1].nr > 0)
5001 113 : printf("no fix, clean, persistent, loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[0][0][1][1][1].nr, bats[0][0][1][1][1].vmsz, bats[0][0][1][1][1].sz);
5002 114 : if (bats[0][0][1][1][0].nr > 0)
5003 25 : printf("no fix, clean, persistent, not loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[0][0][1][1][0].nr, bats[0][0][1][1][0].vmsz, bats[0][0][1][1][0].sz);
5004 114 : if (bats[0][0][1][0][1].nr > 0)
5005 0 : printf("no fix, clean, persistent, loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[0][0][1][0][1].nr, bats[0][0][1][0][1].vmsz, bats[0][0][1][0][1].sz);
5006 114 : if (bats[0][0][1][0][0].nr > 0)
5007 18 : printf("no fix, clean, persistent, not loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[0][0][1][0][0].nr, bats[0][0][1][0][0].vmsz, bats[0][0][1][0][0].sz);
5008 114 : if (bats[0][0][0][1][1].nr > 0)
5009 4 : printf("no fix, clean, transient, loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[0][0][0][1][1].nr, bats[0][0][0][1][1].vmsz, bats[0][0][0][1][1].sz);
5010 114 : if (bats[0][0][0][1][0].nr > 0)
5011 1 : printf("no fix, clean, transient, not loaded, hot: %d bats, %zu virtual, %zu malloc\n", bats[0][0][0][1][0].nr, bats[0][0][0][1][0].vmsz, bats[0][0][0][1][0].sz);
5012 114 : if (bats[0][0][0][0][1].nr > 0)
5013 0 : printf("no fix, clean, transient, loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[0][0][0][0][1].nr, bats[0][0][0][0][1].vmsz, bats[0][0][0][0][1].sz);
5014 114 : if (bats[0][0][0][0][0].nr > 0)
5015 0 : printf("no fix, clean, transient, not loaded, cold: %d bats, %zu virtual, %zu malloc\n", bats[0][0][0][0][0].nr, bats[0][0][0][0][0].vmsz, bats[0][0][0][0][0].sz);
5016 :
5017 114 : printf("%d bats total, %d in use, %"PRIu32" free bats in common shared list\n",
5018 : sz - 1, nbats, nfree);
5019 114 : }
|