Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * @a M. L. Kersten, P. Boncz, N. Nes
15 : *
16 : * @* Database Storage Management
17 : * Contains routines for writing and reading GDK data to and from
18 : * disk. This section contains the primitives to manage the
19 : * disk-based images of the BATs. It relies on the existence of a UNIX
20 : * file system, including memory mapped files. Solaris and IRIX have
21 : * different implementations of madvise().
22 : *
23 : * The current version assumes that all BATs are stored on a single
24 : * disk partition. This simplistic assumption should be replaced in
25 : * the near future by a multi-volume version. The intention is to use
26 : * several BAT home locations. The files should be owned by the
27 : * database server. Otherwise, IO operations are likely to fail. This
28 : * is accomplished by setting the GID and UID upon system start.
29 : */
30 : #include "monetdb_config.h"
31 : #include "gdk.h"
32 : #include "gdk_private.h"
33 : #include "mutils.h"
34 : #ifdef HAVE_FCNTL_H
35 : #include <fcntl.h>
36 : #endif
37 :
38 : #ifndef O_CLOEXEC
39 : #ifdef _O_NOINHERIT
40 : #define O_CLOEXEC _O_NOINHERIT /* Windows */
41 : #else
42 : #define O_CLOEXEC 0
43 : #endif
44 : #endif
45 :
46 : /* GDKfilepath returns a newly allocated string containing the path
47 : * name of a database farm.
48 : * The arguments are the farmID or -1, the name of a subdirectory
49 : * within the farm (i.e., something like BATDIR or BAKDIR -- see
50 : * gdk.h) or NULL, the name of a BAT (i.e. the name that is stored in
51 : * BBP.dir -- something like 07/714), and finally the file extension.
52 : *
53 : * If farmid is >= 0, GDKfilepath returns the complete path to the
54 : * specified farm concatenated with the other arguments with
55 : * appropriate separators. If farmid is -1, it returns the
56 : * concatenation of its other arguments (in this case, the result
57 : * cannot be used to access a file directly -- the farm needs to be
58 : * prepended in some other place). */
59 : char *
60 17443719 : GDKfilepath(int farmid, const char *dir, const char *name, const char *ext)
61 : {
62 17443719 : const char *sep;
63 17443719 : size_t pathlen;
64 17443719 : char *path;
65 :
66 17443719 : if (GDKinmemory(farmid))
67 1 : return GDKstrdup(":memory:");
68 :
69 17442920 : assert(dir == NULL || *dir != DIR_SEP);
70 17442920 : assert(farmid == NOFARM ||
71 : (farmid >= 0 && farmid < MAXFARMS && BBPfarms[farmid].dirname));
72 17442920 : if (!GDKembedded() && MT_path_absolute(name)) {
73 0 : GDKerror("name should not be absolute\n");
74 0 : return NULL;
75 : }
76 17443233 : if (dir && *dir == DIR_SEP)
77 0 : dir++;
78 17379651 : if (dir == NULL || dir[0] == 0 || dir[strlen(dir) - 1] == DIR_SEP) {
79 : sep = "";
80 : } else {
81 17343787 : sep = DIR_SEP_STR;
82 : }
83 17443233 : pathlen = (farmid == NOFARM ? 0 : strlen(BBPfarms[farmid].dirname) + 1) +
84 17443233 : (dir ? strlen(dir) : 0) + strlen(sep) + strlen(name) +
85 17443233 : (ext ? strlen(ext) + 1 : 0) + 1;
86 17443233 : path = GDKmalloc(pathlen);
87 17442863 : if (path == NULL)
88 : return NULL;
89 17442863 : if (farmid == NOFARM) {
90 2200410 : strconcat_len(path, pathlen,
91 : dir ? dir : "", sep, name,
92 : ext ? "." : NULL, ext, NULL);
93 : } else {
94 29058548 : strconcat_len(path, pathlen,
95 : BBPfarms[farmid].dirname, DIR_SEP_STR,
96 : dir ? dir : "", sep, name,
97 : ext ? "." : NULL, ext, NULL);
98 : }
99 : return path;
100 : }
101 :
102 : /* make sure the parent directory of DIR exists (the argument itself
103 : * is usually a file that is to be created) */
104 : gdk_return
105 4702 : GDKcreatedir(const char *dir)
106 : {
107 4702 : char path[FILENAME_MAX];
108 4702 : char *r;
109 4702 : DIR *dirp;
110 :
111 4702 : TRC_DEBUG(IO_, "GDKcreatedir(%s)\n", dir);
112 4702 : assert(!GDKinmemory(0));
113 4702 : if (!GDKembedded() && !MT_path_absolute(dir)) {
114 0 : GDKerror("directory '%s' is not absolute\n", dir);
115 0 : return GDK_FAIL;
116 : }
117 4702 : if (strlen(dir) >= FILENAME_MAX) {
118 0 : GDKerror("directory name too long\n");
119 0 : return GDK_FAIL;
120 : }
121 4702 : strcpy(path, dir); /* we know this fits (see above) */
122 : /* skip initial /, if any */
123 62817 : for (r = strchr(path + 1, DIR_SEP); r; r = strchr(r, DIR_SEP)) {
124 58115 : *r = 0;
125 58115 : if (
126 : #ifdef WIN32
127 : strlen(path) > 3 &&
128 : #endif
129 58115 : MT_mkdir(path) < 0) {
130 53195 : if (errno != EEXIST) {
131 0 : GDKsyserror("cannot create directory %s\n", path);
132 0 : return GDK_FAIL;
133 : }
134 53195 : if ((dirp = opendir(path)) == NULL) {
135 0 : GDKsyserror("%s cannot open directory\n", path);
136 0 : return GDK_FAIL;
137 : }
138 : /* it's a directory, we can continue */
139 53195 : closedir(dirp);
140 : }
141 58115 : *r++ = DIR_SEP;
142 : }
143 : return GDK_SUCCEED;
144 : }
145 :
146 : /* remove the directory DIRNAME with its file contents; does not
147 : * recurse into subdirectories */
148 : gdk_return
149 12603 : GDKremovedir(int farmid, const char *dirname)
150 : {
151 12603 : str dirnamestr;
152 12603 : DIR *dirp;
153 12603 : char *path;
154 12603 : struct dirent *dent;
155 12603 : int ret;
156 :
157 12603 : assert(!GDKinmemory(farmid));
158 12603 : if ((dirnamestr = GDKfilepath(farmid, NULL, dirname, NULL)) == NULL)
159 : return GDK_FAIL;
160 :
161 12603 : TRC_DEBUG(IO_, "GDKremovedir(%s)\n", dirnamestr);
162 :
163 12603 : if ((dirp = opendir(dirnamestr)) == NULL) {
164 669 : GDKfree(dirnamestr);
165 669 : return GDK_SUCCEED;
166 : }
167 252456 : while ((dent = readdir(dirp)) != NULL) {
168 240522 : if (dent->d_name[0] == '.' &&
169 23868 : (dent->d_name[1] == 0 ||
170 11934 : (dent->d_name[1] == '.' && dent->d_name[2] == 0))) {
171 : /* skip . and .. */
172 23868 : continue;
173 : }
174 216654 : path = GDKfilepath(farmid, dirname, dent->d_name, NULL);
175 216654 : if (path == NULL) {
176 : /* most likely the rmdir will now fail causing
177 : * an error return */
178 : break;
179 : }
180 216654 : ret = MT_remove(path);
181 216654 : if (ret == -1)
182 0 : GDKsyserror("remove(%s) failed\n", path);
183 216654 : TRC_DEBUG(IO_, "Remove %s = %d\n", path, ret);
184 216654 : GDKfree(path);
185 : }
186 11934 : closedir(dirp);
187 11934 : ret = MT_rmdir(dirnamestr);
188 11934 : if (ret != 0)
189 0 : GDKsyserror("rmdir(%s) failed\n", dirnamestr);
190 11934 : TRC_DEBUG(IO_, "rmdir %s = %d\n", dirnamestr, ret);
191 11934 : GDKfree(dirnamestr);
192 11934 : return ret ? GDK_FAIL : GDK_SUCCEED;
193 : }
194 :
195 : #define _FUNBUF 0x040000
196 : #define _FWRTHR 0x080000
197 : #define _FRDSEQ 0x100000
198 :
199 : /* open a file and return its file descriptor; the file is specified
200 : * using farmid, name and extension; if opening for writing, we create
201 : * the parent directory if necessary; if opening for reading, we don't
202 : * necessarily report an error if it fails, but we make sure errno is
203 : * set */
204 : int
205 410717 : GDKfdlocate(int farmid, const char *nme, const char *mode, const char *extension)
206 : {
207 410717 : char *path = NULL;
208 410717 : int fd, flags = O_CLOEXEC;
209 :
210 410717 : assert(!GDKinmemory(farmid));
211 410707 : if (nme == NULL || *nme == 0) {
212 0 : GDKerror("no name specified\n");
213 0 : errno = EFAULT;
214 0 : return -1;
215 : }
216 :
217 410707 : assert(farmid != NOFARM || extension == NULL);
218 410707 : if (farmid != NOFARM) {
219 409152 : path = GDKfilepath(farmid, BATDIR, nme, extension);
220 409154 : if (path == NULL) {
221 0 : errno = ENOMEM;
222 0 : return -1;
223 : }
224 : nme = path;
225 : }
226 :
227 410709 : if (*mode == 'm') { /* file open for mmap? */
228 0 : mode++;
229 : #ifdef _CYGNUS_H_
230 : } else {
231 : flags |= _FRDSEQ; /* WIN32 CreateFile(FILE_FLAG_SEQUENTIAL_SCAN) */
232 : #endif
233 : }
234 :
235 410709 : if (strchr(mode, 'w')) {
236 : flags |= O_WRONLY | O_CREAT;
237 68740 : } else if (!strchr(mode, '+')) {
238 : flags |= O_RDONLY;
239 : } else {
240 45370 : flags |= O_RDWR;
241 : }
242 : #ifdef WIN32
243 : flags |= strchr(mode, 'b') ? O_BINARY : O_TEXT;
244 : #endif
245 410709 : fd = MT_open(nme, flags);
246 410716 : if (fd < 0 && *mode == 'w') {
247 : /* try to create the directory, in case that was the problem */
248 4479 : if (GDKcreatedir(nme) == GDK_SUCCEED) {
249 4479 : fd = MT_open(nme, flags);
250 4479 : if (fd < 0)
251 0 : GDKsyserror("cannot open file %s\n", nme);
252 : }
253 : }
254 410716 : int err = errno; /* save */
255 : /* don't generate error if we can't open a file for reading */
256 410716 : GDKfree(path);
257 410719 : errno = err; /* restore */
258 410719 : return fd;
259 : }
260 :
/* Like GDKfdlocate, except that a FILE pointer is returned.  Returns
 * NULL if the file cannot be opened or if no stream can be associated
 * with the descriptor (the descriptor is closed again in that case). */
FILE *
GDKfilelocate(int farmid, const char *nme, const char *mode, const char *extension)
{
	int fd = GDKfdlocate(farmid, nme, mode, extension);

	if (fd < 0)
		return NULL;

	/* the leading 'm' is only meaningful to GDKfdlocate, fdopen
	 * must not see it */
	const char *fmode = (*mode == 'm') ? mode + 1 : mode;
	FILE *f = fdopen(fd, fmode);
	if (f != NULL)
		return f;

	GDKsyserror("cannot fdopen file\n");
	close(fd);
	return NULL;
}
279 :
280 : FILE *
281 11925 : GDKfileopen(int farmid, const char *dir, const char *name, const char *extension, const char *mode)
282 : {
283 11925 : char *path;
284 :
285 : /* if name is null, try to get one from dir (in case it was a path) */
286 11925 : path = GDKfilepath(farmid, dir, name, extension);
287 :
288 11925 : if (path != NULL) {
289 11925 : FILE *f;
290 11925 : TRC_DEBUG(IO_, "GDKfileopen(%s)\n", path);
291 11925 : f = MT_fopen(path, mode);
292 11925 : int err = errno;
293 11925 : GDKfree(path);
294 11925 : errno = err;
295 11925 : return f;
296 : }
297 : return NULL;
298 : }
299 :
300 : /* remove the file */
301 : gdk_return
302 12110 : GDKunlink(int farmid, const char *dir, const char *nme, const char *ext)
303 : {
304 12110 : if (nme && *nme) {
305 12110 : char *path;
306 :
307 12110 : path = GDKfilepath(farmid, dir, nme, ext);
308 12110 : if (path == NULL)
309 : return GDK_FAIL;
310 : /* if file already doesn't exist, we don't care */
311 12110 : if (MT_remove(path) != 0 && errno != ENOENT) {
312 0 : GDKsyserror("remove(%s)\n", path);
313 0 : GDKfree(path);
314 0 : return GDK_FAIL;
315 : }
316 12110 : GDKfree(path);
317 12110 : return GDK_SUCCEED;
318 : }
319 0 : GDKerror("no name specified");
320 0 : return GDK_FAIL;
321 : }
322 :
323 : /*
324 : * A move routine is overloaded to deal with extensions.
325 : */
326 : gdk_return
327 229029 : GDKmove(int farmid, const char *dir1, const char *nme1, const char *ext1, const char *dir2, const char *nme2, const char *ext2, bool report)
328 : {
329 229029 : char *path1;
330 229029 : char *path2;
331 229029 : int ret;
332 229029 : lng t0 = GDKusec();
333 :
334 229029 : if (nme1 == NULL || *nme1 == 0) {
335 0 : GDKerror("no file specified\n");
336 0 : return GDK_FAIL;
337 : }
338 229029 : path1 = GDKfilepath(farmid, dir1, nme1, ext1);
339 229029 : path2 = GDKfilepath(farmid, dir2, nme2, ext2);
340 229029 : if (path1 && path2) {
341 229029 : ret = MT_rename(path1, path2);
342 229029 : if (ret < 0 && report)
343 0 : GDKsyserror("cannot rename %s to %s\n", path1, path2);
344 :
345 229029 : TRC_DEBUG(IO_, "Move %s %s = %d ("LLFMT" usec)\n", path1, path2, ret, GDKusec() - t0);
346 : } else {
347 : ret = -1;
348 : }
349 229029 : GDKfree(path1);
350 229029 : GDKfree(path2);
351 229029 : return ret < 0 ? GDK_FAIL : GDK_SUCCEED;
352 : }
353 :
354 : gdk_return
355 3154 : GDKextendf(int fd, size_t size, const char *fn)
356 : {
357 3154 : struct stat stb;
358 3154 : int rt = 0;
359 3154 : lng t0 = GDKusec();
360 :
361 3154 : assert(!GDKinmemory(0));
362 : #ifdef __COVERITY__
363 : if (fd < 0) /* in real life, if fd < 0, fstat will fail */
364 : return GDK_FAIL;
365 : #endif
366 3154 : if (fstat(fd, &stb) < 0) {
367 : /* shouldn't happen */
368 0 : GDKsyserror("fstat failed unexpectedly\n");
369 0 : return GDK_FAIL;
370 : }
371 : /* if necessary, extend the underlying file */
372 3154 : if (stb.st_size < (off_t) size) {
373 : #ifdef HAVE_FALLOCATE
374 2187 : if ((rt = fallocate(fd, 0, stb.st_size, (off_t) size - stb.st_size)) < 0 &&
375 0 : errno == EOPNOTSUPP)
376 : /* on Linux, posix_fallocate uses a slow
377 : * method to allocate blocks if the underlying
378 : * file system doesn't support the operation,
379 : * so use fallocate instead and just resize
380 : * the file if it fails */
381 : #else
382 : #ifdef HAVE_POSIX_FALLOCATE
383 : /* posix_fallocate returns error number on failure,
384 : * not -1 :-( */
385 : if ((rt = posix_fallocate(fd, stb.st_size, (off_t) size - stb.st_size)) == EINVAL)
386 : /* on Solaris/OpenIndiana, this may mean that
387 : * the underlying file system doesn't support
388 : * the operation, so just resize the file */
389 : #endif
390 : #endif
391 : /* we get here when (posix_)fallocate fails
392 : * because it is not supported on the file
393 : * system, or if neither function exists */
394 0 : rt = ftruncate(fd, (off_t) size);
395 2187 : if (rt != 0) {
396 : /* extending failed, try to reduce file size
397 : * back to original */
398 0 : GDKsyserror("could not extend file\n");
399 0 : if (ftruncate(fd, stb.st_size))
400 0 : GDKsyserror("ftruncate to old size");
401 : }
402 : }
403 3154 : TRC_DEBUG(IO_, "GDKextend %s %zu -> %zu "LLFMT" usec%s\n",
404 : fn, (size_t) stb.st_size, size,
405 : GDKusec() - t0, rt != 0 ? " (failed)" : "");
406 : /* posix_fallocate returns != 0 on failure, fallocate and
407 : * ftruncate return -1 on failure, but all three return 0 on
408 : * success */
409 3151 : return rt != 0 ? GDK_FAIL : GDK_SUCCEED;
410 : }
411 :
412 : gdk_return
413 2553 : GDKextend(const char *fn, size_t size)
414 : {
415 2553 : int fd, flags = O_RDWR;
416 2553 : gdk_return rt = GDK_FAIL;
417 :
418 2553 : assert(!GDKinmemory(0));
419 : #ifdef O_BINARY
420 : /* On Windows, open() fails if the file is bigger than 2^32
421 : * bytes without O_BINARY. */
422 : flags |= O_BINARY;
423 : #endif
424 2553 : if ((fd = MT_open(fn, flags | O_CLOEXEC)) >= 0) {
425 2554 : rt = GDKextendf(fd, size, fn);
426 2554 : close(fd);
427 : } else {
428 0 : GDKsyserror("cannot open file %s\n", fn);
429 : }
430 2554 : return rt;
431 : }
432 :
433 : /*
434 : * @+ Save and load.
435 : * The BAT is saved on disk in several files. The extension DESC
436 : * denotes the descriptor, BUNs the bun heap, and HHEAP and THEAP the
437 : * other heaps. The storage mechanism of a file can be memory mapped
438 : * (STORE_MMAP) or malloced (STORE_MEM).
439 : *
440 : * These modes indicate the disk layout and the intended mapping.
441 : * The primary concern here is to handle STORE_MMAP and STORE_MEM.
442 : */
443 : gdk_return
444 329251 : GDKsave(int farmid, const char *nme, const char *ext, void *buf, size_t size, storage_t mode, bool dosync)
445 : {
446 329251 : int err = 0;
447 :
448 329251 : TRC_DEBUG(IO_, "GDKsave: name=%s, ext=%s, mode %d, dosync=%d\n", nme, ext ? ext : "", (int) mode, dosync);
449 :
450 329251 : assert(!GDKinmemory(farmid));
451 329251 : if (mode == STORE_MMAP) {
452 1030 : if (dosync && size && !(ATOMIC_GET(&GDKdebug) & NOSYNCMASK))
453 137 : err = MT_msync(buf, size);
454 137 : if (err)
455 0 : GDKerror("error on: name=%s, ext=%s, mode=%d\n",
456 : nme, ext ? ext : "", (int) mode);
457 1030 : TRC_DEBUG(IO_, "MT_msync(buf %p, size %zu) = %d\n",
458 : buf, size, err);
459 : } else {
460 328221 : int fd;
461 :
462 328221 : if ((fd = GDKfdlocate(farmid, nme, "wb", ext)) >= 0) {
463 : /* write() on 64-bits Redhat for IA64 returns
464 : * 32-bits signed result (= OS BUG)! write()
465 : * on Windows only takes unsigned int as
466 : * size */
467 656442 : while (size > 0) {
468 : /* circumvent problems by writing huge
469 : * buffers in chunks <= 1GiB */
470 328221 : ssize_t ret;
471 :
472 656442 : ret = write(fd, buf,
473 : (unsigned) MIN(1 << 30, size));
474 328221 : if (ret < 0) {
475 0 : err = -1;
476 0 : GDKsyserror("GDKsave: error %zd"
477 : " on: name=%s, ext=%s, "
478 : "mode=%d\n", ret, nme,
479 : ext ? ext : "", (int) mode);
480 0 : break;
481 : }
482 328221 : size -= ret;
483 328221 : buf = (void *) ((char *) buf + ret);
484 984663 : TRC_DEBUG(IO_, "Write(fd %d, buf %p"
485 : ", size %u) = %zd\n",
486 : fd, buf,
487 : (unsigned) MIN(1 << 30, size),
488 : ret);
489 : }
490 328221 : if (dosync && !(ATOMIC_GET(&GDKdebug) & NOSYNCMASK)
491 : #if defined(NATIVE_WIN32)
492 : && _commit(fd) < 0
493 : #elif defined(HAVE_FDATASYNC)
494 292 : && fdatasync(fd) < 0
495 : #elif defined(HAVE_FSYNC)
496 : && fsync(fd) < 0
497 : #endif
498 : ) {
499 0 : GDKsyserror("GDKsave: error on: name=%s, "
500 : "ext=%s, mode=%d\n", nme,
501 : ext ? ext : "", (int) mode);
502 0 : err = -1;
503 : }
504 328221 : err |= close(fd);
505 328221 : if (err && GDKunlink(farmid, BATDIR, nme, ext) != GDK_SUCCEED) {
506 : /* do not tolerate corrupt heap images
507 : * (BBPrecover on restart will kill
508 : * them) */
509 0 : GDKerror("could not remove: name=%s, "
510 : "ext=%s, mode %d\n", nme,
511 : ext ? ext : "", (int) mode);
512 0 : return GDK_FAIL;
513 : }
514 : } else {
515 0 : err = -1;
516 0 : GDKerror("failed name=%s, ext=%s, mode %d\n",
517 : nme, ext ? ext : "", (int) mode);
518 : }
519 : }
520 329251 : return err ? GDK_FAIL : GDK_SUCCEED;
521 : }
522 :
523 : /*
524 : * Space for the load is directly allocated and the heaps are mapped.
525 : * Further initialization of the atom heaps require a separate action
526 : * defined in their implementation.
527 : *
528 : * size -- how much to read
529 : * *maxsize -- (in/out) how much to allocate / how much was allocated
530 : */
531 : char *
532 24282 : GDKload(int farmid, const char *nme, const char *ext, size_t size, size_t *maxsize, storage_t mode)
533 : {
534 24282 : char *ret = NULL;
535 :
536 24282 : assert(!GDKinmemory(farmid));
537 24279 : assert(size <= *maxsize);
538 24279 : assert(farmid != NOFARM || ext == NULL);
539 24279 : TRC_DEBUG(IO_, "GDKload: name=%s, ext=%s, mode %d\n", nme, ext ? ext : "", (int) mode);
540 :
541 24281 : if (mode == STORE_MEM) {
542 21728 : int fd = GDKfdlocate(farmid, nme, "rb", ext);
543 :
544 21728 : if (fd >= 0) {
545 21728 : char *dst = ret = GDKmalloc(*maxsize);
546 21728 : ssize_t n_expected, n = 0;
547 :
548 21728 : if (ret) {
549 : /* read in chunks, some OSs do not
550 : * give you all at once and Windows
551 : * only accepts int */
552 43456 : for (n_expected = (ssize_t) size; n_expected > 0; n_expected -= n) {
553 21728 : n = read(fd, dst, (unsigned) MIN(1 << 30, n_expected));
554 21728 : if (n < 0)
555 0 : GDKsyserror("GDKload: cannot read: name=%s, ext=%s, expected %zu, %zd bytes missing\n", nme, ext ? ext : "", size, n_expected);
556 : #ifndef __COVERITY__
557 : /* Coverity doesn't seem to
558 : * recognize that we're just
559 : * printing the value of ptr,
560 : * not its contents */
561 21728 : TRC_DEBUG(IO_, "read(dst %p, n_expected %zd, fd %d) = %zd\n", (void *)dst, n_expected, fd, n);
562 : #endif
563 :
564 21728 : if (n <= 0)
565 : break;
566 21728 : dst += n;
567 : }
568 21728 : if (n_expected > 0) {
569 : /* we couldn't read all, error
570 : * already generated */
571 0 : GDKfree(ret);
572 0 : if (n >= 0) /* don't report error twice */
573 0 : GDKerror("short read from heap %s%s%s, expected %zu, missing %zd\n", nme, ext ? "." : "", ext ? ext : "", size, n_expected);
574 : ret = NULL;
575 : }
576 : #ifndef NDEBUG
577 : /* just to make valgrind happy, we
578 : * initialize the whole thing */
579 21728 : if (ret && *maxsize > size)
580 14890 : memset(ret + size, 0, *maxsize - size);
581 : #endif
582 : }
583 21728 : close(fd);
584 : } else {
585 0 : GDKsyserror("cannot open: name=%s, ext=%s\n", nme, ext ? ext : "");
586 : }
587 : } else {
588 2553 : char *path = NULL;
589 :
590 : /* round up to multiple of GDK_mmap_pagesize with a
591 : * minimum of one */
592 2553 : size = (*maxsize + GDK_mmap_pagesize - 1) & ~(GDK_mmap_pagesize - 1);
593 2553 : if (size == 0)
594 0 : size = GDK_mmap_pagesize;
595 2553 : if (farmid != NOFARM) {
596 995 : path = GDKfilepath(farmid, BATDIR, nme, ext);
597 995 : nme = path;
598 : }
599 2553 : if (nme != NULL && GDKextend(nme, size) == GDK_SUCCEED) {
600 2554 : int mod = MMAP_READ | MMAP_WRITE | MMAP_SEQUENTIAL;
601 :
602 2554 : if (mode == STORE_PRIV)
603 : mod |= MMAP_COPY;
604 : else
605 2554 : mod |= MMAP_SYNC;
606 2554 : ret = GDKmmap(nme, mod, size);
607 2554 : if (ret != NULL) {
608 : /* success: update allocated size */
609 2554 : *maxsize = size;
610 : }
611 2554 : TRC_DEBUG(IO_, "mmap(NULL, 0, maxsize %zu, mod %d, path %s, 0) = %p\n", size, mod, nme, (void *)ret);
612 : }
613 2554 : GDKfree(path);
614 : }
615 24282 : return ret;
616 : }
617 :
618 : /*
619 : * @+ BAT disk storage
620 : *
621 : * Between sessions the BATs comprising the database are saved on
622 : * disk. To simplify code, we assume a UNIX directory called its
623 : * physical @%home@ where they are to be located. The subdirectories
624 : * BAT and PRG contain what its name says.
625 : *
626 : * A BAT created by @%COLnew@ is considered temporary until one calls
627 : * the routine @%BATsave@. This routine reserves disk space and checks
628 : * for name clashes.
629 : *
630 : * Saving and restoring BATs is left to the upper layers. The library
631 : * merely copies the data into place. Failure to read or write the
632 : * BAT results in a NULL, otherwise it returns the BAT pointer.
633 : */
634 : static BAT *
635 20139 : DESCload(int i)
636 : {
637 20139 : const char *s, *nme = BBP_physical(i);
638 20139 : BAT *b = NULL;
639 20139 : int tt;
640 :
641 20139 : TRC_DEBUG(IO_, "DESCload: %s\n", nme ? nme : "<noname>");
642 :
643 20139 : b = BBP_desc(i);
644 :
645 20139 : if (b == NULL) {
646 0 : GDKerror("no descriptor for BAT %d\n", i);
647 0 : return NULL;
648 : }
649 :
650 20139 : MT_lock_set(&b->theaplock);
651 20138 : tt = b->ttype;
652 20138 : if (tt < 0) {
653 0 : if ((tt = ATOMindex(s = ATOMunknown_name(tt))) < 0) {
654 0 : MT_lock_unset(&b->theaplock);
655 0 : GDKerror("atom '%s' unknown, in BAT '%s'.\n", s, nme);
656 0 : return NULL;
657 : }
658 0 : b->ttype = tt;
659 : }
660 :
661 : /* reconstruct mode from BBP status (BATmode doesn't flush
662 : * descriptor, so loaded mode may be stale) */
663 20138 : b->batTransient = (BBP_status(b->batCacheid) & BBPPERSISTENT) == 0;
664 20138 : b->batCopiedtodisk = true;
665 20138 : MT_lock_unset(&b->theaplock);
666 20137 : return b;
667 : }
668 :
669 : gdk_return
670 971639 : BATsave_iter(BAT *b, BATiter *bi, BUN size)
671 : {
672 971639 : gdk_return err = GDK_SUCCEED;
673 971639 : bool dosync;
674 971639 : bool locked = false;
675 :
676 971639 : BATcheck(b, GDK_FAIL);
677 :
678 971639 : if (MT_rwlock_rdtry(&b->thashlock))
679 971628 : locked = true;
680 :
681 971639 : dosync = (BBP_status(b->batCacheid) & BBPPERSISTENT) != 0;
682 971639 : assert(!GDKinmemory(bi->h->farmid));
683 : /* views cannot be saved, but make an exception for
684 : * force-remapped views */
685 971639 : if (isVIEW(b)) {
686 0 : if (locked)
687 0 : MT_rwlock_rdunlock(&b->thashlock);
688 0 : GDKerror("%s is a view on %s; cannot be saved\n", BATgetId(b), BBP_logical(VIEWtparent(b)));
689 0 : return GDK_FAIL;
690 : }
691 971639 : if (!BATdirtybi(*bi)) {
692 709715 : if (locked)
693 709714 : MT_rwlock_rdunlock(&b->thashlock);
694 709715 : return GDK_SUCCEED;
695 : }
696 :
697 : /* start saving data */
698 261924 : if (bi->type != TYPE_void && bi->base == NULL) {
699 0 : assert(BBP_status(b->batCacheid) & BBPSWAPPED);
700 0 : if (dosync && !(ATOMIC_GET(&GDKdebug) & NOSYNCMASK)) {
701 0 : int fd = GDKfdlocate(bi->h->farmid, bi->h->filename, "rb+", NULL);
702 0 : if (fd < 0) {
703 0 : GDKsyserror("cannot open file %s for sync\n",
704 : bi->h->filename);
705 0 : err = GDK_FAIL;
706 : } else {
707 0 : if (
708 : #if defined(NATIVE_WIN32)
709 : _commit(fd) < 0
710 : #elif defined(HAVE_FDATASYNC)
711 0 : fdatasync(fd) < 0
712 : #elif defined(HAVE_FSYNC)
713 : fsync(fd) < 0
714 : #endif
715 : )
716 0 : GDKsyserror("sync failed for %s\n",
717 : bi->h->filename);
718 0 : close(fd);
719 : }
720 0 : if (bi->vh) {
721 0 : fd = GDKfdlocate(bi->vh->farmid, bi->vh->filename, "rb+", NULL);
722 0 : if (fd < 0) {
723 0 : GDKsyserror("cannot open file %s for sync\n",
724 : bi->vh->filename);
725 0 : err = GDK_FAIL;
726 : } else {
727 0 : if (
728 : #if defined(NATIVE_WIN32)
729 : _commit(fd) < 0
730 : #elif defined(HAVE_FDATASYNC)
731 0 : fdatasync(fd) < 0
732 : #elif defined(HAVE_FSYNC)
733 : fsync(fd) < 0
734 : #endif
735 : )
736 0 : GDKsyserror("sync failed for %s\n", bi->vh->filename);
737 0 : close(fd);
738 : }
739 : }
740 : }
741 : } else {
742 261924 : const char *nme = BBP_physical(b->batCacheid);
743 261924 : if ((!bi->copiedtodisk || bi->hdirty)
744 261896 : && (err == GDK_SUCCEED && bi->type)) {
745 261896 : const char *tail = strchr(bi->h->filename, '.') + 1;
746 261896 : err = HEAPsave(bi->h, nme, tail, dosync, bi->hfree, &b->theaplock);
747 : }
748 261924 : if (bi->vh
749 49220 : && (!bi->copiedtodisk || bi->vhdirty)
750 43321 : && ATOMvarsized(bi->type)
751 43321 : && err == GDK_SUCCEED)
752 43321 : err = HEAPsave(bi->vh, nme, "theap", dosync, bi->vhfree, &b->theaplock);
753 : }
754 :
755 261924 : if (err == GDK_SUCCEED) {
756 261924 : MT_lock_set(&b->theaplock);
757 261924 : if (b->theap != bi->h) {
758 2 : assert(b->theap->dirty);
759 2 : b->theap->wasempty = bi->h->wasempty;
760 2 : b->theap->hasfile |= bi->h->hasfile;
761 : }
762 261924 : if (b->tvheap && b->tvheap != bi->vh) {
763 0 : assert(b->tvheap->dirty);
764 0 : b->tvheap->wasempty = bi->vh->wasempty;
765 0 : b->tvheap->hasfile |= bi->vh->hasfile;
766 : }
767 261924 : if (size != b->batCount) {
768 : /* if the size doesn't match, the BAT must be dirty */
769 33155 : b->theap->dirty = true;
770 33155 : if (b->tvheap)
771 8250 : b->tvheap->dirty = true;
772 : }
773 : /* there is something on disk now */
774 261924 : b->batCopiedtodisk = true;
775 261924 : MT_lock_unset(&b->theaplock);
776 261924 : if (locked && b->thash && b->thash != (Hash *) 1)
777 13277 : BAThashsave(b, dosync);
778 : }
779 261914 : if (locked)
780 261914 : MT_rwlock_rdunlock(&b->thashlock);
781 : return err;
782 : }
783 :
784 : gdk_return
785 1503 : BATsave(BAT *b)
786 : {
787 1503 : gdk_return rc;
788 :
789 1503 : BATiter bi = bat_iterator(b);
790 1503 : rc = BATsave_iter(b, &bi, bi.count);
791 1503 : bat_iterator_end(&bi);
792 1503 : return rc;
793 : }
794 :
795 : /*
796 : * TODO: move to gdk_bbp.c
797 : */
798 : BAT *
799 20140 : BATload_intern(bat bid, bool lock)
800 : {
801 20140 : const char *nme;
802 20140 : BAT *b;
803 :
804 20140 : assert(!GDKinmemory(0));
805 20139 : assert(bid > 0);
806 :
807 20139 : nme = BBP_physical(bid);
808 20139 : b = DESCload(bid);
809 :
810 20139 : if (b == NULL) {
811 : return NULL;
812 : }
813 20139 : assert(!GDKinmemory(b->theap->farmid));
814 :
815 : /* LOAD bun heap */
816 20143 : if (b->ttype != TYPE_void) {
817 20140 : b->theap->storage = b->theap->newstorage = STORE_INVALID;
818 20137 : if ((b->batCount == 0 ?
819 2593 : HEAPalloc(b->theap, b->batCapacity, b->twidth) :
820 22730 : HEAPload(b->theap, b->theap->filename, NULL, b->batRestricted == BAT_READ)) != GDK_SUCCEED) {
821 0 : HEAPfree(b->theap, false);
822 0 : return NULL;
823 : }
824 20137 : if (ATOMstorage(b->ttype) == TYPE_msk) {
825 4584 : b->batCapacity = (BUN) (b->theap->size * 8);
826 : } else {
827 15553 : assert(b->theap->size >> b->tshift <= BUN_MAX);
828 15553 : b->batCapacity = (BUN) (b->theap->size >> b->tshift);
829 : }
830 : } else {
831 3 : b->theap->base = NULL;
832 : }
833 :
834 : /* LOAD tail heap */
835 20140 : if (ATOMvarsized(b->ttype)) {
836 4497 : b->tvheap->storage = b->tvheap->newstorage = STORE_INVALID;
837 4497 : if ((b->tvheap->free == 0 ?
838 214 : ATOMheap(b->ttype, b->tvheap, b->batCapacity) :
839 4711 : HEAPload(b->tvheap, nme, "theap", b->batRestricted == BAT_READ)) != GDK_SUCCEED) {
840 0 : HEAPfree(b->theap, false);
841 0 : HEAPfree(b->tvheap, false);
842 0 : return NULL;
843 : }
844 4497 : if (ATOMstorage(b->ttype) == TYPE_str) {
845 4362 : strCleanHash(b->tvheap, false); /* ensure consistency */
846 : } else {
847 135 : HEAP_recover(b->tvheap, (const var_t *) Tloc(b, 0),
848 : BATcount(b));
849 : }
850 : }
851 :
852 : /* initialize descriptor */
853 20140 : b->theap->parentid = b->batCacheid;
854 :
855 : /* load succeeded; register it in BBP */
856 20140 : if (BBPcacheit(b, lock) != GDK_SUCCEED) {
857 0 : HEAPfree(b->theap, false);
858 0 : if (b->tvheap)
859 0 : HEAPfree(b->tvheap, false);
860 0 : return NULL;
861 : }
862 : return b;
863 : }
864 :
865 : /*
866 : * @- BATdelete
867 : * The new behavior is to let the routine produce warnings but always
868 : * succeed. rationale: on a delete, we must get rid of *all* the
869 : * files. We do not have to care about preserving them or be too much
870 : * concerned if a file that had to be deleted was not found (end
871 : * result is still that it does not exist). The past behavior to
872 : * delete some files and then fail was erroneous. The BAT would
873 : * continue to exist with an incorrect disk status, causing havoc
874 : * later on.
875 : *
876 : * NT forces us to close all files before deleting them; in case of
877 : * memory mapped files this means that we have to unload the BATs
878 : * before deleting. This is enforced now.
879 : */
880 : void
881 15289006 : BATdelete(BAT *b)
882 : {
883 15289006 : bat bid = b->batCacheid;
884 15289006 : BAT *loaded = BBP_cache(bid);
885 15289006 : char o[12];
886 :
887 15289006 : assert(bid > 0);
888 15289006 : snprintf(o, sizeof(o), "%o", (unsigned) bid);
889 15289006 : if (loaded) {
890 15282978 : b = loaded;
891 : }
892 15289006 : HASHdestroy(b);
893 15288499 : IMPSdestroy(b);
894 15288000 : OIDXdestroy(b);
895 15286832 : PROPdestroy_nolock(b);
896 15287323 : STRMPdestroy(b);
897 15286785 : RTREEdestroy(b);
898 15287099 : if (b->theap) {
899 6028 : HEAPfree(b->theap, true);
900 : }
901 15287036 : if (b->tvheap) {
902 494 : HEAPfree(b->tvheap, true);
903 : }
904 15287036 : b->batCopiedtodisk = false;
905 15287036 : }
906 :
907 : /*
908 : * BAT specific printing
909 : */
910 :
911 : gdk_return
912 687 : BATprintcolumns(stream *s, int argc, BAT *argv[])
913 : {
914 687 : int i;
915 687 : BUN n, cnt;
916 687 : struct colinfo {
917 : ssize_t (*s) (str *, size_t *, const void *, bool);
918 : BATiter i;
919 : } *colinfo;
920 687 : char *buf;
921 687 : size_t buflen = 0;
922 687 : ssize_t len;
923 687 : gdk_return rc = GDK_SUCCEED;
924 :
925 : /* error checking */
926 2128 : for (i = 0; i < argc; i++) {
927 1443 : if (argv[i] == NULL) {
928 0 : GDKerror("Columns missing\n");
929 0 : return GDK_FAIL;
930 : }
931 1443 : if (BATcount(argv[0]) != BATcount(argv[i])) {
932 2 : GDKerror("Columns must be the same size\n");
933 2 : return GDK_FAIL;
934 : }
935 : }
936 :
937 685 : if ((colinfo = GDKmalloc(argc * sizeof(*colinfo))) == NULL) {
938 0 : GDKerror("Cannot allocate memory\n");
939 0 : return GDK_FAIL;
940 : }
941 :
942 2120 : for (i = 0; i < argc; i++) {
943 1435 : colinfo[i].i = bat_iterator(argv[i]);
944 1435 : colinfo[i].s = BATatoms[argv[i]->ttype].atomToStr;
945 : }
946 :
947 685 : mnstr_write(s, "#--------------------------#\n", 1, 29);
948 685 : mnstr_write(s, "# ", 1, 2);
949 2805 : for (i = 0; i < argc; i++) {
950 1435 : if (i > 0)
951 750 : mnstr_write(s, "\t", 1, 1);
952 1435 : const char *nm = ATOMname(argv[i]->ttype);
953 1435 : mnstr_write(s, nm, 1, strlen(nm));
954 : }
955 685 : mnstr_write(s, " # type\n", 1, 9);
956 685 : mnstr_write(s, "#--------------------------#\n", 1, 29);
957 685 : buf = NULL;
958 :
959 3896 : for (n = 0, cnt = BATcount(argv[0]); n < cnt; n++) {
960 3211 : mnstr_write(s, "[ ", 1, 2);
961 13168 : for (i = 0; i < argc; i++) {
962 6746 : len = colinfo[i].s(&buf, &buflen, BUNtail(colinfo[i].i, n), true);
963 6746 : if (len < 0) {
964 0 : rc = GDK_FAIL;
965 0 : goto bailout;
966 : }
967 6746 : if (i > 0)
968 3535 : mnstr_write(s, ",\t", 1, 2);
969 6746 : mnstr_write(s, buf, 1, len);
970 : }
971 3211 : mnstr_write(s, " ]\n", 1, 4);
972 : }
973 :
974 685 : bailout:
975 2120 : for (i = 0; i < argc; i++) {
976 1435 : bat_iterator_end(&colinfo[i].i);
977 : }
978 685 : GDKfree(buf);
979 685 : GDKfree(colinfo);
980 :
981 685 : return rc;
982 : }
983 :
984 : gdk_return
985 634 : BATprint(stream *fdout, BAT *b)
986 : {
987 634 : if (complex_cand(b)) {
988 0 : struct canditer ci;
989 0 : canditer_init(&ci, NULL, b);
990 0 : oid hseq = ci.hseq;
991 :
992 0 : mnstr_printf(fdout,
993 : "#--------------------------#\n"
994 : "# void\toid # type\n"
995 : "#--------------------------#\n");
996 0 : for (BUN i = 0; i < ci.ncand; i++) {
997 0 : oid o = canditer_next(&ci);
998 0 : mnstr_printf(fdout,
999 : "[ " OIDFMT "@0,\t" OIDFMT "@0 ]\n",
1000 : (oid) (i + hseq), o);
1001 : }
1002 0 : return GDK_SUCCEED;
1003 : }
1004 :
1005 634 : BAT *argv[2];
1006 634 : gdk_return ret = GDK_FAIL;
1007 :
1008 634 : argv[0] = BATdense(b->hseqbase, b->hseqbase, BATcount(b));
1009 634 : if (argv[0]) {
1010 634 : argv[1] = b;
1011 634 : ret = BATprintcolumns(fdout, 2, argv);
1012 634 : BBPunfix(argv[0]->batCacheid);
1013 : }
1014 : return ret;
1015 : }
|