Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * @a M. L. Kersten, P. Boncz, N. Nes
15 : *
16 : * @* Database Storage Management
17 : * Contains routines for writing and reading GDK data to and from
18 : * disk. This section contains the primitives to manage the
19 : * disk-based images of the BATs. It relies on the existence of a UNIX
20 : * file system, including memory mapped files. Solaris and IRIX have
21 : * different implementations of madvise().
22 : *
23 : * The current version assumes that all BATs are stored on a single
24 : * disk partition. This simplistic assumption should be replaced in
25 : * the near future by a multi-volume version. The intention is to use
26 : * several BAT home locations. The files should be owned by the
27 : * database server. Otherwise, IO operations are likely to fail. This
28 : * is accomplished by setting the GID and UID upon system start.
29 : */
30 : #include "monetdb_config.h"
31 : #include "gdk.h"
32 : #include "gdk_private.h"
33 : #include "mutils.h"
34 : #ifdef HAVE_FCNTL_H
35 : #include <fcntl.h>
36 : #endif
37 :
/* Make sure O_CLOEXEC can always be OR-ed into open() flags: when the
 * system headers do not provide it, use the Windows _O_NOINHERIT flag
 * if that exists, and otherwise define it as 0 (a no-op). */
#if !defined(O_CLOEXEC)
# if defined(_O_NOINHERIT)
#  define O_CLOEXEC _O_NOINHERIT	/* Windows */
# else
#  define O_CLOEXEC 0
# endif
#endif
45 :
46 : /* GDKfilepath returns a newly allocated string containing the path
47 : * name of a database farm.
48 : * The arguments are the farmID or -1, the name of a subdirectory
49 : * within the farm (i.e., something like BATDIR or BAKDIR -- see
50 : * gdk.h) or NULL, the name of a BAT (i.e. the name that is stored in
51 : * BBP.dir -- something like 07/714), and finally the file extension.
52 : *
53 : * If farmid is >= 0, GDKfilepath returns the complete path to the
54 : * specified farm concatenated with the other arguments with
55 : * appropriate separators. If farmid is -1, it returns the
56 : * concatenation of its other arguments (in this case, the result
57 : * cannot be used to access a file directly -- the farm needs to be
58 : * prepended in some other place). */
59 : char *
60 23768860 : GDKfilepath(int farmid, const char *dir, const char *name, const char *ext)
61 : {
62 23768860 : const char *sep;
63 23768860 : size_t pathlen;
64 23768860 : char *path;
65 :
66 23768860 : if (GDKinmemory(farmid))
67 1 : return GDKstrdup(":memory:");
68 :
69 23767524 : assert(dir == NULL || *dir != DIR_SEP);
70 23767524 : assert(farmid == NOFARM ||
71 : (farmid >= 0 && farmid < MAXFARMS && BBPfarms[farmid].dirname));
72 23767524 : if (!GDKembedded() && MT_path_absolute(name)) {
73 0 : GDKerror("name should not be absolute\n");
74 0 : return NULL;
75 : }
76 23768795 : if (dir && *dir == DIR_SEP)
77 0 : dir++;
78 23700570 : if (dir == NULL || dir[0] == 0 || dir[strlen(dir) - 1] == DIR_SEP) {
79 : sep = "";
80 : } else {
81 23662864 : sep = DIR_SEP_STR;
82 : }
83 23768795 : pathlen = (farmid == NOFARM ? 0 : strlen(BBPfarms[farmid].dirname) + 1) +
84 23768795 : (dir ? strlen(dir) : 0) + strlen(sep) + strlen(name) +
85 23768795 : (ext ? strlen(ext) + 1 : 0) + 1;
86 23768795 : path = GDKmalloc(pathlen);
87 23767437 : if (path == NULL)
88 : return NULL;
89 23767437 : if (farmid == NOFARM) {
90 2509832 : strconcat_len(path, pathlen,
91 : dir ? dir : "", sep, name,
92 : ext ? "." : NULL, ext, NULL);
93 : } else {
94 35900903 : strconcat_len(path, pathlen,
95 : BBPfarms[farmid].dirname, DIR_SEP_STR,
96 : dir ? dir : "", sep, name,
97 : ext ? "." : NULL, ext, NULL);
98 : }
99 : return path;
100 : }
101 :
102 : /* make sure the parent directory of DIR exists (the argument itself
103 : * is usually a file that is to be created) */
104 : gdk_return
105 3078 : GDKcreatedir(const char *dir)
106 : {
107 3078 : char path[FILENAME_MAX];
108 3078 : char *r;
109 3078 : DIR *dirp;
110 :
111 3078 : TRC_DEBUG(IO_, "GDKcreatedir(%s)\n", dir);
112 3078 : assert(!GDKinmemory(0));
113 3078 : if (!GDKembedded() && !MT_path_absolute(dir)) {
114 0 : GDKerror("directory '%s' is not absolute\n", dir);
115 0 : return GDK_FAIL;
116 : }
117 3078 : if (strlen(dir) >= FILENAME_MAX) {
118 0 : GDKerror("directory name too long\n");
119 0 : return GDK_FAIL;
120 : }
121 3078 : strcpy(path, dir); /* we know this fits (see above) */
122 : /* skip initial /, if any */
123 40072 : for (r = strchr(path + 1, DIR_SEP); r; r = strchr(r, DIR_SEP)) {
124 36994 : *r = 0;
125 36994 : if (
126 : #ifdef WIN32
127 : strlen(path) > 3 &&
128 : #endif
129 36993 : MT_mkdir(path) < 0) {
130 33691 : if (errno != EEXIST) {
131 0 : GDKsyserror("cannot create directory %s\n", path);
132 0 : return GDK_FAIL;
133 : }
134 33691 : if ((dirp = opendir(path)) == NULL) {
135 0 : GDKsyserror("%s cannot open directory\n", path);
136 0 : return GDK_FAIL;
137 : }
138 : /* it's a directory, we can continue */
139 33691 : closedir(dirp);
140 : }
141 36994 : *r++ = DIR_SEP;
142 : }
143 : return GDK_SUCCEED;
144 : }
145 :
146 : /* remove the directory DIRNAME with its file contents; does not
147 : * recurse into subdirectories */
148 : gdk_return
149 13538 : GDKremovedir(int farmid, const char *dirname)
150 : {
151 13538 : str dirnamestr;
152 13538 : DIR *dirp;
153 13538 : char *path;
154 13538 : struct dirent *dent;
155 13538 : int ret;
156 :
157 13538 : assert(!GDKinmemory(farmid));
158 13538 : if ((dirnamestr = GDKfilepath(farmid, NULL, dirname, NULL)) == NULL)
159 : return GDK_FAIL;
160 :
161 13538 : TRC_DEBUG(IO_, "GDKremovedir(%s)\n", dirnamestr);
162 :
163 13538 : if ((dirp = opendir(dirnamestr)) == NULL) {
164 653 : GDKfree(dirnamestr);
165 653 : return GDK_SUCCEED;
166 : }
167 274210 : while ((dent = readdir(dirp)) != NULL) {
168 261325 : if (dent->d_name[0] == '.' &&
169 25770 : (dent->d_name[1] == 0 ||
170 12885 : (dent->d_name[1] == '.' && dent->d_name[2] == 0))) {
171 : /* skip . and .. */
172 25770 : continue;
173 : }
174 235555 : path = GDKfilepath(farmid, dirname, dent->d_name, NULL);
175 235555 : if (path == NULL) {
176 : /* most likely the rmdir will now fail causing
177 : * an error return */
178 : break;
179 : }
180 235555 : ret = MT_remove(path);
181 235555 : if (ret == -1)
182 0 : GDKsyserror("remove(%s) failed\n", path);
183 235555 : TRC_DEBUG(IO_, "Remove %s = %d\n", path, ret);
184 235555 : GDKfree(path);
185 : }
186 12885 : closedir(dirp);
187 12885 : ret = MT_rmdir(dirnamestr);
188 12885 : if (ret != 0)
189 0 : GDKsyserror("rmdir(%s) failed\n", dirnamestr);
190 12885 : TRC_DEBUG(IO_, "rmdir %s = %d\n", dirnamestr, ret);
191 12885 : GDKfree(dirnamestr);
192 12885 : return ret ? GDK_FAIL : GDK_SUCCEED;
193 : }
194 :
/* Extra flag bits for open().  Within the code visible here only
 * _FRDSEQ is referenced: GDKfdlocate adds it to the open flags when
 * _CYGNUS_H_ is defined and the file is not opened for mmap.
 * NOTE(review): _FUNBUF and _FWRTHR look unused in this part of the
 * file -- check the rest of the project before touching them. */
#define _FUNBUF 0x040000
#define _FWRTHR 0x080000
#define _FRDSEQ 0x100000
198 :
199 : /* open a file and return its file descriptor; the file is specified
200 : * using farmid, name and extension; if opening for writing, we create
201 : * the parent directory if necessary; if opening for reading, we don't
202 : * necessarily report an error if it fails, but we make sure errno is
203 : * set */
204 : int
205 437634 : GDKfdlocate(int farmid, const char *nme, const char *mode, const char *extension)
206 : {
207 437634 : char *path = NULL;
208 437634 : int fd, flags = O_CLOEXEC;
209 :
210 437634 : assert(!GDKinmemory(farmid));
211 437630 : if (nme == NULL || *nme == 0) {
212 0 : GDKerror("no name specified\n");
213 0 : errno = EFAULT;
214 0 : return -1;
215 : }
216 :
217 437630 : assert(farmid != NOFARM || extension == NULL);
218 437630 : if (farmid != NOFARM) {
219 435984 : path = GDKfilepath(farmid, BATDIR, nme, extension);
220 435988 : if (path == NULL) {
221 0 : errno = ENOMEM;
222 0 : return -1;
223 : }
224 : nme = path;
225 : }
226 :
227 437634 : if (*mode == 'm') { /* file open for mmap? */
228 0 : mode++;
229 : #ifdef _CYGNUS_H_
230 : } else {
231 : flags |= _FRDSEQ; /* WIN32 CreateFile(FILE_FLAG_SEQUENTIAL_SCAN) */
232 : #endif
233 : }
234 :
235 437634 : if (strchr(mode, 'w')) {
236 : flags |= O_WRONLY | O_CREAT;
237 69506 : } else if (!strchr(mode, '+')) {
238 : flags |= O_RDONLY;
239 : } else {
240 46416 : flags |= O_RDWR;
241 : }
242 : #ifdef WIN32
243 : flags |= strchr(mode, 'b') ? O_BINARY : O_TEXT;
244 : #endif
245 437634 : fd = MT_open(nme, flags);
246 437639 : if (fd < 0 && *mode == 'w') {
247 : /* try to create the directory, in case that was the problem */
248 2853 : if (GDKcreatedir(nme) == GDK_SUCCEED) {
249 2853 : fd = MT_open(nme, flags);
250 2853 : if (fd < 0)
251 0 : GDKsyserror("cannot open file %s\n", nme);
252 : }
253 : }
254 437639 : int err = errno; /* save */
255 : /* don't generate error if we can't open a file for reading */
256 437639 : GDKfree(path);
257 437640 : errno = err; /* restore */
258 437640 : return fd;
259 : }
260 :
/* Like GDKfdlocate, except that the result is a FILE pointer wrapped
 * around the descriptor.  Returns NULL when the file cannot be
 * opened or when fdopen fails (the descriptor is closed again in the
 * latter case). */
FILE *
GDKfilelocate(int farmid, const char *nme, const char *mode, const char *extension)
{
	FILE *fp;
	int fd = GDKfdlocate(farmid, nme, mode, extension);

	if (fd < 0)
		return NULL;
	/* fdopen doesn't know about the leading 'm' marker */
	if (mode[0] == 'm')
		mode++;
	fp = fdopen(fd, mode);
	if (fp != NULL)
		return fp;
	GDKsyserror("cannot fdopen file\n");
	close(fd);
	return NULL;
}
279 :
280 : FILE *
281 12876 : GDKfileopen(int farmid, const char *dir, const char *name, const char *extension, const char *mode)
282 : {
283 12876 : char *path;
284 :
285 : /* if name is null, try to get one from dir (in case it was a path) */
286 12876 : path = GDKfilepath(farmid, dir, name, extension);
287 :
288 12876 : if (path != NULL) {
289 12876 : FILE *f;
290 12876 : TRC_DEBUG(IO_, "GDKfileopen(%s)\n", path);
291 12876 : f = MT_fopen(path, mode);
292 12876 : int err = errno;
293 12876 : GDKfree(path);
294 12876 : errno = err;
295 12876 : return f;
296 : }
297 : return NULL;
298 : }
299 :
300 : /* remove the file */
301 : gdk_return
302 12792 : GDKunlink(int farmid, const char *dir, const char *nme, const char *ext)
303 : {
304 12792 : if (nme && *nme) {
305 12792 : char *path;
306 :
307 12792 : path = GDKfilepath(farmid, dir, nme, ext);
308 12792 : if (path == NULL)
309 : return GDK_FAIL;
310 : /* if file already doesn't exist, we don't care */
311 12792 : if (MT_remove(path) != 0 && errno != ENOENT) {
312 0 : GDKsyserror("remove(%s)\n", path);
313 0 : GDKfree(path);
314 0 : return GDK_FAIL;
315 : }
316 12792 : GDKfree(path);
317 12792 : return GDK_SUCCEED;
318 : }
319 0 : GDKerror("no name specified");
320 0 : return GDK_FAIL;
321 : }
322 :
323 : /*
324 : * A move routine is overloaded to deal with extensions.
325 : */
326 : gdk_return
327 2770221 : GDKmove(int farmid, const char *dir1, const char *nme1, const char *ext1, const char *dir2, const char *nme2, const char *ext2, bool report)
328 : {
329 2770221 : char *path1;
330 2770221 : char *path2;
331 2770221 : int ret;
332 2770221 : lng t0 = GDKusec();
333 :
334 2770221 : if (nme1 == NULL || *nme1 == 0) {
335 0 : GDKerror("no file specified\n");
336 0 : return GDK_FAIL;
337 : }
338 2770221 : path1 = GDKfilepath(farmid, dir1, nme1, ext1);
339 2770221 : path2 = GDKfilepath(farmid, dir2, nme2, ext2);
340 2770221 : if (path1 && path2) {
341 2770221 : ret = MT_rename(path1, path2);
342 2770221 : if (ret < 0 && report)
343 0 : GDKsyserror("cannot rename %s to %s\n", path1, path2);
344 :
345 2770221 : TRC_DEBUG(IO_, "Move %s %s = %d ("LLFMT" usec)\n", path1, path2, ret, GDKusec() - t0);
346 : } else {
347 : ret = -1;
348 : }
349 2770221 : GDKfree(path1);
350 2770221 : GDKfree(path2);
351 2770221 : return ret < 0 ? GDK_FAIL : GDK_SUCCEED;
352 : }
353 :
354 : gdk_return
355 3297 : GDKextendf(int fd, size_t size, const char *fn)
356 : {
357 3297 : struct stat stb;
358 3297 : int rt = 0;
359 3297 : lng t0 = GDKusec();
360 :
361 3296 : assert(!GDKinmemory(0));
362 : #ifdef __COVERITY__
363 : if (fd < 0) /* in real life, if fd < 0, fstat will fail */
364 : return GDK_FAIL;
365 : #endif
366 3297 : if (fstat(fd, &stb) < 0) {
367 : /* shouldn't happen */
368 0 : GDKsyserror("fstat failed unexpectedly\n");
369 0 : return GDK_FAIL;
370 : }
371 : /* if necessary, extend the underlying file */
372 3295 : if (stb.st_size < (off_t) size) {
373 : #ifdef HAVE_FALLOCATE
374 2273 : if ((rt = fallocate(fd, 0, stb.st_size, (off_t) size - stb.st_size)) < 0 &&
375 0 : errno == EOPNOTSUPP)
376 : /* on Linux, posix_fallocate uses a slow
377 : * method to allocate blocks if the underlying
378 : * file system doesn't support the operation,
379 : * so use fallocate instead and just resize
380 : * the file if it fails */
381 : #else
382 : #ifdef HAVE_POSIX_FALLOCATE
383 : /* posix_fallocate returns error number on failure,
384 : * not -1 :-( */
385 : if ((rt = posix_fallocate(fd, stb.st_size, (off_t) size - stb.st_size)) == EINVAL)
386 : /* on Solaris/OpenIndiana, this may mean that
387 : * the underlying file system doesn't support
388 : * the operation, so just resize the file */
389 : #endif
390 : #endif
391 : /* we get here when (posix_)fallocate fails
392 : * because it is not supported on the file
393 : * system, or if neither function exists */
394 0 : rt = ftruncate(fd, (off_t) size);
395 2274 : if (rt != 0) {
396 : /* extending failed, try to reduce file size
397 : * back to original */
398 0 : GDKsyserror("could not extend file\n");
399 0 : if (ftruncate(fd, stb.st_size))
400 0 : GDKsyserror("ftruncate to old size");
401 : }
402 : }
403 3296 : TRC_DEBUG(IO_, "GDKextend %s %zu -> %zu "LLFMT" usec%s\n",
404 : fn, (size_t) stb.st_size, size,
405 : GDKusec() - t0, rt != 0 ? " (failed)" : "");
406 : /* posix_fallocate returns != 0 on failure, fallocate and
407 : * ftruncate return -1 on failure, but all three return 0 on
408 : * success */
409 3291 : return rt != 0 ? GDK_FAIL : GDK_SUCCEED;
410 : }
411 :
412 : gdk_return
413 2703 : GDKextend(const char *fn, size_t size)
414 : {
415 2703 : int fd, flags = O_RDWR;
416 2703 : gdk_return rt = GDK_FAIL;
417 :
418 2703 : assert(!GDKinmemory(0));
419 : #ifdef O_BINARY
420 : /* On Windows, open() fails if the file is bigger than 2^32
421 : * bytes without O_BINARY. */
422 : flags |= O_BINARY;
423 : #endif
424 2703 : if ((fd = MT_open(fn, flags | O_CLOEXEC)) >= 0) {
425 2703 : rt = GDKextendf(fd, size, fn);
426 2701 : close(fd);
427 : } else {
428 0 : GDKsyserror("cannot open file %s\n", fn);
429 : }
430 2703 : return rt;
431 : }
432 :
433 : /*
434 : * @+ Save and load.
435 : * The BAT is saved on disk in several files. The extension DESC
436 : * denotes the descriptor, BUNs the bun heap, and HHEAP and THEAP the
437 : * other heaps. The storage mechanism of a file can be memory mapped
438 : * (STORE_MMAP) or malloced (STORE_MEM).
439 : *
440 : * These modes indicate the disk layout and the intended mapping.
441 : * The primary concern here is to handle STORE_MMAP and STORE_MEM.
442 : */
443 : gdk_return
444 354408 : GDKsave(int farmid, const char *nme, const char *ext, void *buf, size_t size, storage_t mode, bool dosync)
445 : {
446 354408 : int err = 0;
447 :
448 354408 : TRC_DEBUG(IO_, "GDKsave: name=%s, ext=%s, mode %d, dosync=%d\n", nme, ext ? ext : "", (int) mode, dosync);
449 :
450 354408 : assert(!GDKinmemory(farmid));
451 354408 : if (mode == STORE_MMAP) {
452 1067 : if (dosync && size && !(ATOMIC_GET(&GDKdebug) & NOSYNCMASK))
453 179 : err = MT_msync(buf, size);
454 179 : if (err)
455 0 : GDKerror("error on: name=%s, ext=%s, mode=%d\n",
456 : nme, ext ? ext : "", (int) mode);
457 1067 : TRC_DEBUG(IO_, "MT_msync(buf %p, size %zu) = %d\n",
458 : buf, size, err);
459 : } else {
460 353341 : int fd;
461 :
462 353341 : if ((fd = GDKfdlocate(farmid, nme, "wb", ext)) >= 0) {
463 : /* write() on 64-bits Redhat for IA64 returns
464 : * 32-bits signed result (= OS BUG)! write()
465 : * on Windows only takes unsigned int as
466 : * size */
467 706682 : while (size > 0) {
468 : /* circumvent problems by writing huge
469 : * buffers in chunks <= 1GiB */
470 353341 : ssize_t ret;
471 :
472 706682 : ret = write(fd, buf,
473 : (unsigned) MIN(1 << 30, size));
474 353341 : if (ret < 0) {
475 0 : err = -1;
476 0 : GDKsyserror("GDKsave: error %zd"
477 : " on: name=%s, ext=%s, "
478 : "mode=%d\n", ret, nme,
479 : ext ? ext : "", (int) mode);
480 0 : break;
481 : }
482 353341 : size -= ret;
483 353341 : buf = (void *) ((char *) buf + ret);
484 1060023 : TRC_DEBUG(IO_, "Write(fd %d, buf %p"
485 : ", size %u) = %zd\n",
486 : fd, buf,
487 : (unsigned) MIN(1 << 30, size),
488 : ret);
489 : }
490 353341 : if (dosync && !(ATOMIC_GET(&GDKdebug) & NOSYNCMASK)
491 : #if defined(NATIVE_WIN32)
492 : && _commit(fd) < 0
493 : #elif defined(HAVE_FDATASYNC)
494 301 : && fdatasync(fd) < 0
495 : #elif defined(HAVE_FSYNC)
496 : && fsync(fd) < 0
497 : #endif
498 : ) {
499 0 : GDKsyserror("GDKsave: error on: name=%s, "
500 : "ext=%s, mode=%d\n", nme,
501 : ext ? ext : "", (int) mode);
502 0 : err = -1;
503 : }
504 353341 : err |= close(fd);
505 353341 : if (err && GDKunlink(farmid, BATDIR, nme, ext) != GDK_SUCCEED) {
506 : /* do not tolerate corrupt heap images
507 : * (BBPrecover on restart will kill
508 : * them) */
509 0 : GDKerror("could not remove: name=%s, "
510 : "ext=%s, mode %d\n", nme,
511 : ext ? ext : "", (int) mode);
512 0 : return GDK_FAIL;
513 : }
514 : } else {
515 0 : err = -1;
516 0 : GDKerror("failed name=%s, ext=%s, mode %d\n",
517 : nme, ext ? ext : "", (int) mode);
518 : }
519 : }
520 354408 : return err ? GDK_FAIL : GDK_SUCCEED;
521 : }
522 :
523 : /*
524 : * Space for the load is directly allocated and the heaps are mapped.
525 : * Further initialization of the atom heaps require a separate action
526 : * defined in their implementation.
527 : *
528 : * size -- how much to read
529 : * *maxsize -- (in/out) how much to allocate / how much was allocated
530 : */
531 : char *
532 24126 : GDKload(int farmid, const char *nme, const char *ext, size_t size, size_t *maxsize, storage_t mode)
533 : {
534 24126 : char *ret = NULL;
535 :
536 24126 : assert(!GDKinmemory(farmid));
537 24125 : assert(size <= *maxsize);
538 24125 : assert(farmid != NOFARM || ext == NULL);
539 24125 : TRC_DEBUG(IO_, "GDKload: name=%s, ext=%s, mode %d\n", nme, ext ? ext : "", (int) mode);
540 :
541 24126 : if (mode == STORE_MEM) {
542 21423 : int fd = GDKfdlocate(farmid, nme, "rb", ext);
543 :
544 21423 : if (fd >= 0) {
545 21423 : char *dst = ret = GDKmalloc(*maxsize);
546 21423 : ssize_t n_expected, n = 0;
547 :
548 21423 : if (ret) {
549 : /* read in chunks, some OSs do not
550 : * give you all at once and Windows
551 : * only accepts int */
552 42846 : for (n_expected = (ssize_t) size; n_expected > 0; n_expected -= n) {
553 21423 : n = read(fd, dst, (unsigned) MIN(1 << 30, n_expected));
554 21423 : if (n < 0)
555 0 : GDKsyserror("GDKload: cannot read: name=%s, ext=%s, expected %zu, %zd bytes missing\n", nme, ext ? ext : "", size, n_expected);
556 : #ifndef __COVERITY__
557 : /* Coverity doesn't seem to
558 : * recognize that we're just
559 : * printing the value of ptr,
560 : * not its contents */
561 21423 : TRC_DEBUG(IO_, "read(dst %p, n_expected %zd, fd %d) = %zd\n", (void *)dst, n_expected, fd, n);
562 : #endif
563 :
564 21423 : if (n <= 0)
565 : break;
566 21423 : dst += n;
567 : }
568 21423 : if (n_expected > 0) {
569 : /* we couldn't read all, error
570 : * already generated */
571 0 : GDKfree(ret);
572 0 : if (n >= 0) /* don't report error twice */
573 0 : GDKerror("short read from heap %s%s%s, expected %zu, missing %zd\n", nme, ext ? "." : "", ext ? ext : "", size, n_expected);
574 : ret = NULL;
575 : }
576 : #ifndef NDEBUG
577 : /* just to make valgrind happy, we
578 : * initialize the whole thing */
579 21423 : if (ret && *maxsize > size)
580 14893 : memset(ret + size, 0, *maxsize - size);
581 : #endif
582 : }
583 21423 : close(fd);
584 : } else {
585 0 : GDKsyserror("cannot open: name=%s, ext=%s\n", nme, ext ? ext : "");
586 : }
587 : } else {
588 2703 : char *path = NULL;
589 :
590 : /* round up to multiple of GDK_mmap_pagesize with a
591 : * minimum of one */
592 2703 : size = (*maxsize + GDK_mmap_pagesize - 1) & ~(GDK_mmap_pagesize - 1);
593 2703 : if (size == 0)
594 0 : size = GDK_mmap_pagesize;
595 2703 : if (farmid != NOFARM) {
596 1053 : path = GDKfilepath(farmid, BATDIR, nme, ext);
597 1053 : nme = path;
598 : }
599 2703 : if (nme != NULL && GDKextend(nme, size) == GDK_SUCCEED) {
600 2703 : int mod = MMAP_READ | MMAP_WRITE | MMAP_SEQUENTIAL;
601 :
602 2703 : if (mode == STORE_PRIV)
603 : mod |= MMAP_COPY;
604 : else
605 2703 : mod |= MMAP_SYNC;
606 2703 : ret = GDKmmap(nme, mod, size);
607 2703 : if (ret != NULL) {
608 : /* success: update allocated size */
609 2703 : *maxsize = size;
610 : }
611 2703 : TRC_DEBUG(IO_, "mmap(NULL, 0, maxsize %zu, mod %d, path %s, 0) = %p\n", size, mod, nme, (void *)ret);
612 : }
613 2703 : GDKfree(path);
614 : }
615 24126 : return ret;
616 : }
617 :
618 : /*
619 : * @+ BAT disk storage
620 : *
621 : * Between sessions the BATs comprising the database are saved on
622 : * disk. To simplify code, we assume a UNIX directory called its
623 : * physical @%home@ where they are to be located. The subdirectories
624 : * BAT and PRG contain what its name says.
625 : *
626 : * A BAT created by @%COLnew@ is considered temporary until one calls
627 : * the routine @%BATsave@. This routine reserves disk space and checks
628 : * for name clashes.
629 : *
630 : * Saving and restoring BATs is left to the upper layers. The library
631 : * merely copies the data into place. Failure to read or write the
632 : * BAT results in a NULL, otherwise it returns the BAT pointer.
633 : */
634 : static BAT *
635 19859 : DESCload(int i)
636 : {
637 19859 : const char *s, *nme = BBP_physical(i);
638 19859 : BAT *b = NULL;
639 19859 : int tt;
640 :
641 19859 : TRC_DEBUG(IO_, "DESCload: %s\n", nme ? nme : "<noname>");
642 :
643 19859 : b = BBP_desc(i);
644 :
645 19859 : if (b->batCacheid == 0) {
646 0 : GDKerror("no descriptor for BAT %d\n", i);
647 0 : return NULL;
648 : }
649 :
650 19859 : MT_lock_set(&b->theaplock);
651 19859 : tt = b->ttype;
652 19859 : if (tt < 0) {
653 0 : if ((tt = ATOMindex(s = ATOMunknown_name(tt))) < 0) {
654 0 : MT_lock_unset(&b->theaplock);
655 0 : GDKerror("atom '%s' unknown, in BAT '%s'.\n", s, nme);
656 0 : return NULL;
657 : }
658 0 : b->ttype = tt;
659 : }
660 :
661 : /* reconstruct mode from BBP status (BATmode doesn't flush
662 : * descriptor, so loaded mode may be stale) */
663 19859 : b->batTransient = (BBP_status(b->batCacheid) & BBPPERSISTENT) == 0;
664 19859 : b->batCopiedtodisk = true;
665 19859 : MT_lock_unset(&b->theaplock);
666 19859 : return b;
667 : }
668 :
/* Save the heaps of BAT b to disk, using the state captured in the
 * iterator bi.
 *
 * b    -- the BAT to save
 * bi   -- iterator on b; heap pointers, dirty flags and free sizes
 *         are taken from here
 * size -- count to compare with b->batCount afterwards; if they
 *         differ, the heaps are marked dirty again
 *
 * Returns GDK_SUCCEED when there was nothing to save or everything
 * was saved, GDK_FAIL when b is a view or saving failed. */
gdk_return
BATsave_iter(BAT *b, BATiter *bi, BUN size)
{
	gdk_return err = GDK_SUCCEED;
	bool dosync;
	bool locked = false;

	BATcheck(b, GDK_FAIL);

	/* try to get a read lock on the hash; remember whether we got
	 * it since the hash is only saved (and the lock only
	 * released) when we did */
	if (MT_rwlock_rdtry(&b->thashlock))
		locked = true;

	/* only sync to disk for persistent BATs */
	dosync = (BBP_status(b->batCacheid) & BBPPERSISTENT) != 0;
	assert(!GDKinmemory(bi->h->farmid));
	/* views cannot be saved, but make an exception for
	 * force-remapped views */
	if (isVIEW(b)) {
		if (locked)
			MT_rwlock_rdunlock(&b->thashlock);
		GDKerror("%s is a view on %s; cannot be saved\n", BATgetId(b), BBP_logical(VIEWtparent(b)));
		return GDK_FAIL;
	}
	/* nothing to do if the iterator says nothing is dirty */
	if (!BATdirtybi(*bi)) {
		if (locked)
			MT_rwlock_rdunlock(&b->thashlock);
		return GDK_SUCCEED;
	}

	/* start saving data */
	if (bi->type != TYPE_void && bi->base == NULL) {
		/* no data in memory: the BAT must have been swapped
		 * out, so at most the existing files need syncing */
		assert(BBP_status(b->batCacheid) & BBPSWAPPED);
		if (dosync && !(ATOMIC_GET(&GDKdebug) & NOSYNCMASK)) {
			int fd = GDKfdlocate(bi->h->farmid, bi->h->filename, "rb+", NULL);
			if (fd < 0) {
				GDKsyserror("cannot open file %s for sync\n",
					    bi->h->filename);
				err = GDK_FAIL;
			} else {
				/* NOTE(review): a failing sync is
				 * reported but does not change err --
				 * confirm this is intended */
				if (
#if defined(NATIVE_WIN32)
					_commit(fd) < 0
#elif defined(HAVE_FDATASYNC)
					fdatasync(fd) < 0
#elif defined(HAVE_FSYNC)
					fsync(fd) < 0
#endif
					)
					GDKsyserror("sync failed for %s\n",
						    bi->h->filename);
				close(fd);
			}
			/* same again for the variable-sized heap, if any */
			if (bi->vh) {
				fd = GDKfdlocate(bi->vh->farmid, bi->vh->filename, "rb+", NULL);
				if (fd < 0) {
					GDKsyserror("cannot open file %s for sync\n",
						    bi->vh->filename);
					err = GDK_FAIL;
				} else {
					if (
#if defined(NATIVE_WIN32)
						_commit(fd) < 0
#elif defined(HAVE_FDATASYNC)
						fdatasync(fd) < 0
#elif defined(HAVE_FSYNC)
						fsync(fd) < 0
#endif
						)
						GDKsyserror("sync failed for %s\n", bi->vh->filename);
					close(fd);
				}
			}
		}
	} else {
		const char *nme = BBP_physical(b->batCacheid);
		/* save the tail heap if it was never written or is
		 * dirty (and the type is not void, i.e. non-zero) */
		if ((!bi->copiedtodisk || bi->hdirty)
		    && (err == GDK_SUCCEED && bi->type)) {
			/* the extension is whatever follows the first
			 * '.' in the heap's file name */
			const char *tail = strchr(bi->h->filename, '.') + 1;
			err = HEAPsave(bi->h, nme, tail, dosync, bi->hfree, &b->theaplock);
		}
		/* likewise for the variable-sized heap */
		if (bi->vh
		    && (!bi->copiedtodisk || bi->vhdirty)
		    && ATOMvarsized(bi->type)
		    && err == GDK_SUCCEED)
			err = HEAPsave(bi->vh, nme, "theap", dosync, bi->vhfree, &b->theaplock);
	}

	if (err == GDK_SUCCEED) {
		MT_lock_set(&b->theaplock);
		/* if the BAT now has a different heap than the one we
		 * saved, carry the file-related flags of the saved
		 * heap over to the current one */
		if (b->theap != bi->h) {
			assert(b->theap->dirty);
			b->theap->wasempty = bi->h->wasempty;
			b->theap->hasfile |= bi->h->hasfile;
		}
		if (b->tvheap && b->tvheap != bi->vh) {
			assert(b->tvheap->dirty);
			b->tvheap->wasempty = bi->vh->wasempty;
			b->tvheap->hasfile |= bi->vh->hasfile;
		}
		if (size != b->batCount) {
			/* if the size doesn't match, the BAT must be dirty */
			b->theap->dirty = true;
			if (b->tvheap)
				b->tvheap->dirty = true;
		}
		/* there is something on disk now */
		b->batCopiedtodisk = true;
		MT_lock_unset(&b->theaplock);
		/* NOTE(review): (Hash *) 1 looks like a marker value
		 * for "no real hash loaded" -- confirm */
		if (locked && b->thash && b->thash != (Hash *) 1)
			BAThashsave(b, dosync);
	}
	if (locked)
		MT_rwlock_rdunlock(&b->thashlock);
	return err;
}
783 :
784 : gdk_return
785 8692 : BATsave(BAT *b)
786 : {
787 8692 : gdk_return rc;
788 :
789 8692 : BATiter bi = bat_iterator(b);
790 8692 : rc = BATsave_iter(b, &bi, bi.count);
791 8692 : bat_iterator_end(&bi);
792 8692 : return rc;
793 : }
794 :
795 : /*
796 : * TODO: move to gdk_bbp.c
797 : */
798 : BAT *
799 19859 : BATload_intern(bat bid, bool lock)
800 : {
801 19859 : const char *nme;
802 19859 : BAT *b;
803 :
804 19859 : assert(!GDKinmemory(0));
805 19859 : assert(bid > 0);
806 :
807 19859 : nme = BBP_physical(bid);
808 19859 : b = DESCload(bid);
809 :
810 19859 : if (b == NULL) {
811 : return NULL;
812 : }
813 19859 : assert(!GDKinmemory(b->theap->farmid));
814 :
815 : /* LOAD bun heap */
816 19859 : if (b->ttype != TYPE_void) {
817 19859 : b->theap->storage = b->theap->newstorage = STORE_INVALID;
818 19859 : if ((b->batCount == 0 ?
819 2613 : HEAPalloc(b->theap, b->batCapacity, b->twidth) :
820 22472 : HEAPload(b->theap, b->theap->filename, NULL, b->batRestricted == BAT_READ)) != GDK_SUCCEED) {
821 0 : HEAPfree(b->theap, false);
822 0 : return NULL;
823 : }
824 19859 : if (ATOMstorage(b->ttype) == TYPE_msk) {
825 4336 : b->batCapacity = (BUN) (b->theap->size * 8);
826 : } else {
827 15523 : assert(b->theap->size >> b->tshift <= BUN_MAX);
828 15523 : b->batCapacity = (BUN) (b->theap->size >> b->tshift);
829 : }
830 : } else {
831 0 : b->theap->base = NULL;
832 : }
833 :
834 : /* LOAD tail heap */
835 19859 : if (ATOMvarsized(b->ttype)) {
836 4620 : b->tvheap->storage = b->tvheap->newstorage = STORE_INVALID;
837 4620 : if ((b->tvheap->free == 0 ?
838 275 : ATOMheap(b->ttype, b->tvheap, b->batCapacity) :
839 4895 : HEAPload(b->tvheap, nme, "theap", b->batRestricted == BAT_READ)) != GDK_SUCCEED) {
840 0 : HEAPfree(b->theap, false);
841 0 : HEAPfree(b->tvheap, false);
842 0 : return NULL;
843 : }
844 4620 : if (ATOMstorage(b->ttype) == TYPE_str) {
845 4485 : strCleanHash(b->tvheap, false); /* ensure consistency */
846 : } else {
847 135 : HEAP_recover(b->tvheap, (const var_t *) Tloc(b, 0),
848 : BATcount(b));
849 : }
850 : }
851 :
852 : /* initialize descriptor */
853 19859 : b->theap->parentid = b->batCacheid;
854 :
855 : /* load succeeded; register it in BBP */
856 19859 : if (BBPcacheit(b, lock) != GDK_SUCCEED) {
857 0 : HEAPfree(b->theap, false);
858 0 : if (b->tvheap)
859 0 : HEAPfree(b->tvheap, false);
860 0 : return NULL;
861 : }
862 : return b;
863 : }
864 :
865 : /*
866 : * @- BATdelete
867 : * The new behavior is to let the routine produce warnings but always
868 : * succeed. rationale: on a delete, we must get rid of *all* the
869 : * files. We do not have to care about preserving them or be too much
870 : * concerned if a file that had to be deleted was not found (end
871 : * result is still that it does not exist). The past behavior to
872 : * delete some files and then fail was erroneous. The BAT would
873 : * continue to exist with an incorrect disk status, causing havoc
874 : * later on.
875 : *
876 : * NT forces us to close all files before deleting them; in case of
877 : * memory mapped files this means that we have to unload the BATs
878 : * before deleting. This is enforced now.
879 : */
880 : void
881 19104015 : BATdelete(BAT *b)
882 : {
883 19104015 : HASHdestroy(b);
884 19102444 : IMPSdestroy(b);
885 19102346 : OIDXdestroy(b);
886 19103264 : PROPdestroy_nolock(b);
887 19103921 : STRMPdestroy(b);
888 19103590 : RTREEdestroy(b);
889 19103514 : if (b->theap) {
890 13363 : HEAPfree(b->theap, true);
891 : }
892 19103490 : if (b->tvheap) {
893 3393 : HEAPfree(b->tvheap, true);
894 : }
895 19103490 : b->batCopiedtodisk = false;
896 19103490 : }
897 :
898 : /*
899 : * BAT specific printing
900 : */
901 :
902 : gdk_return
903 687 : BATprintcolumns(stream *s, int argc, BAT *argv[])
904 : {
905 687 : int i;
906 687 : BUN n, cnt;
907 687 : struct colinfo {
908 : ssize_t (*s) (str *, size_t *, const void *, bool);
909 : BATiter i;
910 : } *colinfo;
911 687 : char *buf;
912 687 : size_t buflen = 0;
913 687 : ssize_t len;
914 687 : gdk_return rc = GDK_SUCCEED;
915 :
916 : /* error checking */
917 2128 : for (i = 0; i < argc; i++) {
918 1443 : if (argv[i] == NULL) {
919 0 : GDKerror("Columns missing\n");
920 0 : return GDK_FAIL;
921 : }
922 1443 : if (BATcount(argv[0]) != BATcount(argv[i])) {
923 2 : GDKerror("Columns must be the same size\n");
924 2 : return GDK_FAIL;
925 : }
926 : }
927 :
928 685 : if ((colinfo = GDKmalloc(argc * sizeof(*colinfo))) == NULL) {
929 0 : GDKerror("Cannot allocate memory\n");
930 0 : return GDK_FAIL;
931 : }
932 :
933 2120 : for (i = 0; i < argc; i++) {
934 1435 : colinfo[i].i = bat_iterator(argv[i]);
935 1435 : colinfo[i].s = BATatoms[argv[i]->ttype].atomToStr;
936 : }
937 :
938 685 : mnstr_write(s, "#--------------------------#\n", 1, 29);
939 685 : mnstr_write(s, "# ", 1, 2);
940 2805 : for (i = 0; i < argc; i++) {
941 1435 : if (i > 0)
942 750 : mnstr_write(s, "\t", 1, 1);
943 1435 : const char *nm = ATOMname(argv[i]->ttype);
944 1435 : mnstr_write(s, nm, 1, strlen(nm));
945 : }
946 685 : mnstr_write(s, " # type\n", 1, 9);
947 685 : mnstr_write(s, "#--------------------------#\n", 1, 29);
948 685 : buf = NULL;
949 :
950 3896 : for (n = 0, cnt = BATcount(argv[0]); n < cnt; n++) {
951 3211 : mnstr_write(s, "[ ", 1, 2);
952 13168 : for (i = 0; i < argc; i++) {
953 6746 : len = colinfo[i].s(&buf, &buflen, BUNtail(colinfo[i].i, n), true);
954 6746 : if (len < 0) {
955 0 : rc = GDK_FAIL;
956 0 : goto bailout;
957 : }
958 6746 : if (i > 0)
959 3535 : mnstr_write(s, ",\t", 1, 2);
960 6746 : mnstr_write(s, buf, 1, len);
961 : }
962 3211 : mnstr_write(s, " ]\n", 1, 4);
963 : }
964 :
965 685 : bailout:
966 2120 : for (i = 0; i < argc; i++) {
967 1435 : bat_iterator_end(&colinfo[i].i);
968 : }
969 685 : GDKfree(buf);
970 685 : GDKfree(colinfo);
971 :
972 685 : return rc;
973 : }
974 :
975 : gdk_return
976 634 : BATprint(stream *fdout, BAT *b)
977 : {
978 634 : if (complex_cand(b)) {
979 0 : struct canditer ci;
980 0 : canditer_init(&ci, NULL, b);
981 0 : oid hseq = ci.hseq;
982 :
983 0 : mnstr_printf(fdout,
984 : "#--------------------------#\n"
985 : "# void\toid # type\n"
986 : "#--------------------------#\n");
987 0 : for (BUN i = 0; i < ci.ncand; i++) {
988 0 : oid o = canditer_next(&ci);
989 0 : mnstr_printf(fdout,
990 : "[ " OIDFMT "@0,\t" OIDFMT "@0 ]\n",
991 : (oid) (i + hseq), o);
992 : }
993 0 : return GDK_SUCCEED;
994 : }
995 :
996 634 : BAT *argv[2];
997 634 : gdk_return ret = GDK_FAIL;
998 :
999 634 : argv[0] = BATdense(b->hseqbase, b->hseqbase, BATcount(b));
1000 634 : if (argv[0]) {
1001 634 : argv[1] = b;
1002 634 : ret = BATprintcolumns(fdout, 2, argv);
1003 634 : BBPunfix(argv[0]->batCacheid);
1004 : }
1005 : return ret;
1006 : }
|