Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024, 2025 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * @a M. L. Kersten, P. Boncz, N. Nes
15 : *
16 : * @* Database Storage Management
17 : * Contains routines for writing and reading GDK data to and from
18 : * disk. This section contains the primitives to manage the
19 : * disk-based images of the BATs. It relies on the existence of a UNIX
20 : * file system, including memory mapped files. Solaris and IRIX have
21 : * different implementations of madvise().
22 : *
23 : * The current version assumes that all BATs are stored on a single
24 : * disk partition. This simplistic assumption should be replaced in
25 : * the near future by a multi-volume version. The intention is to use
26 : * several BAT home locations. The files should be owned by the
27 : * database server. Otherwise, IO operations are likely to fail. This
28 : * is accomplished by setting the GID and UID upon system start.
29 : */
30 : #include "monetdb_config.h"
31 : #include "gdk.h"
32 : #include "gdk_private.h"
33 : #include "mutils.h"
34 : #ifdef HAVE_FCNTL_H
35 : #include <fcntl.h>
36 : #endif
37 :
38 : #ifndef O_CLOEXEC
39 : #ifdef _O_NOINHERIT
40 : #define O_CLOEXEC _O_NOINHERIT /* Windows */
41 : #else
42 : #define O_CLOEXEC 0
43 : #endif
44 : #endif
45 :
46 : /* GDKfilepath writes the path name of a database file into path and
47 : * returns a pointer to it. The arguments are the buffer into which the
48 : * path is written, the size of that buffer, the farmID or -1, the name
49 : * of a subdirectory within the farm (i.e., something like BATDIR or
50 : * BAKDIR -- see gdk.h) or NULL, the name of a BAT (i.e. the name that
51 : * is stored in BBP.dir -- something like 07/714), and finally the file
52 : * extension.
53 : *
54 : * If farmid is >= 0, GDKfilepath returns the complete path to the
55 : * specified farm concatenated with the other arguments with appropriate
56 : * separators. If farmid is NOFARM (i.e. -1) , it returns the
57 : * concatenation of its other arguments (in this case, the result cannot
58 : * be used to access a file directly -- the farm needs to be prepended
59 : * in some other place). */
60 : gdk_return
61 27459989 : GDKfilepath(char *path, size_t pathlen, int farmid, const char *dir, const char *name, const char *ext)
62 : {
63 27459989 : const char *sep;
64 :
65 27459989 : if (GDKinmemory(farmid)) {
66 1 : if (strcpy_len(path, ":memory:", pathlen) >= pathlen)
67 : return GDK_FAIL;
68 : return GDK_SUCCEED;
69 : }
70 :
71 27458144 : assert(dir == NULL || *dir != DIR_SEP);
72 27458144 : assert(farmid == NOFARM ||
73 : (farmid >= 0 && farmid < MAXFARMS && BBPfarms[farmid].dirname));
74 27458144 : if (!GDKembedded() && MT_path_absolute(name)) {
75 0 : GDKerror("name should not be absolute\n");
76 0 : return GDK_FAIL;
77 : }
78 27459064 : if (dir && *dir == DIR_SEP)
79 0 : dir++;
80 27326602 : if (dir == NULL || dir[0] == 0 || dir[strlen(dir) - 1] == DIR_SEP) {
81 : sep = "";
82 : } else {
83 27286811 : sep = DIR_SEP_STR;
84 : }
85 27459064 : size_t len;
86 27459064 : if (farmid == NOFARM) {
87 2599176 : len = strconcat_len(path, pathlen,
88 : dir ? dir : "", sep, name,
89 : ext ? "." : NULL, ext, NULL);
90 : } else {
91 42963579 : len = strconcat_len(path, pathlen,
92 : BBPfarms[farmid].dirname, DIR_SEP_STR,
93 : dir ? dir : "", sep, name,
94 : ext ? "." : NULL, ext, NULL);
95 : }
96 27493320 : if (len >= pathlen) {
97 0 : GDKerror("path name too long\n");
98 0 : return GDK_FAIL;
99 : }
100 : return GDK_SUCCEED;
101 : }
102 :
103 : /* make sure the parent directory of DIR exists (the argument itself
104 : * is usually a file that is to be created) */
105 : gdk_return
106 3725 : GDKcreatedir(const char *dir)
107 : {
108 3725 : char path[FILENAME_MAX];
109 3725 : char *r;
110 3725 : DIR *dirp;
111 :
112 3725 : TRC_DEBUG(IO, "GDKcreatedir(%s)\n", dir);
113 3725 : assert(!GDKinmemory(0));
114 3724 : if (!GDKembedded() && !MT_path_absolute(dir)) {
115 0 : GDKerror("directory '%s' is not absolute\n", dir);
116 0 : return GDK_FAIL;
117 : }
118 3724 : if (strlen(dir) >= FILENAME_MAX) {
119 0 : GDKerror("directory name too long\n");
120 0 : return GDK_FAIL;
121 : }
122 3724 : strcpy(path, dir); /* we know this fits (see above) */
123 : /* skip initial /, if any */
124 49026 : for (r = strchr(path + 1, DIR_SEP); r; r = strchr(r, DIR_SEP)) {
125 45297 : *r = 0;
126 45297 : if (
127 : #ifdef WIN32
128 : strlen(path) > 3 &&
129 : #endif
130 45316 : MT_mkdir(path) < 0) {
131 41379 : if (errno != EEXIST) {
132 0 : GDKsyserror("cannot create directory %s\n", path);
133 0 : return GDK_FAIL;
134 : }
135 41379 : if ((dirp = opendir(path)) == NULL) {
136 0 : GDKsyserror("%s cannot open directory\n", path);
137 0 : return GDK_FAIL;
138 : }
139 : /* it's a directory, we can continue */
140 41326 : closedir(dirp);
141 : }
142 45302 : *r++ = DIR_SEP;
143 : }
144 : return GDK_SUCCEED;
145 : }
146 :
147 : /* remove the directory DIRNAME with its file contents; does not
148 : * recurse into subdirectories */
149 : gdk_return
150 13395 : GDKremovedir(int farmid, const char *dirname)
151 : {
152 13395 : char dirnamestr[MAXPATH];
153 13395 : DIR *dirp;
154 13395 : char path[MAXPATH];
155 13395 : struct dirent *dent;
156 13395 : int ret;
157 :
158 13395 : assert(!GDKinmemory(farmid));
159 13395 : if (GDKfilepath(dirnamestr, sizeof(dirnamestr), farmid, NULL, dirname, NULL) != GDK_SUCCEED)
160 : return GDK_FAIL;
161 :
162 13395 : TRC_DEBUG(IO, "GDKremovedir(%s)\n", dirnamestr);
163 :
164 13395 : if ((dirp = opendir(dirnamestr)) == NULL) {
165 : return GDK_SUCCEED;
166 : }
167 286668 : while ((dent = readdir(dirp)) != NULL) {
168 273986 : if (dent->d_name[0] == '.' &&
169 25364 : (dent->d_name[1] == 0 ||
170 12682 : (dent->d_name[1] == '.' && dent->d_name[2] == 0))) {
171 : /* skip . and .. */
172 25364 : continue;
173 : }
174 248622 : if (GDKfilepath(path, sizeof(path), farmid, dirname, dent->d_name, NULL) != GDK_SUCCEED) {
175 : /* most likely the rmdir will now fail causing
176 : * an error return */
177 : break;
178 : }
179 248622 : ret = MT_remove(path);
180 248622 : if (ret == -1)
181 0 : GDKsyserror("remove(%s) failed\n", path);
182 535290 : TRC_DEBUG(IO, "Remove %s = %d\n", path, ret);
183 : }
184 12682 : closedir(dirp);
185 12682 : ret = MT_rmdir(dirnamestr);
186 12682 : if (ret != 0)
187 0 : GDKsyserror("rmdir(%s) failed\n", dirnamestr);
188 12682 : TRC_DEBUG(IO, "rmdir %s = %d\n", dirnamestr, ret);
189 12682 : return ret ? GDK_FAIL : GDK_SUCCEED;
190 : }
191 :
192 : #define _FUNBUF 0x040000
193 : #define _FWRTHR 0x080000
194 : #define _FRDSEQ 0x100000
195 :
196 : /* open a file and return its file descriptor; the file is specified
197 : * using farmid, name and extension; if opening for writing, we create
198 : * the parent directory if necessary; if opening for reading, we don't
199 : * necessarily report an error if it fails, but we make sure errno is
200 : * set */
201 : int
202 456882 : GDKfdlocate(int farmid, const char *nme, const char *mode, const char *extension)
203 : {
204 456882 : char path[MAXPATH];
205 456882 : int fd, flags = O_CLOEXEC;
206 :
207 456882 : assert(!GDKinmemory(farmid));
208 456850 : if (nme == NULL || *nme == 0) {
209 0 : GDKerror("no name specified\n");
210 0 : errno = EFAULT;
211 0 : return -1;
212 : }
213 :
214 456850 : assert(farmid != NOFARM || extension == NULL);
215 456850 : if (farmid != NOFARM) {
216 455290 : if (GDKfilepath(path, sizeof(path), farmid, BATDIR, nme, extension) != GDK_SUCCEED) {
217 0 : errno = ENOMEM;
218 0 : return -1;
219 : }
220 : nme = path;
221 : }
222 :
223 456869 : if (*mode == 'm') { /* file open for mmap? */
224 0 : mode++;
225 : #ifdef _CYGNUS_H_
226 : } else {
227 : flags |= _FRDSEQ; /* WIN32 CreateFile(FILE_FLAG_SEQUENTIAL_SCAN) */
228 : #endif
229 : }
230 :
231 456869 : if (strchr(mode, 'w')) {
232 : flags |= O_WRONLY | O_CREAT;
233 78024 : } else if (!strchr(mode, '+')) {
234 : flags |= O_RDONLY;
235 : } else {
236 50876 : flags |= O_RDWR;
237 : }
238 : #ifdef WIN32
239 : flags |= strchr(mode, 'b') ? O_BINARY : O_TEXT;
240 : #endif
241 456869 : fd = MT_open(nme, flags);
242 456925 : if (fd < 0 && *mode == 'w') {
243 : /* try to create the directory, in case that was the problem */
244 3499 : if (GDKcreatedir(nme) == GDK_SUCCEED) {
245 3499 : fd = MT_open(nme, flags);
246 3500 : if (fd < 0)
247 0 : GDKsyserror("cannot open file %s\n", nme);
248 : }
249 : }
250 456926 : int err = errno; /* save */
251 : /* don't generate error if we can't open a file for reading */
252 456926 : errno = err; /* restore */
253 456926 : return fd;
254 : }
255 :
/* Like GDKfdlocate, but wrap the descriptor in a FILE pointer.  A
 * leading 'm' in mode is stripped before the mode is handed to
 * fdopen.  Returns NULL on failure; the descriptor is closed again if
 * fdopen fails. */
FILE *
GDKfilelocate(int farmid, const char *nme, const char *mode, const char *extension)
{
	int fd = GDKfdlocate(farmid, nme, mode, extension);
	if (fd < 0)
		return NULL;

	const char *fmode = *mode == 'm' ? mode + 1 : mode;
	FILE *fp = fdopen(fd, fmode);
	if (fp != NULL)
		return fp;

	GDKsyserror("cannot fdopen file\n");
	close(fd);
	return NULL;
}
274 :
275 : FILE *
276 12673 : GDKfileopen(int farmid, const char *dir, const char *name, const char *extension, const char *mode)
277 : {
278 12673 : char path[MAXPATH];
279 :
280 : /* if name is null, try to get one from dir (in case it was a path) */
281 12673 : if (GDKfilepath(path, sizeof(path), farmid, dir, name, extension) == GDK_SUCCEED) {
282 12673 : FILE *f;
283 12673 : TRC_DEBUG(IO, "GDKfileopen(%s)\n", path);
284 12673 : f = MT_fopen(path, mode);
285 12673 : int err = errno;
286 12673 : errno = err;
287 12673 : return f;
288 : }
289 : return NULL;
290 : }
291 :
292 : /* remove the file */
293 : gdk_return
294 13230 : GDKunlink(int farmid, const char *dir, const char *nme, const char *ext)
295 : {
296 13230 : if (nme && *nme) {
297 13230 : char path[MAXPATH];
298 :
299 13230 : if (GDKfilepath(path, sizeof(path), farmid, dir, nme, ext) != GDK_SUCCEED)
300 : return GDK_FAIL;
301 : /* if file already doesn't exist, we don't care */
302 13230 : if (MT_remove(path) != 0 && errno != ENOENT) {
303 0 : GDKsyserror("remove(%s)\n", path);
304 0 : return GDK_FAIL;
305 : }
306 : return GDK_SUCCEED;
307 : }
308 0 : GDKerror("no name specified");
309 0 : return GDK_FAIL;
310 : }
311 :
312 : /*
313 : * A move routine is overloaded to deal with extensions.
314 : */
315 : gdk_return
316 2845516 : GDKmove(int farmid, const char *dir1, const char *nme1, const char *ext1, const char *dir2, const char *nme2, const char *ext2, bool report)
317 : {
318 2845516 : char path1[MAXPATH];
319 2845516 : char path2[MAXPATH];
320 2845516 : int ret;
321 2845516 : lng t0 = GDKusec();
322 :
323 2845516 : if (nme1 == NULL || *nme1 == 0) {
324 0 : GDKerror("no file specified\n");
325 0 : return GDK_FAIL;
326 : }
327 5691032 : if (GDKfilepath(path1, sizeof(path1), farmid, dir1, nme1, ext1) == GDK_SUCCEED &&
328 2845516 : GDKfilepath(path2, sizeof(path2), farmid, dir2, nme2, ext2) == GDK_SUCCEED) {
329 2845516 : ret = MT_rename(path1, path2);
330 2845516 : if (ret < 0 && report)
331 0 : GDKsyserror("cannot rename %s to %s\n", path1, path2);
332 :
333 2845516 : TRC_DEBUG(IO, "Move %s %s = %d ("LLFMT" usec)\n", path1, path2, ret, GDKusec() - t0);
334 : } else {
335 : ret = -1;
336 : }
337 2845516 : return ret < 0 ? GDK_FAIL : GDK_SUCCEED;
338 : }
339 :
340 : gdk_return
341 3113 : GDKextendf(int fd, size_t size, const char *fn)
342 : {
343 3113 : struct stat stb;
344 3113 : int rt = 0;
345 3113 : lng t0 = GDKusec();
346 :
347 3113 : assert(!GDKinmemory(0));
348 : #ifdef __COVERITY__
349 : if (fd < 0) /* in real life, if fd < 0, fstat will fail */
350 : return GDK_FAIL;
351 : #endif
352 3114 : if (fstat(fd, &stb) < 0) {
353 : /* shouldn't happen */
354 0 : GDKsyserror("fstat failed unexpectedly\n");
355 0 : return GDK_FAIL;
356 : }
357 : /* if necessary, extend the underlying file */
358 3115 : if (stb.st_size < (off_t) size) {
359 : #ifdef HAVE_FALLOCATE
360 2145 : if ((rt = fallocate(fd, 0, stb.st_size, (off_t) size - stb.st_size)) < 0 &&
361 0 : errno == EOPNOTSUPP)
362 : /* on Linux, posix_fallocate uses a slow
363 : * method to allocate blocks if the underlying
364 : * file system doesn't support the operation,
365 : * so use fallocate instead and just resize
366 : * the file if it fails */
367 : #else
368 : #ifdef HAVE_POSIX_FALLOCATE
369 : /* posix_fallocate returns error number on failure,
370 : * not -1 :-( */
371 : if ((rt = posix_fallocate(fd, stb.st_size, (off_t) size - stb.st_size)) == EINVAL)
372 : /* on Solaris/OpenIndiana, this may mean that
373 : * the underlying file system doesn't support
374 : * the operation, so just resize the file */
375 : #endif
376 : #endif
377 : /* we get here when (posix_)fallocate fails
378 : * because it is not supported on the file
379 : * system, or if neither function exists */
380 0 : rt = ftruncate(fd, (off_t) size);
381 2136 : if (rt != 0) {
382 : /* extending failed, try to reduce file size
383 : * back to original */
384 0 : GDKsyserror("could not extend file\n");
385 0 : if (ftruncate(fd, stb.st_size))
386 0 : GDKsyserror("ftruncate to old size");
387 : }
388 : }
389 3106 : TRC_DEBUG(IO, "GDKextend %s %zu -> %zu "LLFMT" usec%s\n",
390 : fn, (size_t) stb.st_size, size,
391 : GDKusec() - t0, rt != 0 ? " (failed)" : "");
392 : /* posix_fallocate returns != 0 on failure, fallocate and
393 : * ftruncate return -1 on failure, but all three return 0 on
394 : * success */
395 3108 : return rt != 0 ? GDK_FAIL : GDK_SUCCEED;
396 : }
397 :
398 : gdk_return
399 2613 : GDKextend(const char *fn, size_t size)
400 : {
401 2613 : int fd, flags = O_RDWR;
402 2613 : gdk_return rt = GDK_FAIL;
403 :
404 2613 : assert(!GDKinmemory(0));
405 : #ifdef O_BINARY
406 : /* On Windows, open() fails if the file is bigger than 2^32
407 : * bytes without O_BINARY. */
408 : flags |= O_BINARY;
409 : #endif
410 2612 : if ((fd = MT_open(fn, flags | O_CLOEXEC)) >= 0) {
411 2619 : rt = GDKextendf(fd, size, fn);
412 2610 : close(fd);
413 : } else {
414 0 : GDKsyserror("cannot open file %s\n", fn);
415 : }
416 2613 : return rt;
417 : }
418 :
419 : /*
420 : * @+ Save and load.
421 : * The BAT is saved on disk in several files. The extension DESC
422 : * denotes the descriptor, BUNs the bun heap, and HHEAP and THEAP the
423 : * other heaps. The storage mechanism off a file can be memory mapped
424 : * (STORE_MMAP) or malloced (STORE_MEM).
425 : *
426 : * These modes indicates the disk-layout and the intended mapping.
427 : * The primary concern here is to handle STORE_MMAP and STORE_MEM.
428 : */
429 : gdk_return
430 365423 : GDKsave(int farmid, const char *nme, const char *ext, void *buf, size_t size, storage_t mode, bool dosync)
431 : {
432 365423 : int err = 0;
433 :
434 365423 : TRC_DEBUG(IO, "GDKsave: name=%s, ext=%s, mode %d, dosync=%d\n", nme, ext ? ext : "", (int) mode, dosync);
435 :
436 365423 : assert(!GDKinmemory(farmid));
437 365423 : if (mode == STORE_MMAP) {
438 1077 : if (dosync && size && !(ATOMIC_GET(&GDKdebug) & NOSYNCMASK))
439 0 : err = MT_msync(buf, size);
440 0 : if (err)
441 0 : GDKerror("error on: name=%s, ext=%s, mode=%d\n",
442 : nme, ext ? ext : "", (int) mode);
443 1077 : TRC_DEBUG(IO, "MT_msync(buf %p, size %zu) = %d\n",
444 : buf, size, err);
445 : } else {
446 364346 : int fd;
447 :
448 364346 : if ((fd = GDKfdlocate(farmid, nme, "wb", ext)) >= 0) {
449 : /* write() on 64-bits Redhat for IA64 returns
450 : * 32-bits signed result (= OS BUG)! write()
451 : * on Windows only takes unsigned int as
452 : * size */
453 728692 : while (size > 0) {
454 : /* circumvent problems by writing huge
455 : * buffers in chunks <= 1GiB */
456 364346 : ssize_t ret;
457 :
458 728692 : ret = write(fd, buf,
459 : (unsigned) MIN(1 << 30, size));
460 364346 : if (ret < 0) {
461 0 : err = -1;
462 0 : GDKsyserror("GDKsave: error %zd"
463 : " on: name=%s, ext=%s, "
464 : "mode=%d\n", ret, nme,
465 : ext ? ext : "", (int) mode);
466 0 : break;
467 : }
468 364346 : size -= ret;
469 364346 : buf = (void *) ((char *) buf + ret);
470 1093038 : TRC_DEBUG(IO, "Write(fd %d, buf %p"
471 : ", size %u) = %zd\n",
472 : fd, buf,
473 : (unsigned) MIN(1 << 30, size),
474 : ret);
475 : }
476 364346 : if (dosync && !(ATOMIC_GET(&GDKdebug) & NOSYNCMASK)
477 : #if defined(NATIVE_WIN32)
478 : && _commit(fd) < 0
479 : #elif defined(HAVE_FDATASYNC)
480 190 : && fdatasync(fd) < 0
481 : #elif defined(HAVE_FSYNC)
482 : && fsync(fd) < 0
483 : #endif
484 : ) {
485 0 : GDKsyserror("GDKsave: error on: name=%s, "
486 : "ext=%s, mode=%d\n", nme,
487 : ext ? ext : "", (int) mode);
488 0 : err = -1;
489 : }
490 364346 : err |= close(fd);
491 364346 : if (err && GDKunlink(farmid, BATDIR, nme, ext) != GDK_SUCCEED) {
492 : /* do not tolerate corrupt heap images
493 : * (BBPrecover on restart will kill
494 : * them) */
495 0 : GDKerror("could not remove: name=%s, "
496 : "ext=%s, mode %d\n", nme,
497 : ext ? ext : "", (int) mode);
498 0 : return GDK_FAIL;
499 : }
500 : } else {
501 0 : err = -1;
502 0 : GDKerror("failed name=%s, ext=%s, mode %d\n",
503 : nme, ext ? ext : "", (int) mode);
504 : }
505 : }
506 365423 : return err ? GDK_FAIL : GDK_SUCCEED;
507 : }
508 :
/*
 * Space for the load is directly allocated and the heaps are mapped.
 * Further initialization of the atom heaps require a separate action
 * defined in their implementation.
 *
 * farmid -- farm in which the file lives, or NOFARM when nme is
 *           already a path (ext must then be NULL)
 * nme, ext -- name and extension of the file
 * size -- how much to read
 * *maxsize -- (in/out) how much to allocate / how much was allocated
 * mode -- STORE_MEM reads the file into freshly allocated memory;
 *         any other mode memory maps the file (STORE_PRIV as a
 *         private copy, otherwise synced)
 *
 * Returns a pointer to the loaded/mapped data, or NULL on failure.
 */
char *
GDKload(int farmid, const char *nme, const char *ext, size_t size, size_t *maxsize, storage_t mode)
{
	char *ret = NULL;

	assert(!GDKinmemory(farmid));
	assert(size <= *maxsize);
	assert(farmid != NOFARM || ext == NULL);
	TRC_DEBUG(IO, "GDKload: name=%s, ext=%s, mode %d\n", nme, ext ? ext : "", (int) mode);

	if (mode == STORE_MEM) {
		int fd = GDKfdlocate(farmid, nme, "rb", ext);

		if (fd >= 0) {
			/* allocate the full *maxsize, even though only
			 * size bytes are read from the file */
			char *dst = ret = GDKmalloc(*maxsize);
			ssize_t n_expected, n = 0;

			if (ret) {
				/* read in chunks, some OSs do not
				 * give you all at once and Windows
				 * only accepts int */
				for (n_expected = (ssize_t) size; n_expected > 0; n_expected -= n) {
					n = read(fd, dst, (unsigned) MIN(1 << 30, n_expected));
					if (n < 0)
						GDKsyserror("GDKload: cannot read: name=%s, ext=%s, expected %zu, %zd bytes missing\n", nme, ext ? ext : "", size, n_expected);
#ifndef __COVERITY__
					/* Coverity doesn't seem to
					 * recognize that we're just
					 * printing the value of ptr,
					 * not its contents */
					TRC_DEBUG(IO, "read(dst %p, n_expected %zd, fd %d) = %zd\n", (void *)dst, n_expected, fd, n);
#endif

					/* error (n < 0) or end of file
					 * (n == 0): stop reading */
					if (n <= 0)
						break;
					dst += n;
				}
				if (n_expected > 0) {
					/* we couldn't read all, error
					 * already generated */
					GDKfree(ret);
					if (n >= 0) /* don't report error twice */
						GDKerror("short read from heap %s%s%s, expected %zu, missing %zd\n", nme, ext ? "." : "", ext ? ext : "", size, n_expected);
					ret = NULL;
				}
#ifndef NDEBUG
				/* just to make valgrind happy, we
				 * initialize the whole thing */
				if (ret && *maxsize > size)
					memset(ret + size, 0, *maxsize - size);
#endif
			}
			close(fd);
		} else {
			GDKsyserror("cannot open: name=%s, ext=%s\n", nme, ext ? ext : "");
		}
	} else {
		char path[MAXPATH];

		/* round up to multiple of GDK_mmap_pagesize with a
		 * minimum of one */
		size = (*maxsize + GDK_mmap_pagesize - 1) & ~(GDK_mmap_pagesize - 1);
		if (size == 0)
			size = GDK_mmap_pagesize;
		if (farmid != NOFARM) {
			if (GDKfilepath(path, sizeof(path), farmid, BATDIR, nme, ext) != GDK_SUCCEED)
				return NULL;
			nme = path;
		}
		/* the file is first grown to the rounded-up size so
		 * that the whole mapping is backed by the file */
		if (nme != NULL && GDKextend(nme, size) == GDK_SUCCEED) {
			int mod = MMAP_READ | MMAP_WRITE | MMAP_SEQUENTIAL;

			if (mode == STORE_PRIV)
				mod |= MMAP_COPY;
			else
				mod |= MMAP_SYNC;
			ret = GDKmmap(nme, mod, size);
			if (ret != NULL) {
				/* success: update allocated size */
				*maxsize = size;
			}
			TRC_DEBUG(IO, "mmap(NULL, 0, maxsize %zu, mod %d, path %s, 0) = %p\n", size, mod, nme, (void *)ret);
		}
	}
	return ret;
}
603 :
604 : /*
605 : * @+ BAT disk storage
606 : *
607 : * Between sessions the BATs comprising the database are saved on
608 : * disk. To simplify code, we assume a UNIX directory called its
609 : * physical @%home@ where they are to be located. The subdirectories
610 : * BAT and PRG contain what its name says.
611 : *
612 : * A BAT created by @%COLnew@ is considered temporary until one calls
613 : * the routine @%BATsave@. This routine reserves disk space and checks
614 : * for name clashes.
615 : *
616 : * Saving and restoring BATs is left to the upper layers. The library
617 : * merely copies the data into place. Failure to read or write the
618 : * BAT results in a NULL, otherwise it returns the BAT pointer.
619 : */
620 : static BAT *
621 23748 : DESCload(int i)
622 : {
623 23748 : const char *nme = BBP_physical(i);
624 23748 : BAT *b = NULL;
625 23748 : int tt;
626 :
627 23748 : TRC_DEBUG(IO, "DESCload: %s\n", nme ? nme : "<noname>");
628 :
629 23748 : b = BBP_desc(i);
630 :
631 23748 : if (b->batCacheid == 0) {
632 0 : GDKerror("no descriptor for BAT %d\n", i);
633 0 : return NULL;
634 : }
635 :
636 23748 : MT_lock_set(&b->theaplock);
637 23749 : tt = b->ttype;
638 23749 : if (tt < 0) {
639 0 : const char *s = ATOMunknown_name(tt);
640 0 : if ((tt = ATOMindex(s)) < 0) {
641 0 : MT_lock_unset(&b->theaplock);
642 0 : GDKerror("atom '%s' unknown, in BAT '%s'.\n", s, nme);
643 0 : return NULL;
644 : }
645 0 : b->ttype = tt;
646 : }
647 :
648 : /* reconstruct mode from BBP status (BATmode doesn't flush
649 : * descriptor, so loaded mode may be stale) */
650 23749 : b->batTransient = (BBP_status(b->batCacheid) & BBPPERSISTENT) == 0;
651 23749 : b->batCopiedtodisk = true;
652 23749 : MT_lock_unset(&b->theaplock);
653 23749 : return b;
654 : }
655 :
/* Save the heaps of BAT b to disk, using the snapshot of the BAT that
 * is held in the iterator bi.
 *
 * b	the BAT to save
 * bi	iterator on b; the heaps and dirty flags recorded in it are
 *	the ones that get saved
 * size	the count the caller is saving; if it differs from the
 *	current b->batCount the heaps are marked dirty again
 *
 * Views cannot be saved (GDK_FAIL); a BAT that is not dirty according
 * to the iterator is left alone (GDK_SUCCEED).  Data is only synced
 * to disk for persistent BATs. */
gdk_return
BATsave_iter(BAT *b, BATiter *bi, BUN size)
{
	gdk_return err = GDK_SUCCEED;
	bool dosync;
	bool locked = false;

	BATcheck(b, GDK_FAIL);

	/* only try to get the hash lock: without it everything still
	 * gets saved except the hash (see the end of the function) */
	if (MT_rwlock_rdtry(&b->thashlock))
		locked = true;

	dosync = (BBP_status(b->batCacheid) & BBPPERSISTENT) != 0;
	assert(!GDKinmemory(bi->h->farmid));
	/* views cannot be saved, but make an exception for
	 * force-remapped views */
	if (isVIEW(b)) {
		if (locked)
			MT_rwlock_rdunlock(&b->thashlock);
		GDKerror("%s is a view on %s; cannot be saved\n", BATgetId(b), BBP_logical(VIEWtparent(b)));
		return GDK_FAIL;
	}
	if (!BATdirtybi(*bi)) {
		if (locked)
			MT_rwlock_rdunlock(&b->thashlock);
		return GDK_SUCCEED;
	}

	/* start saving data */
	if (bi->type != TYPE_void && bi->base == NULL) {
		/* there is no data in memory (the assert expects the
		 * BAT to be swapped out), so there is nothing to
		 * write; at most the existing files are synced */
		assert(BBP_status(b->batCacheid) & BBPSWAPPED);
		if (dosync && !(ATOMIC_GET(&GDKdebug) & NOSYNCMASK)) {
			int fd = GDKfdlocate(bi->h->farmid, bi->h->filename, "rb+", NULL);
			if (fd < 0) {
				GDKsyserror("cannot open file %s for sync\n",
					    bi->h->filename);
				err = GDK_FAIL;
			} else {
				/* NOTE(review): a failing sync is
				 * reported but does not set err, so
				 * the function can still return
				 * GDK_SUCCEED -- confirm this is
				 * intended */
				if (
#if defined(NATIVE_WIN32)
					_commit(fd) < 0
#elif defined(HAVE_FDATASYNC)
					fdatasync(fd) < 0
#elif defined(HAVE_FSYNC)
					fsync(fd) < 0
#endif
					)
					GDKsyserror("sync failed for %s\n",
						    bi->h->filename);
				close(fd);
			}
			if (bi->vh) {
				/* same again for the var-sized heap */
				fd = GDKfdlocate(bi->vh->farmid, bi->vh->filename, "rb+", NULL);
				if (fd < 0) {
					GDKsyserror("cannot open file %s for sync\n",
						    bi->vh->filename);
					err = GDK_FAIL;
				} else {
					if (
#if defined(NATIVE_WIN32)
						_commit(fd) < 0
#elif defined(HAVE_FDATASYNC)
						fdatasync(fd) < 0
#elif defined(HAVE_FSYNC)
						fsync(fd) < 0
#endif
						)
						GDKsyserror("sync failed for %s\n", bi->vh->filename);
					close(fd);
				}
			}
		}
	} else {
		const char *nme = BBP_physical(b->batCacheid);
		if ((!bi->copiedtodisk || bi->hdirty)
		    && (err == GDK_SUCCEED && bi->type)) {
			/* the extension is whatever follows the first
			 * '.' in the heap's file name
			 * NOTE(review): assumes the file name always
			 * contains a '.'; strchr's result is not
			 * checked */
			const char *tail = strchr(bi->h->filename, '.') + 1;
			err = HEAPsave(bi->h, nme, tail, dosync, bi->hfree, &b->theaplock);
		}
		if (bi->vh
		    && (!bi->copiedtodisk || bi->vhdirty)
		    && ATOMvarsized(bi->type)
		    && err == GDK_SUCCEED)
			err = HEAPsave(bi->vh, nme, "theap", dosync, bi->vhfree, &b->theaplock);
	}

	if (err == GDK_SUCCEED) {
		MT_lock_set(&b->theaplock);
		/* the BAT may have switched to different heaps since
		 * the iterator was made: carry over the file status
		 * of the heaps that were saved */
		if (b->theap != bi->h) {
			assert(b->theap->dirty);
			b->theap->wasempty = bi->h->wasempty;
			b->theap->hasfile |= bi->h->hasfile;
		}
		if (b->tvheap && b->tvheap != bi->vh) {
			assert(b->tvheap->dirty);
			b->tvheap->wasempty = bi->vh->wasempty;
			b->tvheap->hasfile |= bi->vh->hasfile;
		}
		if (size != b->batCount) {
			/* if the size doesn't match, the BAT must be dirty */
			b->theap->dirty = true;
			if (b->tvheap)
				b->tvheap->dirty = true;
		}
		/* there is something on disk now */
		b->batCopiedtodisk = true;
		MT_lock_unset(&b->theaplock);
		/* the hash is only saved if we hold the hash lock and
		 * there is a real hash (the value 1 is skipped) */
		if (locked && b->thash && b->thash != (Hash *) 1)
			BAThashsave(b, dosync);
	}
	if (locked)
		MT_rwlock_rdunlock(&b->thashlock);
	return err;
}
770 :
771 : gdk_return
772 524 : BATsave(BAT *b)
773 : {
774 524 : gdk_return rc;
775 :
776 524 : BATiter bi = bat_iterator(b);
777 524 : rc = BATsave_iter(b, &bi, bi.count);
778 524 : bat_iterator_end(&bi);
779 524 : return rc;
780 : }
781 :
/*
 * TODO: move to gdk_bbp.c
 *
 * Load the BAT with id bid: get its descriptor, load (or, when
 * empty, freshly allocate) the tail heap and, for var-sized types,
 * the var heap, and register the result in the BBP.  The lock
 * argument is passed on to BBPcacheit.  Returns NULL on failure,
 * after freeing whatever heaps had been loaded.
 */
BAT *
BATload_intern(bat bid, bool lock)
{
	const char *nme;
	BAT *b;

	assert(!GDKinmemory(0));
	assert(bid > 0);

	nme = BBP_physical(bid);
	b = DESCload(bid);

	if (b == NULL) {
		return NULL;
	}
	assert(!GDKinmemory(b->theap->farmid));

	/* LOAD bun heap */
	if (b->ttype != TYPE_void) {
		b->theap->storage = b->theap->newstorage = STORE_INVALID;
		/* an empty BAT has nothing to read: allocate a fresh
		 * heap instead of loading one from disk */
		if ((b->batCount == 0 ?
		     HEAPalloc(b->theap, b->batCapacity, b->twidth) :
		     HEAPload(b->theap, b->theap->filename, NULL, b->batRestricted == BAT_READ)) != GDK_SUCCEED) {
			HEAPfree(b->theap, false);
			return NULL;
		}
		/* derive the capacity from the size of the heap: msk
		 * values are counted per bit, everything else per
		 * element of (1 << tshift) bytes */
		if (ATOMstorage(b->ttype) == TYPE_msk) {
			b->batCapacity = (BUN) (b->theap->size * 8);
		} else {
			assert(b->theap->size >> b->tshift <= BUN_MAX);
			b->batCapacity = (BUN) (b->theap->size >> b->tshift);
		}
	} else {
		b->theap->base = NULL;
	}

	/* LOAD tail heap */
	if (ATOMvarsized(b->ttype)) {
		b->tvheap->storage = b->tvheap->newstorage = STORE_INVALID;
		/* an unused var heap is created fresh, otherwise it
		 * is loaded from disk */
		if ((b->tvheap->free == 0 ?
		     ATOMheap(b->ttype, b->tvheap, b->batCapacity) :
		     HEAPload(b->tvheap, nme, "theap", b->batRestricted == BAT_READ)) != GDK_SUCCEED) {
			HEAPfree(b->theap, false);
			HEAPfree(b->tvheap, false);
			return NULL;
		}
		if (ATOMstorage(b->ttype) == TYPE_str) {
			strCleanHash(b->tvheap, false);	/* ensure consistency */
		} else {
			HEAP_recover(b->tvheap, (const var_t *) Tloc(b, 0),
				     BATcount(b));
		}
	}

	/* initialize descriptor */
	b->theap->parentid = b->batCacheid;

	/* load succeeded; register it in BBP */
	if (BBPcacheit(b, lock) != GDK_SUCCEED) {
		HEAPfree(b->theap, false);
		if (b->tvheap)
			HEAPfree(b->tvheap, false);
		return NULL;
	}
	return b;
}
851 :
852 : /*
853 : * @- BATdelete
854 : * The new behavior is to let the routine produce warnings but always
855 : * succeed. rationale: on a delete, we must get rid of *all* the
856 : * files. We do not have to care about preserving them or be too much
857 : * concerned if a file that had to be deleted was not found (end
858 : * result is still that it does not exist). The past behavior to
859 : * delete some files and then fail was erroneous. The BAT would
860 : * continue to exist with an incorrect disk status, causing havoc
861 : * later on.
862 : *
863 : * NT forces us to close all files before deleting them; in case of
864 : * memory mapped files this means that we have to unload the BATs
865 : * before deleting. This is enforced now.
866 : */
867 : void
868 24418372 : BATdelete(BAT *b)
869 : {
870 24418372 : HASHdestroy(b);
871 24360221 : OIDXdestroy(b);
872 24470499 : PROPdestroy_nolock(b);
873 24437139 : STRMPdestroy(b);
874 24454901 : RTREEdestroy(b);
875 24410979 : if (b->theap) {
876 4946 : HEAPfree(b->theap, true);
877 : }
878 24427576 : if (b->tvheap) {
879 496 : HEAPfree(b->tvheap, true);
880 : }
881 24427576 : b->batCopiedtodisk = false;
882 24427576 : }
883 :
884 : /*
885 : * BAT specific printing
886 : */
887 :
888 : gdk_return
889 685 : BATprintcolumns(stream *s, int argc, BAT *argv[])
890 : {
891 685 : int i;
892 685 : BUN n, cnt;
893 685 : struct colinfo {
894 : ssize_t (*s) (str *, size_t *, const void *, bool);
895 : BATiter i;
896 : } *colinfo;
897 685 : char *buf;
898 685 : size_t buflen = 0;
899 685 : ssize_t len;
900 685 : gdk_return rc = GDK_SUCCEED;
901 :
902 : /* error checking */
903 2122 : for (i = 0; i < argc; i++) {
904 1439 : if (argv[i] == NULL) {
905 0 : GDKerror("Columns missing\n");
906 0 : return GDK_FAIL;
907 : }
908 1439 : if (BATcount(argv[0]) != BATcount(argv[i])) {
909 2 : GDKerror("Columns must be the same size\n");
910 2 : return GDK_FAIL;
911 : }
912 : }
913 :
914 683 : if ((colinfo = GDKmalloc(argc * sizeof(*colinfo))) == NULL) {
915 0 : GDKerror("Cannot allocate memory\n");
916 0 : return GDK_FAIL;
917 : }
918 :
919 2114 : for (i = 0; i < argc; i++) {
920 1431 : colinfo[i].i = bat_iterator(argv[i]);
921 1431 : colinfo[i].s = BATatoms[argv[i]->ttype].atomToStr;
922 : }
923 :
924 683 : mnstr_write(s, "#--------------------------#\n", 1, 29);
925 683 : mnstr_write(s, "# ", 1, 2);
926 2797 : for (i = 0; i < argc; i++) {
927 1431 : if (i > 0)
928 748 : mnstr_write(s, "\t", 1, 1);
929 1431 : const char *nm = ATOMname(argv[i]->ttype);
930 1431 : mnstr_write(s, nm, 1, strlen(nm));
931 : }
932 683 : mnstr_write(s, " # type\n", 1, 9);
933 683 : mnstr_write(s, "#--------------------------#\n", 1, 29);
934 683 : buf = NULL;
935 :
936 3890 : for (n = 0, cnt = BATcount(argv[0]); n < cnt; n++) {
937 3207 : mnstr_write(s, "[ ", 1, 2);
938 13152 : for (i = 0; i < argc; i++) {
939 6738 : len = colinfo[i].s(&buf, &buflen, BUNtail(colinfo[i].i, n), true);
940 6738 : if (len < 0) {
941 0 : rc = GDK_FAIL;
942 0 : goto bailout;
943 : }
944 6738 : if (i > 0)
945 3531 : mnstr_write(s, ",\t", 1, 2);
946 6738 : mnstr_write(s, buf, 1, len);
947 : }
948 3207 : mnstr_write(s, " ]\n", 1, 4);
949 : }
950 :
951 683 : bailout:
952 2114 : for (i = 0; i < argc; i++) {
953 1431 : bat_iterator_end(&colinfo[i].i);
954 : }
955 683 : GDKfree(buf);
956 683 : GDKfree(colinfo);
957 :
958 683 : return rc;
959 : }
960 :
961 : gdk_return
962 632 : BATprint(stream *fdout, BAT *b)
963 : {
964 632 : if (complex_cand(b)) {
965 0 : struct canditer ci;
966 0 : canditer_init(&ci, NULL, b);
967 0 : oid hseq = ci.hseq;
968 :
969 0 : mnstr_printf(fdout,
970 : "#--------------------------#\n"
971 : "# void\toid # type\n"
972 : "#--------------------------#\n");
973 0 : for (BUN i = 0; i < ci.ncand; i++) {
974 0 : oid o = canditer_next(&ci);
975 0 : mnstr_printf(fdout,
976 : "[ " OIDFMT "@0,\t" OIDFMT "@0 ]\n",
977 : (oid) (i + hseq), o);
978 : }
979 0 : return GDK_SUCCEED;
980 : }
981 :
982 632 : BAT *argv[2];
983 632 : gdk_return ret = GDK_FAIL;
984 :
985 632 : argv[0] = BATdense(b->hseqbase, b->hseqbase, BATcount(b));
986 632 : if (argv[0]) {
987 632 : argv[1] = b;
988 632 : ret = BATprintcolumns(fdout, 2, argv);
989 632 : BBPunfix(argv[0]->batCacheid);
990 : }
991 : return ret;
992 : }
|