Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * @a M. L. Kersten, P. Boncz, N. Nes
15 : *
16 : * @* Database Storage Management
17 : * Contains routines for writing and reading GDK data to and from
18 : * disk. This section contains the primitives to manage the
19 : * disk-based images of the BATs. It relies on the existence of a UNIX
20 : * file system, including memory mapped files. Solaris and IRIX have
21 : * different implementations of madvise().
22 : *
23 : * The current version assumes that all BATs are stored on a single
24 : * disk partition. This simplistic assumption should be replaced in
25 : * the near future by a multi-volume version. The intention is to use
26 : * several BAT home locations. The files should be owned by the
27 : * database server. Otherwise, IO operations are likely to fail. This
28 : * is accomplished by setting the GID and UID upon system start.
29 : */
30 : #include "monetdb_config.h"
31 : #include "gdk.h"
32 : #include "gdk_private.h"
33 : #include "mutils.h"
34 : #ifdef HAVE_FCNTL_H
35 : #include <fcntl.h>
36 : #endif
37 :
38 : #ifndef O_CLOEXEC
39 : #ifdef _O_NOINHERIT
40 : #define O_CLOEXEC _O_NOINHERIT /* Windows */
41 : #else
42 : #define O_CLOEXEC 0
43 : #endif
44 : #endif
45 :
46 : /* GDKfilepath writes the path name of a database file into path and
47 : * returns a pointer to it. The arguments are the buffer into which the
48 : * path is written, the size of that buffer, the farmID or -1, the name
49 : * of a subdirectory within the farm (i.e., something like BATDIR or
50 : * BAKDIR -- see gdk.h) or NULL, the name of a BAT (i.e. the name that
51 : * is stored in BBP.dir -- something like 07/714), and finally the file
52 : * extension.
53 : *
54 : * If farmid is >= 0, GDKfilepath returns the complete path to the
55 : * specified farm concatenated with the other arguments with appropriate
56 : * separators. If farmid is NOFARM (i.e. -1) , it returns the
57 : * concatenation of its other arguments (in this case, the result cannot
58 : * be used to access a file directly -- the farm needs to be prepended
59 : * in some other place). */
60 : gdk_return
61 26629638 : GDKfilepath(char *path, size_t pathlen, int farmid, const char *dir, const char *name, const char *ext)
62 : {
63 26629638 : const char *sep;
64 :
65 26629638 : if (GDKinmemory(farmid)) {
66 1 : if (strcpy_len(path, ":memory:", pathlen) >= pathlen)
67 : return GDK_FAIL;
68 : return GDK_SUCCEED;
69 : }
70 :
71 26626989 : assert(dir == NULL || *dir != DIR_SEP);
72 26626989 : assert(farmid == NOFARM ||
73 : (farmid >= 0 && farmid < MAXFARMS && BBPfarms[farmid].dirname));
74 26626989 : if (!GDKembedded() && MT_path_absolute(name)) {
75 0 : GDKerror("name should not be absolute\n");
76 0 : return GDK_FAIL;
77 : }
78 26624056 : if (dir && *dir == DIR_SEP)
79 0 : dir++;
80 26494970 : if (dir == NULL || dir[0] == 0 || dir[strlen(dir) - 1] == DIR_SEP) {
81 : sep = "";
82 : } else {
83 26455650 : sep = DIR_SEP_STR;
84 : }
85 26624056 : size_t len;
86 26624056 : if (farmid == NOFARM) {
87 2439296 : len = strconcat_len(path, pathlen,
88 : dir ? dir : "", sep, name,
89 : ext ? "." : NULL, ext, NULL);
90 : } else {
91 41987196 : len = strconcat_len(path, pathlen,
92 : BBPfarms[farmid].dirname, DIR_SEP_STR,
93 : dir ? dir : "", sep, name,
94 : ext ? "." : NULL, ext, NULL);
95 : }
96 26639376 : if (len >= pathlen) {
97 0 : GDKerror("path name too long\n");
98 0 : return GDK_FAIL;
99 : }
100 : return GDK_SUCCEED;
101 : }
102 :
103 : /* make sure the parent directory of DIR exists (the argument itself
104 : * is usually a file that is to be created) */
105 : gdk_return
106 3799 : GDKcreatedir(const char *dir)
107 : {
108 3799 : char path[FILENAME_MAX];
109 3799 : char *r;
110 3799 : DIR *dirp;
111 :
112 3799 : TRC_DEBUG(IO_, "GDKcreatedir(%s)\n", dir);
113 3799 : assert(!GDKinmemory(0));
114 3798 : if (!GDKembedded() && !MT_path_absolute(dir)) {
115 0 : GDKerror("directory '%s' is not absolute\n", dir);
116 0 : return GDK_FAIL;
117 : }
118 3799 : if (strlen(dir) >= FILENAME_MAX) {
119 0 : GDKerror("directory name too long\n");
120 0 : return GDK_FAIL;
121 : }
122 3799 : strcpy(path, dir); /* we know this fits (see above) */
123 : /* skip initial /, if any */
124 50088 : for (r = strchr(path + 1, DIR_SEP); r; r = strchr(r, DIR_SEP)) {
125 46288 : *r = 0;
126 46288 : if (
127 : #ifdef WIN32
128 : strlen(path) > 3 &&
129 : #endif
130 46293 : MT_mkdir(path) < 0) {
131 42280 : if (errno != EEXIST) {
132 0 : GDKsyserror("cannot create directory %s\n", path);
133 0 : return GDK_FAIL;
134 : }
135 42280 : if ((dirp = opendir(path)) == NULL) {
136 0 : GDKsyserror("%s cannot open directory\n", path);
137 0 : return GDK_FAIL;
138 : }
139 : /* it's a directory, we can continue */
140 42256 : closedir(dirp);
141 : }
142 46289 : *r++ = DIR_SEP;
143 : }
144 : return GDK_SUCCEED;
145 : }
146 :
147 : /* remove the directory DIRNAME with its file contents; does not
148 : * recurse into subdirectories */
149 : gdk_return
150 12826 : GDKremovedir(int farmid, const char *dirname)
151 : {
152 12826 : char dirnamestr[MAXPATH];
153 12826 : DIR *dirp;
154 12826 : char path[MAXPATH];
155 12826 : struct dirent *dent;
156 12826 : int ret;
157 :
158 12826 : assert(!GDKinmemory(farmid));
159 12826 : if (GDKfilepath(dirnamestr, sizeof(dirnamestr), farmid, NULL, dirname, NULL) != GDK_SUCCEED)
160 : return GDK_FAIL;
161 :
162 12826 : TRC_DEBUG(IO_, "GDKremovedir(%s)\n", dirnamestr);
163 :
164 12826 : if ((dirp = opendir(dirnamestr)) == NULL) {
165 : return GDK_SUCCEED;
166 : }
167 272740 : while ((dent = readdir(dirp)) != NULL) {
168 260615 : if (dent->d_name[0] == '.' &&
169 24250 : (dent->d_name[1] == 0 ||
170 12125 : (dent->d_name[1] == '.' && dent->d_name[2] == 0))) {
171 : /* skip . and .. */
172 24250 : continue;
173 : }
174 236365 : if (GDKfilepath(path, sizeof(path), farmid, dirname, dent->d_name, NULL) != GDK_SUCCEED) {
175 : /* most likely the rmdir will now fail causing
176 : * an error return */
177 : break;
178 : }
179 236365 : ret = MT_remove(path);
180 236365 : if (ret == -1)
181 0 : GDKsyserror("remove(%s) failed\n", path);
182 509105 : TRC_DEBUG(IO_, "Remove %s = %d\n", path, ret);
183 : }
184 12125 : closedir(dirp);
185 12125 : ret = MT_rmdir(dirnamestr);
186 12125 : if (ret != 0)
187 0 : GDKsyserror("rmdir(%s) failed\n", dirnamestr);
188 12125 : TRC_DEBUG(IO_, "rmdir %s = %d\n", dirnamestr, ret);
189 12125 : return ret ? GDK_FAIL : GDK_SUCCEED;
190 : }
191 :
192 : #define _FUNBUF 0x040000
193 : #define _FWRTHR 0x080000
194 : #define _FRDSEQ 0x100000
195 :
196 : /* open a file and return its file descriptor; the file is specified
197 : * using farmid, name and extension; if opening for writing, we create
198 : * the parent directory if necessary; if opening for reading, we don't
199 : * necessarily report an error if it fails, but we make sure errno is
200 : * set */
201 : int
202 423907 : GDKfdlocate(int farmid, const char *nme, const char *mode, const char *extension)
203 : {
204 423907 : char path[MAXPATH];
205 423907 : int fd, flags = O_CLOEXEC;
206 :
207 423907 : assert(!GDKinmemory(farmid));
208 423885 : if (nme == NULL || *nme == 0) {
209 0 : GDKerror("no name specified\n");
210 0 : errno = EFAULT;
211 0 : return -1;
212 : }
213 :
214 423885 : assert(farmid != NOFARM || extension == NULL);
215 423885 : if (farmid != NOFARM) {
216 422323 : if (GDKfilepath(path, sizeof(path), farmid, BATDIR, nme, extension) != GDK_SUCCEED) {
217 0 : errno = ENOMEM;
218 0 : return -1;
219 : }
220 : nme = path;
221 : }
222 :
223 423906 : if (*mode == 'm') { /* file open for mmap? */
224 0 : mode++;
225 : #ifdef _CYGNUS_H_
226 : } else {
227 : flags |= _FRDSEQ; /* WIN32 CreateFile(FILE_FLAG_SEQUENTIAL_SCAN) */
228 : #endif
229 : }
230 :
231 423906 : if (strchr(mode, 'w')) {
232 : flags |= O_WRONLY | O_CREAT;
233 75188 : } else if (!strchr(mode, '+')) {
234 : flags |= O_RDONLY;
235 : } else {
236 49242 : flags |= O_RDWR;
237 : }
238 : #ifdef WIN32
239 : flags |= strchr(mode, 'b') ? O_BINARY : O_TEXT;
240 : #endif
241 423906 : fd = MT_open(nme, flags);
242 423947 : if (fd < 0 && *mode == 'w') {
243 : /* try to create the directory, in case that was the problem */
244 3575 : if (GDKcreatedir(nme) == GDK_SUCCEED) {
245 3574 : fd = MT_open(nme, flags);
246 3575 : if (fd < 0)
247 0 : GDKsyserror("cannot open file %s\n", nme);
248 : }
249 : }
250 423947 : int err = errno; /* save */
251 : /* don't generate error if we can't open a file for reading */
252 423947 : errno = err; /* restore */
253 423947 : return fd;
254 : }
255 :
/* Like GDKfdlocate, except that the result is a FILE pointer wrapped
 * around the descriptor.  Returns NULL when the file cannot be opened
 * or when the descriptor cannot be turned into a stream (in which case
 * an error is logged and the descriptor is closed again). */
FILE *
GDKfilelocate(int farmid, const char *nme, const char *mode, const char *extension)
{
	const int fd = GDKfdlocate(farmid, nme, mode, extension);

	if (fd < 0)
		return NULL;

	/* a leading 'm' is only meaningful to GDKfdlocate; fdopen gets
	 * the remainder of the mode string */
	const char *fmode = (*mode == 'm') ? mode + 1 : mode;
	FILE *fp = fdopen(fd, fmode);

	if (fp == NULL) {
		GDKsyserror("cannot fdopen file\n");
		close(fd);
	}
	return fp;
}
274 :
275 : FILE *
276 12116 : GDKfileopen(int farmid, const char *dir, const char *name, const char *extension, const char *mode)
277 : {
278 12116 : char path[MAXPATH];
279 :
280 : /* if name is null, try to get one from dir (in case it was a path) */
281 12116 : if (GDKfilepath(path, sizeof(path), farmid, dir, name, extension) == GDK_SUCCEED) {
282 12116 : FILE *f;
283 12116 : TRC_DEBUG(IO_, "GDKfileopen(%s)\n", path);
284 12116 : f = MT_fopen(path, mode);
285 12116 : int err = errno;
286 12116 : errno = err;
287 12116 : return f;
288 : }
289 : return NULL;
290 : }
291 :
292 : /* remove the file */
293 : gdk_return
294 12893 : GDKunlink(int farmid, const char *dir, const char *nme, const char *ext)
295 : {
296 12893 : if (nme && *nme) {
297 12893 : char path[MAXPATH];
298 :
299 12893 : if (GDKfilepath(path, sizeof(path), farmid, dir, nme, ext) != GDK_SUCCEED)
300 : return GDK_FAIL;
301 : /* if file already doesn't exist, we don't care */
302 12893 : if (MT_remove(path) != 0 && errno != ENOENT) {
303 0 : GDKsyserror("remove(%s)\n", path);
304 0 : return GDK_FAIL;
305 : }
306 : return GDK_SUCCEED;
307 : }
308 0 : GDKerror("no name specified");
309 0 : return GDK_FAIL;
310 : }
311 :
312 : /*
313 : * A move routine is overloaded to deal with extensions.
314 : */
315 : gdk_return
316 2700242 : GDKmove(int farmid, const char *dir1, const char *nme1, const char *ext1, const char *dir2, const char *nme2, const char *ext2, bool report)
317 : {
318 2700242 : char path1[MAXPATH];
319 2700242 : char path2[MAXPATH];
320 2700242 : int ret;
321 2700242 : lng t0 = GDKusec();
322 :
323 2700242 : if (nme1 == NULL || *nme1 == 0) {
324 0 : GDKerror("no file specified\n");
325 0 : return GDK_FAIL;
326 : }
327 5400484 : if (GDKfilepath(path1, sizeof(path1), farmid, dir1, nme1, ext1) == GDK_SUCCEED &&
328 2700242 : GDKfilepath(path2, sizeof(path2), farmid, dir2, nme2, ext2) == GDK_SUCCEED) {
329 2700242 : ret = MT_rename(path1, path2);
330 2700242 : if (ret < 0 && report)
331 0 : GDKsyserror("cannot rename %s to %s\n", path1, path2);
332 :
333 2700242 : TRC_DEBUG(IO_, "Move %s %s = %d ("LLFMT" usec)\n", path1, path2, ret, GDKusec() - t0);
334 : } else {
335 : ret = -1;
336 : }
337 2700242 : return ret < 0 ? GDK_FAIL : GDK_SUCCEED;
338 : }
339 :
340 : gdk_return
341 3126 : GDKextendf(int fd, size_t size, const char *fn)
342 : {
343 3126 : struct stat stb;
344 3126 : int rt = 0;
345 3126 : lng t0 = GDKusec();
346 :
347 3128 : assert(!GDKinmemory(0));
348 : #ifdef __COVERITY__
349 : if (fd < 0) /* in real life, if fd < 0, fstat will fail */
350 : return GDK_FAIL;
351 : #endif
352 3127 : if (fstat(fd, &stb) < 0) {
353 : /* shouldn't happen */
354 0 : GDKsyserror("fstat failed unexpectedly\n");
355 0 : return GDK_FAIL;
356 : }
357 : /* if necessary, extend the underlying file */
358 3125 : if (stb.st_size < (off_t) size) {
359 : #ifdef HAVE_FALLOCATE
360 2138 : if ((rt = fallocate(fd, 0, stb.st_size, (off_t) size - stb.st_size)) < 0 &&
361 0 : errno == EOPNOTSUPP)
362 : /* on Linux, posix_fallocate uses a slow
363 : * method to allocate blocks if the underlying
364 : * file system doesn't support the operation,
365 : * so use fallocate instead and just resize
366 : * the file if it fails */
367 : #else
368 : #ifdef HAVE_POSIX_FALLOCATE
369 : /* posix_fallocate returns error number on failure,
370 : * not -1 :-( */
371 : if ((rt = posix_fallocate(fd, stb.st_size, (off_t) size - stb.st_size)) == EINVAL)
372 : /* on Solaris/OpenIndiana, this may mean that
373 : * the underlying file system doesn't support
374 : * the operation, so just resize the file */
375 : #endif
376 : #endif
377 : /* we get here when (posix_)fallocate fails
378 : * because it is not supported on the file
379 : * system, or if neither function exists */
380 0 : rt = ftruncate(fd, (off_t) size);
381 2132 : if (rt != 0) {
382 : /* extending failed, try to reduce file size
383 : * back to original */
384 0 : GDKsyserror("could not extend file\n");
385 0 : if (ftruncate(fd, stb.st_size))
386 0 : GDKsyserror("ftruncate to old size");
387 : }
388 : }
389 3119 : TRC_DEBUG(IO_, "GDKextend %s %zu -> %zu "LLFMT" usec%s\n",
390 : fn, (size_t) stb.st_size, size,
391 : GDKusec() - t0, rt != 0 ? " (failed)" : "");
392 : /* posix_fallocate returns != 0 on failure, fallocate and
393 : * ftruncate return -1 on failure, but all three return 0 on
394 : * success */
395 3119 : return rt != 0 ? GDK_FAIL : GDK_SUCCEED;
396 : }
397 :
398 : gdk_return
399 2622 : GDKextend(const char *fn, size_t size)
400 : {
401 2622 : int fd, flags = O_RDWR;
402 2622 : gdk_return rt = GDK_FAIL;
403 :
404 2622 : assert(!GDKinmemory(0));
405 : #ifdef O_BINARY
406 : /* On Windows, open() fails if the file is bigger than 2^32
407 : * bytes without O_BINARY. */
408 : flags |= O_BINARY;
409 : #endif
410 2618 : if ((fd = MT_open(fn, flags | O_CLOEXEC)) >= 0) {
411 2628 : rt = GDKextendf(fd, size, fn);
412 2617 : close(fd);
413 : } else {
414 0 : GDKsyserror("cannot open file %s\n", fn);
415 : }
416 2624 : return rt;
417 : }
418 :
419 : /*
420 : * @+ Save and load.
421 : * The BAT is saved on disk in several files. The extension DESC
422 : * denotes the descriptor, BUNs the bun heap, and HHEAP and THEAP the
423 : * other heaps. The storage mechanism of a file can be memory mapped
424 : * (STORE_MMAP) or malloced (STORE_MEM).
425 : *
426 : * These modes indicate the disk-layout and the intended mapping.
427 : * The primary concern here is to handle STORE_MMAP and STORE_MEM.
428 : */
429 : gdk_return
430 335852 : GDKsave(int farmid, const char *nme, const char *ext, void *buf, size_t size, storage_t mode, bool dosync)
431 : {
432 335852 : int err = 0;
433 :
434 335852 : TRC_DEBUG(IO_, "GDKsave: name=%s, ext=%s, mode %d, dosync=%d\n", nme, ext ? ext : "", (int) mode, dosync);
435 :
436 335852 : assert(!GDKinmemory(farmid));
437 335852 : if (mode == STORE_MMAP) {
438 1076 : if (dosync && size && !(ATOMIC_GET(&GDKdebug) & NOSYNCMASK))
439 0 : err = MT_msync(buf, size);
440 0 : if (err)
441 0 : GDKerror("error on: name=%s, ext=%s, mode=%d\n",
442 : nme, ext ? ext : "", (int) mode);
443 1076 : TRC_DEBUG(IO_, "MT_msync(buf %p, size %zu) = %d\n",
444 : buf, size, err);
445 : } else {
446 334776 : int fd;
447 :
448 334776 : if ((fd = GDKfdlocate(farmid, nme, "wb", ext)) >= 0) {
449 : /* write() on 64-bits Redhat for IA64 returns
450 : * 32-bits signed result (= OS BUG)! write()
451 : * on Windows only takes unsigned int as
452 : * size */
453 669552 : while (size > 0) {
454 : /* circumvent problems by writing huge
455 : * buffers in chunks <= 1GiB */
456 334776 : ssize_t ret;
457 :
458 669552 : ret = write(fd, buf,
459 : (unsigned) MIN(1 << 30, size));
460 334776 : if (ret < 0) {
461 0 : err = -1;
462 0 : GDKsyserror("GDKsave: error %zd"
463 : " on: name=%s, ext=%s, "
464 : "mode=%d\n", ret, nme,
465 : ext ? ext : "", (int) mode);
466 0 : break;
467 : }
468 334776 : size -= ret;
469 334776 : buf = (void *) ((char *) buf + ret);
470 1004328 : TRC_DEBUG(IO_, "Write(fd %d, buf %p"
471 : ", size %u) = %zd\n",
472 : fd, buf,
473 : (unsigned) MIN(1 << 30, size),
474 : ret);
475 : }
476 334776 : if (dosync && !(ATOMIC_GET(&GDKdebug) & NOSYNCMASK)
477 : #if defined(NATIVE_WIN32)
478 : && _commit(fd) < 0
479 : #elif defined(HAVE_FDATASYNC)
480 140 : && fdatasync(fd) < 0
481 : #elif defined(HAVE_FSYNC)
482 : && fsync(fd) < 0
483 : #endif
484 : ) {
485 0 : GDKsyserror("GDKsave: error on: name=%s, "
486 : "ext=%s, mode=%d\n", nme,
487 : ext ? ext : "", (int) mode);
488 0 : err = -1;
489 : }
490 334776 : err |= close(fd);
491 334776 : if (err && GDKunlink(farmid, BATDIR, nme, ext) != GDK_SUCCEED) {
492 : /* do not tolerate corrupt heap images
493 : * (BBPrecover on restart will kill
494 : * them) */
495 0 : GDKerror("could not remove: name=%s, "
496 : "ext=%s, mode %d\n", nme,
497 : ext ? ext : "", (int) mode);
498 0 : return GDK_FAIL;
499 : }
500 : } else {
501 0 : err = -1;
502 0 : GDKerror("failed name=%s, ext=%s, mode %d\n",
503 : nme, ext ? ext : "", (int) mode);
504 : }
505 : }
506 335852 : return err ? GDK_FAIL : GDK_SUCCEED;
507 : }
508 :
509 : /*
510 : * Space for the load is directly allocated and the heaps are mapped.
511 : * Further initialization of the atom heaps require a separate action
512 : * defined in their implementation.
513 : *
514 : * size -- how much to read
515 : * *maxsize -- (in/out) how much to allocate / how much was allocated
516 : */
517 : char *
518 26905 : GDKload(int farmid, const char *nme, const char *ext, size_t size, size_t *maxsize, storage_t mode)
519 : {
520 26905 : char *ret = NULL;
521 :
522 26905 : assert(!GDKinmemory(farmid));
523 26905 : assert(size <= *maxsize);
524 26905 : assert(farmid != NOFARM || ext == NULL);
525 26905 : TRC_DEBUG(IO_, "GDKload: name=%s, ext=%s, mode %d\n", nme, ext ? ext : "", (int) mode);
526 :
527 26905 : if (mode == STORE_MEM) {
528 24279 : int fd = GDKfdlocate(farmid, nme, "rb", ext);
529 :
530 24279 : if (fd >= 0) {
531 24279 : char *dst = ret = GDKmalloc(*maxsize);
532 24279 : ssize_t n_expected, n = 0;
533 :
534 24279 : if (ret) {
535 : /* read in chunks, some OSs do not
536 : * give you all at once and Windows
537 : * only accepts int */
538 48558 : for (n_expected = (ssize_t) size; n_expected > 0; n_expected -= n) {
539 24279 : n = read(fd, dst, (unsigned) MIN(1 << 30, n_expected));
540 24279 : if (n < 0)
541 0 : GDKsyserror("GDKload: cannot read: name=%s, ext=%s, expected %zu, %zd bytes missing\n", nme, ext ? ext : "", size, n_expected);
542 : #ifndef __COVERITY__
543 : /* Coverity doesn't seem to
544 : * recognize that we're just
545 : * printing the value of ptr,
546 : * not its contents */
547 24279 : TRC_DEBUG(IO_, "read(dst %p, n_expected %zd, fd %d) = %zd\n", (void *)dst, n_expected, fd, n);
548 : #endif
549 :
550 24279 : if (n <= 0)
551 : break;
552 24279 : dst += n;
553 : }
554 24279 : if (n_expected > 0) {
555 : /* we couldn't read all, error
556 : * already generated */
557 0 : GDKfree(ret);
558 0 : if (n >= 0) /* don't report error twice */
559 0 : GDKerror("short read from heap %s%s%s, expected %zu, missing %zd\n", nme, ext ? "." : "", ext ? ext : "", size, n_expected);
560 : ret = NULL;
561 : }
562 : #ifndef NDEBUG
563 : /* just to make valgrind happy, we
564 : * initialize the whole thing */
565 24279 : if (ret && *maxsize > size)
566 16607 : memset(ret + size, 0, *maxsize - size);
567 : #endif
568 : }
569 24279 : close(fd);
570 : } else {
571 0 : GDKsyserror("cannot open: name=%s, ext=%s\n", nme, ext ? ext : "");
572 : }
573 : } else {
574 2626 : char path[MAXPATH];
575 :
576 : /* round up to multiple of GDK_mmap_pagesize with a
577 : * minimum of one */
578 2626 : size = (*maxsize + GDK_mmap_pagesize - 1) & ~(GDK_mmap_pagesize - 1);
579 2626 : if (size == 0)
580 0 : size = GDK_mmap_pagesize;
581 2626 : if (farmid != NOFARM) {
582 1014 : if (GDKfilepath(path, sizeof(path), farmid, BATDIR, nme, ext) != GDK_SUCCEED)
583 0 : return NULL;
584 : nme = path;
585 : }
586 2626 : if (nme != NULL && GDKextend(nme, size) == GDK_SUCCEED) {
587 2612 : int mod = MMAP_READ | MMAP_WRITE | MMAP_SEQUENTIAL;
588 :
589 2612 : if (mode == STORE_PRIV)
590 : mod |= MMAP_COPY;
591 : else
592 2612 : mod |= MMAP_SYNC;
593 2612 : ret = GDKmmap(nme, mod, size);
594 2631 : if (ret != NULL) {
595 : /* success: update allocated size */
596 2631 : *maxsize = size;
597 : }
598 5262 : TRC_DEBUG(IO_, "mmap(NULL, 0, maxsize %zu, mod %d, path %s, 0) = %p\n", size, mod, nme, (void *)ret);
599 : }
600 : }
601 : return ret;
602 : }
603 :
604 : /*
605 : * @+ BAT disk storage
606 : *
607 : * Between sessions the BATs comprising the database are saved on
608 : * disk. To simplify code, we assume a UNIX directory called its
609 : * physical @%home@ where they are to be located. The subdirectories
610 : * BAT and PRG contain what their names say.
611 : *
612 : * A BAT created by @%COLnew@ is considered temporary until one calls
613 : * the routine @%BATsave@. This routine reserves disk space and checks
614 : * for name clashes.
615 : *
616 : * Saving and restoring BATs is left to the upper layers. The library
617 : * merely copies the data into place. Failure to read or write the
618 : * BAT results in a NULL, otherwise it returns the BAT pointer.
619 : */
620 : static BAT *
621 22784 : DESCload(int i)
622 : {
623 22784 : const char *s, *nme = BBP_physical(i);
624 22784 : BAT *b = NULL;
625 22784 : int tt;
626 :
627 22784 : TRC_DEBUG(IO_, "DESCload: %s\n", nme ? nme : "<noname>");
628 :
629 22784 : b = BBP_desc(i);
630 :
631 22784 : if (b->batCacheid == 0) {
632 0 : GDKerror("no descriptor for BAT %d\n", i);
633 0 : return NULL;
634 : }
635 :
636 22784 : MT_lock_set(&b->theaplock);
637 22783 : tt = b->ttype;
638 22783 : if (tt < 0) {
639 0 : if ((tt = ATOMindex(s = ATOMunknown_name(tt))) < 0) {
640 0 : MT_lock_unset(&b->theaplock);
641 0 : GDKerror("atom '%s' unknown, in BAT '%s'.\n", s, nme);
642 0 : return NULL;
643 : }
644 0 : b->ttype = tt;
645 : }
646 :
647 : /* reconstruct mode from BBP status (BATmode doesn't flush
648 : * descriptor, so loaded mode may be stale) */
649 22783 : b->batTransient = (BBP_status(b->batCacheid) & BBPPERSISTENT) == 0;
650 22783 : b->batCopiedtodisk = true;
651 22783 : MT_lock_unset(&b->theaplock);
652 22783 : return b;
653 : }
654 :
655 : gdk_return
656 1043947 : BATsave_iter(BAT *b, BATiter *bi, BUN size)
657 : {
658 1043947 : gdk_return err = GDK_SUCCEED;
659 1043947 : bool dosync;
660 1043947 : bool locked = false;
661 :
662 1043947 : BATcheck(b, GDK_FAIL);
663 :
664 1043947 : if (MT_rwlock_rdtry(&b->thashlock))
665 1043946 : locked = true;
666 :
667 1043947 : dosync = (BBP_status(b->batCacheid) & BBPPERSISTENT) != 0;
668 1043947 : assert(!GDKinmemory(bi->h->farmid));
669 : /* views cannot be saved, but make an exception for
670 : * force-remapped views */
671 1043947 : if (isVIEW(b)) {
672 0 : if (locked)
673 0 : MT_rwlock_rdunlock(&b->thashlock);
674 0 : GDKerror("%s is a view on %s; cannot be saved\n", BATgetId(b), BBP_logical(VIEWtparent(b)));
675 0 : return GDK_FAIL;
676 : }
677 1043947 : if (!BATdirtybi(*bi)) {
678 778249 : if (locked)
679 778249 : MT_rwlock_rdunlock(&b->thashlock);
680 778249 : return GDK_SUCCEED;
681 : }
682 :
683 : /* start saving data */
684 265698 : if (bi->type != TYPE_void && bi->base == NULL) {
685 0 : assert(BBP_status(b->batCacheid) & BBPSWAPPED);
686 0 : if (dosync && !(ATOMIC_GET(&GDKdebug) & NOSYNCMASK)) {
687 0 : int fd = GDKfdlocate(bi->h->farmid, bi->h->filename, "rb+", NULL);
688 0 : if (fd < 0) {
689 0 : GDKsyserror("cannot open file %s for sync\n",
690 : bi->h->filename);
691 0 : err = GDK_FAIL;
692 : } else {
693 0 : if (
694 : #if defined(NATIVE_WIN32)
695 : _commit(fd) < 0
696 : #elif defined(HAVE_FDATASYNC)
697 0 : fdatasync(fd) < 0
698 : #elif defined(HAVE_FSYNC)
699 : fsync(fd) < 0
700 : #endif
701 : )
702 0 : GDKsyserror("sync failed for %s\n",
703 : bi->h->filename);
704 0 : close(fd);
705 : }
706 0 : if (bi->vh) {
707 0 : fd = GDKfdlocate(bi->vh->farmid, bi->vh->filename, "rb+", NULL);
708 0 : if (fd < 0) {
709 0 : GDKsyserror("cannot open file %s for sync\n",
710 : bi->vh->filename);
711 0 : err = GDK_FAIL;
712 : } else {
713 0 : if (
714 : #if defined(NATIVE_WIN32)
715 : _commit(fd) < 0
716 : #elif defined(HAVE_FDATASYNC)
717 0 : fdatasync(fd) < 0
718 : #elif defined(HAVE_FSYNC)
719 : fsync(fd) < 0
720 : #endif
721 : )
722 0 : GDKsyserror("sync failed for %s\n", bi->vh->filename);
723 0 : close(fd);
724 : }
725 : }
726 : }
727 : } else {
728 265698 : const char *nme = BBP_physical(b->batCacheid);
729 265698 : if ((!bi->copiedtodisk || bi->hdirty)
730 265655 : && (err == GDK_SUCCEED && bi->type)) {
731 265655 : const char *tail = strchr(bi->h->filename, '.') + 1;
732 265655 : err = HEAPsave(bi->h, nme, tail, dosync, bi->hfree, &b->theaplock);
733 : }
734 265698 : if (bi->vh
735 57234 : && (!bi->copiedtodisk || bi->vhdirty)
736 50065 : && ATOMvarsized(bi->type)
737 50065 : && err == GDK_SUCCEED)
738 50065 : err = HEAPsave(bi->vh, nme, "theap", dosync, bi->vhfree, &b->theaplock);
739 : }
740 :
741 265698 : if (err == GDK_SUCCEED) {
742 265698 : MT_lock_set(&b->theaplock);
743 265698 : if (b->theap != bi->h) {
744 1 : assert(b->theap->dirty);
745 1 : b->theap->wasempty = bi->h->wasempty;
746 1 : b->theap->hasfile |= bi->h->hasfile;
747 : }
748 265698 : if (b->tvheap && b->tvheap != bi->vh) {
749 0 : assert(b->tvheap->dirty);
750 0 : b->tvheap->wasempty = bi->vh->wasempty;
751 0 : b->tvheap->hasfile |= bi->vh->hasfile;
752 : }
753 265698 : if (size != b->batCount) {
754 : /* if the size doesn't match, the BAT must be dirty */
755 38013 : b->theap->dirty = true;
756 38013 : if (b->tvheap)
757 10206 : b->tvheap->dirty = true;
758 : }
759 : /* there is something on disk now */
760 265698 : b->batCopiedtodisk = true;
761 265698 : MT_lock_unset(&b->theaplock);
762 265698 : if (locked && b->thash && b->thash != (Hash *) 1)
763 14157 : BAThashsave(b, dosync);
764 : }
765 265697 : if (locked)
766 265697 : MT_rwlock_rdunlock(&b->thashlock);
767 : return err;
768 : }
769 :
770 : gdk_return
771 5173 : BATsave(BAT *b)
772 : {
773 5173 : gdk_return rc;
774 :
775 5173 : BATiter bi = bat_iterator(b);
776 5173 : rc = BATsave_iter(b, &bi, bi.count);
777 5173 : bat_iterator_end(&bi);
778 5173 : return rc;
779 : }
780 :
781 : /*
782 : * TODO: move to gdk_bbp.c
783 : */
784 : BAT *
785 22784 : BATload_intern(bat bid, bool lock)
786 : {
787 22784 : const char *nme;
788 22784 : BAT *b;
789 :
790 22784 : assert(!GDKinmemory(0));
791 22784 : assert(bid > 0);
792 :
793 22784 : nme = BBP_physical(bid);
794 22784 : b = DESCload(bid);
795 :
796 22784 : if (b == NULL) {
797 : return NULL;
798 : }
799 22784 : assert(!GDKinmemory(b->theap->farmid));
800 :
801 : /* LOAD bun heap */
802 22784 : if (b->ttype != TYPE_void) {
803 22784 : b->theap->storage = b->theap->newstorage = STORE_INVALID;
804 22784 : if ((b->batCount == 0 ?
805 3092 : HEAPalloc(b->theap, b->batCapacity, b->twidth) :
806 25876 : HEAPload(b->theap, b->theap->filename, NULL, b->batRestricted == BAT_READ)) != GDK_SUCCEED) {
807 0 : HEAPfree(b->theap, false);
808 0 : return NULL;
809 : }
810 22784 : if (ATOMstorage(b->ttype) == TYPE_msk) {
811 5176 : b->batCapacity = (BUN) (b->theap->size * 8);
812 : } else {
813 17608 : assert(b->theap->size >> b->tshift <= BUN_MAX);
814 17608 : b->batCapacity = (BUN) (b->theap->size >> b->tshift);
815 : }
816 : } else {
817 0 : b->theap->base = NULL;
818 : }
819 :
820 : /* LOAD tail heap */
821 22784 : if (ATOMvarsized(b->ttype)) {
822 5195 : b->tvheap->storage = b->tvheap->newstorage = STORE_INVALID;
823 5195 : if ((b->tvheap->free == 0 ?
824 350 : ATOMheap(b->ttype, b->tvheap, b->batCapacity) :
825 5545 : HEAPload(b->tvheap, nme, "theap", b->batRestricted == BAT_READ)) != GDK_SUCCEED) {
826 0 : HEAPfree(b->theap, false);
827 0 : HEAPfree(b->tvheap, false);
828 0 : return NULL;
829 : }
830 5195 : if (ATOMstorage(b->ttype) == TYPE_str) {
831 5060 : strCleanHash(b->tvheap, false); /* ensure consistency */
832 : } else {
833 135 : HEAP_recover(b->tvheap, (const var_t *) Tloc(b, 0),
834 : BATcount(b));
835 : }
836 : }
837 :
838 : /* initialize descriptor */
839 22784 : b->theap->parentid = b->batCacheid;
840 :
841 : /* load succeeded; register it in BBP */
842 22784 : if (BBPcacheit(b, lock) != GDK_SUCCEED) {
843 0 : HEAPfree(b->theap, false);
844 0 : if (b->tvheap)
845 0 : HEAPfree(b->tvheap, false);
846 0 : return NULL;
847 : }
848 : return b;
849 : }
850 :
851 : /*
852 : * @- BATdelete
853 : * The new behavior is to let the routine produce warnings but always
854 : * succeed. rationale: on a delete, we must get rid of *all* the
855 : * files. We do not have to care about preserving them or be too much
856 : * concerned if a file that had to be deleted was not found (end
857 : * result is still that it does not exist). The past behavior to
858 : * delete some files and then fail was erroneous. The BAT would
859 : * continue to exist with an incorrect disk status, causing havoc
860 : * later on.
861 : *
862 : * NT forces us to close all files before deleting them; in case of
863 : * memory mapped files this means that we have to unload the BATs
864 : * before deleting. This is enforced now.
865 : */
866 : void
867 23805158 : BATdelete(BAT *b)
868 : {
869 23805158 : HASHdestroy(b);
870 23738247 : OIDXdestroy(b);
871 23870848 : PROPdestroy_nolock(b);
872 23840280 : STRMPdestroy(b);
873 23862150 : RTREEdestroy(b);
874 23814503 : if (b->theap) {
875 9173 : HEAPfree(b->theap, true);
876 : }
877 23834513 : if (b->tvheap) {
878 2263 : HEAPfree(b->tvheap, true);
879 : }
880 23834513 : b->batCopiedtodisk = false;
881 23834513 : }
882 :
883 : /*
884 : * BAT specific printing
885 : */
886 :
887 : gdk_return
888 685 : BATprintcolumns(stream *s, int argc, BAT *argv[])
889 : {
890 685 : int i;
891 685 : BUN n, cnt;
892 685 : struct colinfo {
893 : ssize_t (*s) (str *, size_t *, const void *, bool);
894 : BATiter i;
895 : } *colinfo;
896 685 : char *buf;
897 685 : size_t buflen = 0;
898 685 : ssize_t len;
899 685 : gdk_return rc = GDK_SUCCEED;
900 :
901 : /* error checking */
902 2122 : for (i = 0; i < argc; i++) {
903 1439 : if (argv[i] == NULL) {
904 0 : GDKerror("Columns missing\n");
905 0 : return GDK_FAIL;
906 : }
907 1439 : if (BATcount(argv[0]) != BATcount(argv[i])) {
908 2 : GDKerror("Columns must be the same size\n");
909 2 : return GDK_FAIL;
910 : }
911 : }
912 :
913 683 : if ((colinfo = GDKmalloc(argc * sizeof(*colinfo))) == NULL) {
914 0 : GDKerror("Cannot allocate memory\n");
915 0 : return GDK_FAIL;
916 : }
917 :
918 2114 : for (i = 0; i < argc; i++) {
919 1431 : colinfo[i].i = bat_iterator(argv[i]);
920 1431 : colinfo[i].s = BATatoms[argv[i]->ttype].atomToStr;
921 : }
922 :
923 683 : mnstr_write(s, "#--------------------------#\n", 1, 29);
924 683 : mnstr_write(s, "# ", 1, 2);
925 2797 : for (i = 0; i < argc; i++) {
926 1431 : if (i > 0)
927 748 : mnstr_write(s, "\t", 1, 1);
928 1431 : const char *nm = ATOMname(argv[i]->ttype);
929 1431 : mnstr_write(s, nm, 1, strlen(nm));
930 : }
931 683 : mnstr_write(s, " # type\n", 1, 9);
932 683 : mnstr_write(s, "#--------------------------#\n", 1, 29);
933 683 : buf = NULL;
934 :
935 3888 : for (n = 0, cnt = BATcount(argv[0]); n < cnt; n++) {
936 3205 : mnstr_write(s, "[ ", 1, 2);
937 13144 : for (i = 0; i < argc; i++) {
938 6734 : len = colinfo[i].s(&buf, &buflen, BUNtail(colinfo[i].i, n), true);
939 6734 : if (len < 0) {
940 0 : rc = GDK_FAIL;
941 0 : goto bailout;
942 : }
943 6734 : if (i > 0)
944 3529 : mnstr_write(s, ",\t", 1, 2);
945 6734 : mnstr_write(s, buf, 1, len);
946 : }
947 3205 : mnstr_write(s, " ]\n", 1, 4);
948 : }
949 :
950 683 : bailout:
951 2114 : for (i = 0; i < argc; i++) {
952 1431 : bat_iterator_end(&colinfo[i].i);
953 : }
954 683 : GDKfree(buf);
955 683 : GDKfree(colinfo);
956 :
957 683 : return rc;
958 : }
959 :
960 : gdk_return
961 632 : BATprint(stream *fdout, BAT *b)
962 : {
963 632 : if (complex_cand(b)) {
964 0 : struct canditer ci;
965 0 : canditer_init(&ci, NULL, b);
966 0 : oid hseq = ci.hseq;
967 :
968 0 : mnstr_printf(fdout,
969 : "#--------------------------#\n"
970 : "# void\toid # type\n"
971 : "#--------------------------#\n");
972 0 : for (BUN i = 0; i < ci.ncand; i++) {
973 0 : oid o = canditer_next(&ci);
974 0 : mnstr_printf(fdout,
975 : "[ " OIDFMT "@0,\t" OIDFMT "@0 ]\n",
976 : (oid) (i + hseq), o);
977 : }
978 0 : return GDK_SUCCEED;
979 : }
980 :
981 632 : BAT *argv[2];
982 632 : gdk_return ret = GDK_FAIL;
983 :
984 632 : argv[0] = BATdense(b->hseqbase, b->hseqbase, BATcount(b));
985 632 : if (argv[0]) {
986 632 : argv[1] = b;
987 632 : ret = BATprintcolumns(fdout, 2, argv);
988 632 : BBPunfix(argv[0]->batCacheid);
989 : }
990 : return ret;
991 : }
|