LCOV - code coverage report
Current view: top level - gdk - gdk_storage.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 359 475 75.6 %
Date: 2024-04-26 00:35:57 Functions: 19 19 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : /*
      14             :  * @a M. L. Kersten, P. Boncz, N. Nes
      15             :  *
      16             :  * @* Database Storage Management
      17             :  * Contains routines for writing and reading GDK data to and from
      18             :  * disk.  This section contains the primitives to manage the
      19             :  * disk-based images of the BATs. It relies on the existence of a UNIX
      20             :  * file system, including memory mapped files. Solaris and IRIX have
      21             :  * different implementations of madvise().
      22             :  *
      23             :  * The current version assumes that all BATs are stored on a single
      24             :  * disk partition. This simplistic assumption should be replaced in
      25             :  * the near future by a multi-volume version. The intention is to use
      26             :  * several BAT home locations.  The files should be owned by the
      27             :  * database server. Otherwise, IO operations are likely to fail. This
      28             :  * is accomplished by setting the GID and UID upon system start.
      29             :  */
      30             : #include "monetdb_config.h"
      31             : #include "gdk.h"
      32             : #include "gdk_private.h"
      33             : #include "mutils.h"
      34             : #ifdef HAVE_FCNTL_H
      35             : #include <fcntl.h>
      36             : #endif
      37             : 
      38             : #ifndef O_CLOEXEC
      39             : #ifdef _O_NOINHERIT
      40             : #define O_CLOEXEC _O_NOINHERIT  /* Windows */
      41             : #else
      42             : #define O_CLOEXEC 0
      43             : #endif
      44             : #endif
      45             : 
      46             : /* GDKfilepath returns a newly allocated string containing the path
      47             :  * name of a database farm.
      48             :  * The arguments are the farmID or -1, the name of a subdirectory
      49             :  * within the farm (i.e., something like BATDIR or BAKDIR -- see
      50             :  * gdk.h) or NULL, the name of a BAT (i.e. the name that is stored in
      51             :  * BBP.dir -- something like 07/714), and finally the file extension.
      52             :  *
      53             :  * If farmid is >= 0, GDKfilepath returns the complete path to the
      54             :  * specified farm concatenated with the other arguments with
      55             :  * appropriate separators.  If farmid is -1, it returns the
      56             :  * concatenation of its other arguments (in this case, the result
      57             :  * cannot be used to access a file directly -- the farm needs to be
      58             :  * prepended in some other place). */
      59             : char *
      60    18830067 : GDKfilepath(int farmid, const char *dir, const char *name, const char *ext)
      61             : {
      62    18830067 :         const char *sep;
      63    18830067 :         size_t pathlen;
      64    18830067 :         char *path;
      65             : 
      66    18830067 :         if (GDKinmemory(farmid))
      67           1 :                 return GDKstrdup(":memory:");
      68             : 
      69    18828156 :         assert(dir == NULL || *dir != DIR_SEP);
      70    18828156 :         assert(farmid == NOFARM ||
      71             :                (farmid >= 0 && farmid < MAXFARMS && BBPfarms[farmid].dirname));
      72    18828156 :         if (!GDKembedded() && MT_path_absolute(name)) {
      73           0 :                 GDKerror("name should not be absolute\n");
      74           0 :                 return NULL;
      75             :         }
      76    18829957 :         if (dir && *dir == DIR_SEP)
      77           0 :                 dir++;
      78    18772178 :         if (dir == NULL || dir[0] == 0 || dir[strlen(dir) - 1] == DIR_SEP) {
      79             :                 sep = "";
      80             :         } else {
      81    18734415 :                 sep = DIR_SEP_STR;
      82             :         }
      83    18829957 :         pathlen = (farmid == NOFARM ? 0 : strlen(BBPfarms[farmid].dirname) + 1) +
      84    18829957 :                 (dir ? strlen(dir) : 0) + strlen(sep) + strlen(name) +
      85    18829957 :                 (ext ? strlen(ext) + 1 : 0) + 1;
      86    18829957 :         path = GDKmalloc(pathlen);
      87    18840190 :         if (path == NULL)
      88             :                 return NULL;
      89    18840190 :         if (farmid == NOFARM) {
      90     2070234 :                 strconcat_len(path, pathlen,
      91             :                               dir ? dir : "", sep, name,
      92             :                               ext ? "." : NULL, ext, NULL);
      93             :         } else {
      94    32140161 :                 strconcat_len(path, pathlen,
      95             :                               BBPfarms[farmid].dirname, DIR_SEP_STR,
      96             :                               dir ? dir : "", sep, name,
      97             :                               ext ? "." : NULL, ext, NULL);
      98             :         }
      99             :         return path;
     100             : }
     101             : 
     102             : /* make sure the parent directory of DIR exists (the argument itself
     103             :  * is usually a file that is to be created) */
     104             : gdk_return
     105        3236 : GDKcreatedir(const char *dir)
     106             : {
     107        3236 :         char path[FILENAME_MAX];
     108        3236 :         char *r;
     109        3236 :         DIR *dirp;
     110             : 
     111        3236 :         TRC_DEBUG(IO_, "GDKcreatedir(%s)\n", dir);
     112        3236 :         assert(!GDKinmemory(0));
     113        3235 :         if (!GDKembedded() && !MT_path_absolute(dir)) {
     114           0 :                 GDKerror("directory '%s' is not absolute\n", dir);
     115           0 :                 return GDK_FAIL;
     116             :         }
     117        3238 :         if (strlen(dir) >= FILENAME_MAX) {
     118           0 :                 GDKerror("directory name too long\n");
     119           0 :                 return GDK_FAIL;
     120             :         }
     121        3238 :         strcpy(path, dir);      /* we know this fits (see above) */
     122             :         /* skip initial /, if any */
     123       42443 :         for (r = strchr(path + 1, DIR_SEP); r; r = strchr(r, DIR_SEP)) {
     124       39203 :                 *r = 0;
     125       39203 :                 if (
     126             : #ifdef WIN32
     127             :                         strlen(path) > 3 &&
     128             : #endif
     129       39220 :                         MT_mkdir(path) < 0) {
     130       35776 :                         if (errno != EEXIST) {
     131           0 :                                 GDKsyserror("cannot create directory %s\n", path);
     132           0 :                                 return GDK_FAIL;
     133             :                         }
     134       35776 :                         if ((dirp = opendir(path)) == NULL) {
     135           0 :                                 GDKsyserror("%s cannot open directory\n", path);
     136           0 :                                 return GDK_FAIL;
     137             :                         }
     138             :                         /* it's a directory, we can continue */
     139       35752 :                         closedir(dirp);
     140             :                 }
     141       39205 :                 *r++ = DIR_SEP;
     142             :         }
     143             :         return GDK_SUCCEED;
     144             : }
     145             : 
     146             : /* remove the directory DIRNAME with its file contents; does not
     147             :  * recurse into subdirectories */
     148             : gdk_return
     149       11439 : GDKremovedir(int farmid, const char *dirname)
     150             : {
     151       11439 :         str dirnamestr;
     152       11439 :         DIR *dirp;
     153       11439 :         char *path;
     154       11439 :         struct dirent *dent;
     155       11439 :         int ret;
     156             : 
     157       11439 :         assert(!GDKinmemory(farmid));
     158       11439 :         if ((dirnamestr = GDKfilepath(farmid, NULL, dirname, NULL)) == NULL)
     159             :                 return GDK_FAIL;
     160             : 
     161       11439 :         TRC_DEBUG(IO_, "GDKremovedir(%s)\n", dirnamestr);
     162             : 
     163       11439 :         if ((dirp = opendir(dirnamestr)) == NULL) {
     164         678 :                 GDKfree(dirnamestr);
     165         678 :                 return GDK_SUCCEED;
     166             :         }
     167      242647 :         while ((dent = readdir(dirp)) != NULL) {
     168      231886 :                 if (dent->d_name[0] == '.' &&
     169       21522 :                     (dent->d_name[1] == 0 ||
     170       10761 :                      (dent->d_name[1] == '.' && dent->d_name[2] == 0))) {
     171             :                         /* skip . and .. */
     172       21522 :                         continue;
     173             :                 }
     174      210364 :                 path = GDKfilepath(farmid, dirname, dent->d_name, NULL);
     175      210364 :                 if (path == NULL) {
     176             :                         /* most likely the rmdir will now fail causing
     177             :                          * an error return */
     178             :                         break;
     179             :                 }
     180      210364 :                 ret = MT_remove(path);
     181      210364 :                 if (ret == -1)
     182           0 :                         GDKsyserror("remove(%s) failed\n", path);
     183      210364 :                 TRC_DEBUG(IO_, "Remove %s = %d\n", path, ret);
     184      210364 :                 GDKfree(path);
     185             :         }
     186       10761 :         closedir(dirp);
     187       10761 :         ret = MT_rmdir(dirnamestr);
     188       10761 :         if (ret != 0)
     189           0 :                 GDKsyserror("rmdir(%s) failed\n", dirnamestr);
     190       10761 :         TRC_DEBUG(IO_, "rmdir %s = %d\n", dirnamestr, ret);
     191       10761 :         GDKfree(dirnamestr);
     192       10761 :         return ret ? GDK_FAIL : GDK_SUCCEED;
     193             : }
     194             : 
     195             : #define _FUNBUF         0x040000
     196             : #define _FWRTHR         0x080000
     197             : #define _FRDSEQ         0x100000
     198             : 
     199             : /* open a file and return its file descriptor; the file is specified
     200             :  * using farmid, name and extension; if opening for writing, we create
     201             :  * the parent directory if necessary; if opening for reading, we don't
     202             :  * necessarily report an error if it fails, but we make sure errno is
     203             :  * set */
     204             : int
     205      382970 : GDKfdlocate(int farmid, const char *nme, const char *mode, const char *extension)
     206             : {
     207      382970 :         char *path = NULL;
     208      382970 :         int fd, flags = O_CLOEXEC;
     209             : 
     210      382970 :         assert(!GDKinmemory(farmid));
     211      382974 :         if (nme == NULL || *nme == 0) {
     212           0 :                 GDKerror("no name specified\n");
     213           0 :                 errno = EFAULT;
     214           0 :                 return -1;
     215             :         }
     216             : 
     217      382974 :         assert(farmid != NOFARM || extension == NULL);
     218      382974 :         if (farmid != NOFARM) {
     219      381514 :                 path = GDKfilepath(farmid, BATDIR, nme, extension);
     220      381533 :                 if (path == NULL) {
     221           0 :                         errno = ENOMEM;
     222           0 :                         return -1;
     223             :                 }
     224             :                 nme = path;
     225             :         }
     226             : 
     227      382993 :         if (*mode == 'm') {     /* file open for mmap? */
     228           0 :                 mode++;
     229             : #ifdef _CYGNUS_H_
     230             :         } else {
     231             :                 flags |= _FRDSEQ;       /* WIN32 CreateFile(FILE_FLAG_SEQUENTIAL_SCAN) */
     232             : #endif
     233             :         }
     234             : 
     235      382993 :         if (strchr(mode, 'w')) {
     236             :                 flags |= O_WRONLY | O_CREAT;
     237       69531 :         } else if (!strchr(mode, '+')) {
     238             :                 flags |= O_RDONLY;
     239             :         } else {
     240       45339 :                 flags |= O_RDWR;
     241             :         }
     242             : #ifdef WIN32
     243             :         flags |= strchr(mode, 'b') ? O_BINARY : O_TEXT;
     244             : #endif
     245      382993 :         fd = MT_open(nme, flags);
     246      383019 :         if (fd < 0 && *mode == 'w') {
     247             :                 /* try to create the directory, in case that was the problem */
     248        3017 :                 if (GDKcreatedir(nme) == GDK_SUCCEED) {
     249        3018 :                         fd = MT_open(nme, flags);
     250        3019 :                         if (fd < 0)
     251           0 :                                 GDKsyserror("cannot open file %s\n", nme);
     252             :                 }
     253             :         }
     254      383021 :         int err = errno;        /* save */
     255             :         /* don't generate error if we can't open a file for reading */
     256      383021 :         GDKfree(path);
     257      383045 :         errno = err;            /* restore */
     258      383045 :         return fd;
     259             : }
     260             : 
     261             : /* like GDKfdlocate, except return a FILE pointer */
     262             : FILE *
     263       11541 : GDKfilelocate(int farmid, const char *nme, const char *mode, const char *extension)
     264             : {
     265       11541 :         int fd;
     266       11541 :         FILE *f;
     267             : 
     268       11541 :         if ((fd = GDKfdlocate(farmid, nme, mode, extension)) < 0)
     269             :                 return NULL;
     270       11320 :         if (*mode == 'm')
     271           0 :                 mode++;
     272       11320 :         if ((f = fdopen(fd, mode)) == NULL) {
     273           0 :                 GDKsyserror("cannot fdopen file\n");
     274           0 :                 close(fd);
     275           0 :                 return NULL;
     276             :         }
     277             :         return f;
     278             : }
     279             : 
     280             : FILE *
     281       10743 : GDKfileopen(int farmid, const char *dir, const char *name, const char *extension, const char *mode)
     282             : {
     283       10743 :         char *path;
     284             : 
     285             :         /* build the path from farm, dir, name and extension; name is passed on unchanged (no fallback to dir is done here) */
     286       10743 :         path = GDKfilepath(farmid, dir, name, extension);
     287             : 
     288       10743 :         if (path != NULL) {
     289       10743 :                 FILE *f;
     290       10743 :                 TRC_DEBUG(IO_, "GDKfileopen(%s)\n", path);
     291       10743 :                 f = MT_fopen(path, mode);
     292       10743 :                 int err = errno;
     293       10743 :                 GDKfree(path);
     294       10743 :                 errno = err;
     295       10743 :                 return f;
     296             :         }
     297             :         return NULL;
     298             : }
     299             : 
     300             : /* remove the file */
     301             : gdk_return
     302       11733 : GDKunlink(int farmid, const char *dir, const char *nme, const char *ext)
     303             : {
     304       11733 :         if (nme && *nme) {
     305       11733 :                 char *path;
     306             : 
     307       11733 :                 path = GDKfilepath(farmid, dir, nme, ext);
     308       11733 :                 if (path == NULL)
     309             :                         return GDK_FAIL;
     310             :                 /* if file already doesn't exist, we don't care */
     311       11733 :                 if (MT_remove(path) != 0 && errno != ENOENT) {
     312           0 :                         GDKsyserror("remove(%s)\n", path);
     313           0 :                         GDKfree(path);
     314           0 :                         return GDK_FAIL;
     315             :                 }
     316       11733 :                 GDKfree(path);
     317       11733 :                 return GDK_SUCCEED;
     318             :         }
     319           0 :         GDKerror("no name specified");
     320           0 :         return GDK_FAIL;
     321             : }
     322             : 
     323             : /*
     324             :  * Rename a file; source and destination are each given as a directory, name and extension.
     325             :  */
     326             : gdk_return
     327      221581 : GDKmove(int farmid, const char *dir1, const char *nme1, const char *ext1, const char *dir2, const char *nme2, const char *ext2, bool report)
     328             : {
     329      221581 :         char *path1;
     330      221581 :         char *path2;
     331      221581 :         int ret;
     332      221581 :         lng t0 = GDKusec();
     333             : 
     334      221581 :         if (nme1 == NULL || *nme1 == 0) {
     335           0 :                 GDKerror("no file specified\n");
     336           0 :                 return GDK_FAIL;
     337             :         }
     338      221581 :         path1 = GDKfilepath(farmid, dir1, nme1, ext1);
     339      221581 :         path2 = GDKfilepath(farmid, dir2, nme2, ext2);
     340      221581 :         if (path1 && path2) {
     341      221581 :                 ret = MT_rename(path1, path2);
     342      221581 :                 if (ret < 0 && report)
     343           0 :                         GDKsyserror("cannot rename %s to %s\n", path1, path2);
     344             : 
     345      221581 :                 TRC_DEBUG(IO_, "Move %s %s = %d ("LLFMT" usec)\n", path1, path2, ret, GDKusec() - t0);
     346             :         } else {
     347             :                 ret = -1;
     348             :         }
     349      221581 :         GDKfree(path1);
     350      221581 :         GDKfree(path2);
     351      221581 :         return ret < 0 ? GDK_FAIL : GDK_SUCCEED;
     352             : }
     353             : 
     354             : gdk_return
     355        2595 : GDKextendf(int fd, size_t size, const char *fn)
     356             : {
     357        2595 :         struct stat stb;
     358        2595 :         int rt = 0;
     359        2595 :         lng t0 = GDKusec();
     360             : 
     361        2596 :         assert(!GDKinmemory(0));
     362             : #ifdef __COVERITY__
     363             :         if (fd < 0)          /* in real life, if fd < 0, fstat will fail */
     364             :                 return GDK_FAIL;
     365             : #endif
     366        2596 :         if (fstat(fd, &stb) < 0) {
     367             :                 /* shouldn't happen */
     368           0 :                 GDKsyserror("fstat failed unexpectedly\n");
     369           0 :                 return GDK_FAIL;
     370             :         }
     371             :         /* if necessary, extend the underlying file */
     372        2605 :         if (stb.st_size < (off_t) size) {
     373             : #ifdef HAVE_FALLOCATE
     374        2022 :                 if ((rt = fallocate(fd, 0, stb.st_size, (off_t) size - stb.st_size)) < 0 &&
     375           0 :                     errno == EOPNOTSUPP)
     376             :                         /* on Linux, posix_fallocate uses a slow
     377             :                          * method to allocate blocks if the underlying
     378             :                          * file system doesn't support the operation,
     379             :                          * so use fallocate instead and just resize
     380             :                          * the file if it fails */
     381             : #else
     382             : #ifdef HAVE_POSIX_FALLOCATE
     383             :                 /* posix_fallocate returns error number on failure,
     384             :                  * not -1 :-( */
     385             :                 if ((rt = posix_fallocate(fd, stb.st_size, (off_t) size - stb.st_size)) == EINVAL)
     386             :                         /* on Solaris/OpenIndiana, this may mean that
     387             :                          * the underlying file system doesn't support
     388             :                          * the operation, so just resize the file */
     389             : #endif
     390             : #endif
     391             :                         /* we get here when (posix_)fallocate fails
     392             :                          * because it is not supported on the file
     393             :                          * system, or if neither function exists */
     394           0 :                         rt = ftruncate(fd, (off_t) size);
     395        2011 :                 if (rt != 0) {
     396             :                         /* extending failed, try to reduce file size
     397             :                          * back to original */
     398           0 :                         GDKsyserror("could not extend file\n");
     399           0 :                         if (ftruncate(fd, stb.st_size))
     400           0 :                                 GDKsyserror("ftruncate to old size");
     401             :                 }
     402             :         }
     403        2594 :         TRC_DEBUG(IO_, "GDKextend %s %zu -> %zu "LLFMT" usec%s\n",
     404             :                   fn, (size_t) stb.st_size, size,
     405             :                   GDKusec() - t0, rt != 0 ? " (failed)" : "");
     406             :         /* posix_fallocate returns != 0 on failure, fallocate and
     407             :          * ftruncate return -1 on failure, but all three return 0 on
     408             :          * success */
     409        2593 :         return rt != 0 ? GDK_FAIL : GDK_SUCCEED;
     410             : }
     411             : 
     412             : gdk_return
     413        2096 : GDKextend(const char *fn, size_t size)
     414             : {
     415        2096 :         int fd, flags = O_RDWR;
     416        2096 :         gdk_return rt = GDK_FAIL;
     417             : 
     418        2096 :         assert(!GDKinmemory(0));
     419             : #ifdef O_BINARY
     420             :         /* On Windows, open() fails if the file is bigger than 2^32
     421             :          * bytes without O_BINARY. */
     422             :         flags |= O_BINARY;
     423             : #endif
     424        2096 :         if ((fd = MT_open(fn, flags | O_CLOEXEC)) >= 0) {
     425        2103 :                 rt = GDKextendf(fd, size, fn);
     426        2100 :                 close(fd);
     427             :         } else {
     428           0 :                 GDKsyserror("cannot open file %s\n", fn);
     429             :         }
     430        2106 :         return rt;
     431             : }
     432             : 
     433             : /*
     434             :  * @+ Save and load.
     435             :  * The BAT is saved on disk in several files. The extension DESC
     436             :  * denotes the descriptor, BUNs the bun heap, and HHEAP and THEAP the
     437             :  * other heaps. The storage mechanism of a file can be memory mapped
     438             :  * (STORE_MMAP) or malloced (STORE_MEM).
     439             :  *
     440             :  * These modes indicate the disk-layout and the intended mapping.
     441             :  * The primary concern here is to handle STORE_MMAP and STORE_MEM.
     442             :  */
     443             : gdk_return
     444      302002 : GDKsave(int farmid, const char *nme, const char *ext, void *buf, size_t size, storage_t mode, bool dosync)
     445             : {
     446      302002 :         int err = 0;
     447             : 
     448      302002 :         TRC_DEBUG(IO_, "GDKsave: name=%s, ext=%s, mode %d, dosync=%d\n", nme, ext ? ext : "", (int) mode, dosync);
     449             : 
     450      302002 :         assert(!GDKinmemory(farmid));
     451      302002 :         if (mode == STORE_MMAP) {
     452        1017 :                 if (dosync && size && !(ATOMIC_GET(&GDKdebug) & NOSYNCMASK))
     453          92 :                         err = MT_msync(buf, size);
     454          92 :                 if (err)
     455           0 :                         GDKerror("error on: name=%s, ext=%s, mode=%d\n",
     456             :                                  nme, ext ? ext : "", (int) mode);
     457        1017 :                 TRC_DEBUG(IO_, "MT_msync(buf %p, size %zu) = %d\n",
     458             :                           buf, size, err);
     459             :         } else {
     460      300985 :                 int fd;
     461             : 
     462      300985 :                 if ((fd = GDKfdlocate(farmid, nme, "wb", ext)) >= 0) {
     463             :                         /* write() on 64-bits Redhat for IA64 returns
     464             :                          * 32-bits signed result (= OS BUG)! write()
     465             :                          * on Windows only takes unsigned int as
     466             :                          * size */
     467      601970 :                         while (size > 0) {
     468             :                                 /* circumvent problems by writing huge
     469             :                                  * buffers in chunks <= 1GiB */
     470      300985 :                                 ssize_t ret;
     471             : 
     472      601970 :                                 ret = write(fd, buf,
     473             :                                             (unsigned) MIN(1 << 30, size));
     474      300985 :                                 if (ret < 0) {
     475           0 :                                         err = -1;
     476           0 :                                         GDKsyserror("GDKsave: error %zd"
     477             :                                                     " on: name=%s, ext=%s, "
     478             :                                                     "mode=%d\n", ret, nme,
     479             :                                                     ext ? ext : "", (int) mode);
     480           0 :                                         break;
     481             :                                 }
     482      300985 :                                 size -= ret;
     483      300985 :                                 buf = (void *) ((char *) buf + ret);
     484      902955 :                                 TRC_DEBUG(IO_, "Write(fd %d, buf %p"
     485             :                                           ", size %u) = %zd\n",
     486             :                                           fd, buf,
     487             :                                           (unsigned) MIN(1 << 30, size),
     488             :                                           ret);
     489             :                         }
     490      300985 :                         if (dosync && !(ATOMIC_GET(&GDKdebug) & NOSYNCMASK)
     491             : #if defined(NATIVE_WIN32)
     492             :                             && _commit(fd) < 0
     493             : #elif defined(HAVE_FDATASYNC)
     494         134 :                             && fdatasync(fd) < 0
     495             : #elif defined(HAVE_FSYNC)
     496             :                             && fsync(fd) < 0
     497             : #endif
     498             :                                 ) {
     499           0 :                                 GDKsyserror("GDKsave: error on: name=%s, "
     500             :                                             "ext=%s, mode=%d\n", nme,
     501             :                                             ext ? ext : "", (int) mode);
     502           0 :                                 err = -1;
     503             :                         }
     504      300985 :                         err |= close(fd);
     505      300985 :                         if (err && GDKunlink(farmid, BATDIR, nme, ext) != GDK_SUCCEED) {
     506             :                                 /* do not tolerate corrupt heap images
     507             :                                  * (BBPrecover on restart will kill
     508             :                                  * them) */
     509           0 :                                 GDKerror("could not remove: name=%s, "
     510             :                                          "ext=%s, mode %d\n", nme,
     511             :                                          ext ? ext : "", (int) mode);
     512           0 :                                 return GDK_FAIL;
     513             :                         }
     514             :                 } else {
     515           0 :                         err = -1;
     516           0 :                         GDKerror("failed name=%s, ext=%s, mode %d\n",
     517             :                                  nme, ext ? ext : "", (int) mode);
     518             :                 }
     519             :         }
     520      302002 :         return err ? GDK_FAIL : GDK_SUCCEED;
     521             : }
     522             : 
     523             : /*
     524             :  * Space for the load is directly allocated and the heaps are mapped.
     525             :  * Further initialization of the atom heaps require a separate action
     526             :  * defined in their implementation.
     527             :  *
     528             :  * size -- how much to read
     529             :  * *maxsize -- (in/out) how much to allocate / how much was allocated
     530             :  */
     531             : char *
     532       24688 : GDKload(int farmid, const char *nme, const char *ext, size_t size, size_t *maxsize, storage_t mode)
     533             : {
     534       24688 :         char *ret = NULL;
     535             : 
     536       24688 :         assert(!GDKinmemory(farmid));
     537       24686 :         assert(size <= *maxsize);
     538       24686 :         assert(farmid != NOFARM || ext == NULL);
     539       24686 :         TRC_DEBUG(IO_, "GDKload: name=%s, ext=%s, mode %d\n", nme, ext ? ext : "", (int) mode);
     540             : 
     541       24692 :         if (mode == STORE_MEM) {
     542       22589 :                 int fd = GDKfdlocate(farmid, nme, "rb", ext);
     543             : 
     544       22589 :                 if (fd >= 0) {
     545       22589 :                         char *dst = ret = GDKmalloc(*maxsize);
     546       22590 :                         ssize_t n_expected, n = 0;
     547             : 
     548       22590 :                         if (ret) {
     549             :                                 /* read in chunks, some OSs do not
     550             :                                  * give you all at once and Windows
     551             :                                  * only accepts int */
     552       45179 :                                 for (n_expected = (ssize_t) size; n_expected > 0; n_expected -= n) {
     553       22589 :                                         n = read(fd, dst, (unsigned) MIN(1 << 30, n_expected));
     554       22589 :                                         if (n < 0)
     555           0 :                                                 GDKsyserror("GDKload: cannot read: name=%s, ext=%s, expected %zu, %zd bytes missing\n", nme, ext ? ext : "", size, n_expected);
     556             : #ifndef __COVERITY__
     557             :                                         /* Coverity doesn't seem to
     558             :                                          * recognize that we're just
     559             :                                          * printing the value of ptr,
     560             :                                          * not its contents */
     561       22589 :                                         TRC_DEBUG(IO_, "read(dst %p, n_expected %zd, fd %d) = %zd\n", (void *)dst, n_expected, fd, n);
     562             : #endif
     563             : 
     564       22589 :                                         if (n <= 0)
     565             :                                                 break;
     566       22589 :                                         dst += n;
     567             :                                 }
     568       22590 :                                 if (n_expected > 0) {
     569             :                                         /* we couldn't read all, error
     570             :                                          * already generated */
     571           0 :                                         GDKfree(ret);
     572           0 :                                         if (n >= 0) /* don't report error twice  */
     573           0 :                                                 GDKerror("short read from heap %s%s%s, expected %zu, missing %zd\n", nme, ext ? "." : "", ext ? ext : "", size, n_expected);
     574             :                                         ret = NULL;
     575             :                                 }
     576             : #ifndef NDEBUG
     577             :                                 /* just to make valgrind happy, we
     578             :                                  * initialize the whole thing */
     579       22590 :                                 if (ret && *maxsize > size)
     580       15390 :                                         memset(ret + size, 0, *maxsize - size);
     581             : #endif
     582             :                         }
     583       22590 :                         close(fd);
     584             :                 } else {
     585           0 :                         GDKsyserror("cannot open: name=%s, ext=%s\n", nme, ext ? ext : "");
     586             :                 }
     587             :         } else {
     588        2103 :                 char *path = NULL;
     589             : 
     590             :                 /* round up to multiple of GDK_mmap_pagesize with a
     591             :                  * minimum of one */
     592        2103 :                 size = (*maxsize + GDK_mmap_pagesize - 1) & ~(GDK_mmap_pagesize - 1);
     593        2103 :                 if (size == 0)
     594           0 :                         size = GDK_mmap_pagesize;
     595        2103 :                 if (farmid != NOFARM) {
     596         608 :                         path = GDKfilepath(farmid, BATDIR, nme, ext);
     597         608 :                         nme = path;
     598             :                 }
     599        2103 :                 if (nme != NULL && GDKextend(nme, size) == GDK_SUCCEED) {
     600        2103 :                         int mod = MMAP_READ | MMAP_WRITE | MMAP_SEQUENTIAL;
     601             : 
     602        2103 :                         if (mode == STORE_PRIV)
     603             :                                 mod |= MMAP_COPY;
     604             :                         else
     605        2103 :                                 mod |= MMAP_SYNC;
     606        2103 :                         ret = GDKmmap(nme, mod, size);
     607        2114 :                         if (ret != NULL) {
     608             :                                 /* success: update allocated size */
     609        2114 :                                 *maxsize = size;
     610             :                         }
     611        2114 :                         TRC_DEBUG(IO_, "mmap(NULL, 0, maxsize %zu, mod %d, path %s, 0) = %p\n", size, mod, nme, (void *)ret);
     612             :                 }
     613        2114 :                 GDKfree(path);
     614             :         }
     615       24704 :         return ret;
     616             : }
     617             : 
     618             : /*
     619             :  * @+ BAT disk storage
     620             :  *
     621             :  * Between sessions the BATs comprising the database are saved on
     622             :  * disk.  To simplify the code, we assume a UNIX directory, called the
     623             :  * physical @%home@, where they are to be located.  The subdirectories
     624             :  * BAT and PRG contain what their names say.
     625             :  *
     626             :  * A BAT created by @%COLnew@ is considered temporary until one calls
     627             :  * the routine @%BATsave@. This routine reserves disk space and checks
     628             :  * for name clashes.
     629             :  *
     630             :  * Saving and restoring BATs is left to the upper layers. The library
     631             :  * merely copies the data into place.  Failure to read or write the
     632             :  * BAT results in a NULL, otherwise it returns the BAT pointer.
     633             :  */
     634             : static BAT *
     635       20697 : DESCload(int i)
     636             : {
     637       20697 :         const char *s, *nme = BBP_physical(i);
     638       20697 :         BAT *b = NULL;
     639       20697 :         int tt;
     640             : 
     641       20697 :         TRC_DEBUG(IO_, "DESCload: %s\n", nme ? nme : "<noname>");
     642             : 
     643       20697 :         b = BBP_desc(i);
     644             : 
     645       20697 :         if (b->batCacheid == 0) {
     646           0 :                 GDKerror("no descriptor for BAT %d\n", i);
     647           0 :                 return NULL;
     648             :         }
     649             : 
     650       20697 :         MT_lock_set(&b->theaplock);
     651       20697 :         tt = b->ttype;
     652       20697 :         if (tt < 0) {
     653           0 :                 if ((tt = ATOMindex(s = ATOMunknown_name(tt))) < 0) {
     654           0 :                         MT_lock_unset(&b->theaplock);
     655           0 :                         GDKerror("atom '%s' unknown, in BAT '%s'.\n", s, nme);
     656           0 :                         return NULL;
     657             :                 }
     658           0 :                 b->ttype = tt;
     659             :         }
     660             : 
     661             :         /* reconstruct mode from BBP status (BATmode doesn't flush
     662             :          * descriptor, so loaded mode may be stale) */
     663       20697 :         b->batTransient = (BBP_status(b->batCacheid) & BBPPERSISTENT) == 0;
     664       20697 :         b->batCopiedtodisk = true;
     665       20697 :         MT_lock_unset(&b->theaplock);
     666       20697 :         return b;
     667             : }
     668             : 
     669             : gdk_return
     670      893531 : BATsave_iter(BAT *b, BATiter *bi, BUN size)
     671             : {
     672      893531 :         gdk_return err = GDK_SUCCEED;
     673      893531 :         bool dosync;
     674      893531 :         bool locked = false;
     675             : 
     676      893531 :         BATcheck(b, GDK_FAIL);
     677             : 
     678      893531 :         if (MT_rwlock_rdtry(&b->thashlock))
     679      893526 :                 locked = true;
     680             : 
     681      893531 :         dosync = (BBP_status(b->batCacheid) & BBPPERSISTENT) != 0;
     682      893531 :         assert(!GDKinmemory(bi->h->farmid));
     683             :         /* views cannot be saved, but make an exception for
     684             :          * force-remapped views */
     685      893531 :         if (isVIEW(b)) {
     686           0 :                 if (locked)
     687           0 :                         MT_rwlock_rdunlock(&b->thashlock);
     688           0 :                 GDKerror("%s is a view on %s; cannot be saved\n", BATgetId(b), BBP_logical(VIEWtparent(b)));
     689           0 :                 return GDK_FAIL;
     690             :         }
     691      893531 :         if (!BATdirtybi(*bi)) {
     692      658042 :                 if (locked)
     693      658042 :                         MT_rwlock_rdunlock(&b->thashlock);
     694      658042 :                 return GDK_SUCCEED;
     695             :         }
     696             : 
     697             :         /* start saving data */
     698      235489 :         if (bi->type != TYPE_void && bi->base == NULL) {
     699           0 :                 assert(BBP_status(b->batCacheid) & BBPSWAPPED);
     700           0 :                 if (dosync && !(ATOMIC_GET(&GDKdebug) & NOSYNCMASK)) {
     701           0 :                         int fd = GDKfdlocate(bi->h->farmid, bi->h->filename, "rb+", NULL);
     702           0 :                         if (fd < 0) {
     703           0 :                                 GDKsyserror("cannot open file %s for sync\n",
     704             :                                             bi->h->filename);
     705           0 :                                 err = GDK_FAIL;
     706             :                         } else {
     707           0 :                                 if (
     708             : #if defined(NATIVE_WIN32)
     709             :                                         _commit(fd) < 0
     710             : #elif defined(HAVE_FDATASYNC)
     711           0 :                                         fdatasync(fd) < 0
     712             : #elif defined(HAVE_FSYNC)
     713             :                                         fsync(fd) < 0
     714             : #endif
     715             :                                         )
     716           0 :                                         GDKsyserror("sync failed for %s\n",
     717             :                                                     bi->h->filename);
     718           0 :                                 close(fd);
     719             :                         }
     720           0 :                         if (bi->vh) {
     721           0 :                                 fd = GDKfdlocate(bi->vh->farmid, bi->vh->filename, "rb+", NULL);
     722           0 :                                 if (fd < 0) {
     723           0 :                                         GDKsyserror("cannot open file %s for sync\n",
     724             :                                                     bi->vh->filename);
     725           0 :                                         err = GDK_FAIL;
     726             :                                 } else {
     727           0 :                                         if (
     728             : #if defined(NATIVE_WIN32)
     729             :                                                 _commit(fd) < 0
     730             : #elif defined(HAVE_FDATASYNC)
     731           0 :                                                 fdatasync(fd) < 0
     732             : #elif defined(HAVE_FSYNC)
     733             :                                                 fsync(fd) < 0
     734             : #endif
     735             :                                                 )
     736           0 :                                                 GDKsyserror("sync failed for %s\n", bi->vh->filename);
     737           0 :                                         close(fd);
     738             :                                 }
     739             :                         }
     740             :                 }
     741             :         } else {
     742      235489 :                 const char *nme = BBP_physical(b->batCacheid);
     743      235489 :                 if ((!bi->copiedtodisk || bi->hdirty)
     744      235446 :                     && (err == GDK_SUCCEED && bi->type)) {
     745      235446 :                         const char *tail = strchr(bi->h->filename, '.') + 1;
     746      235446 :                         err = HEAPsave(bi->h, nme, tail, dosync, bi->hfree, &b->theaplock);
     747             :                 }
     748      235489 :                 if (bi->vh
     749       48397 :                     && (!bi->copiedtodisk || bi->vhdirty)
     750       42253 :                     && ATOMvarsized(bi->type)
     751       42253 :                     && err == GDK_SUCCEED)
     752       42253 :                         err = HEAPsave(bi->vh, nme, "theap", dosync, bi->vhfree, &b->theaplock);
     753             :         }
     754             : 
     755      235489 :         if (err == GDK_SUCCEED) {
     756      235489 :                 MT_lock_set(&b->theaplock);
     757      235489 :                 if (b->theap != bi->h) {
     758           0 :                         assert(b->theap->dirty);
     759           0 :                         b->theap->wasempty = bi->h->wasempty;
     760           0 :                         b->theap->hasfile |= bi->h->hasfile;
     761             :                 }
     762      235489 :                 if (b->tvheap && b->tvheap != bi->vh) {
     763           0 :                         assert(b->tvheap->dirty);
     764           0 :                         b->tvheap->wasempty = bi->vh->wasempty;
     765           0 :                         b->tvheap->hasfile |= bi->vh->hasfile;
     766             :                 }
     767      235489 :                 if (size != b->batCount) {
     768             :                         /* if the size doesn't match, the BAT must be dirty */
     769       29999 :                         b->theap->dirty = true;
     770       29999 :                         if (b->tvheap)
     771        7946 :                                 b->tvheap->dirty = true;
     772             :                 }
     773             :                 /* there is something on disk now */
     774      235489 :                 b->batCopiedtodisk = true;
     775      235489 :                 MT_lock_unset(&b->theaplock);
     776      235489 :                 if (locked &&  b->thash && b->thash != (Hash *) 1)
     777       12784 :                         BAThashsave(b, dosync);
     778             :         }
     779      235484 :         if (locked)
     780      235484 :                 MT_rwlock_rdunlock(&b->thashlock);
     781             :         return err;
     782             : }
     783             : 
     784             : gdk_return
     785         787 : BATsave(BAT *b)
     786             : {
     787         787 :         gdk_return rc;
     788             : 
     789         787 :         BATiter bi = bat_iterator(b);
     790         787 :         rc = BATsave_iter(b, &bi, bi.count);
     791         787 :         bat_iterator_end(&bi);
     792         787 :         return rc;
     793             : }
     794             : 
     795             : /*
     796             :  * TODO: move to gdk_bbp.c
     797             :  */
     798             : BAT *
     799       20696 : BATload_intern(bat bid, bool lock)
     800             : {
     801       20696 :         const char *nme;
     802       20696 :         BAT *b;
     803             : 
     804       20696 :         assert(!GDKinmemory(0));
     805       20696 :         assert(bid > 0);
     806             : 
     807       20696 :         nme = BBP_physical(bid);
     808       20696 :         b = DESCload(bid);
     809             : 
     810       20697 :         if (b == NULL) {
     811             :                 return NULL;
     812             :         }
     813       20697 :         assert(!GDKinmemory(b->theap->farmid));
     814             : 
     815             :         /* LOAD bun heap */
     816       20696 :         if (b->ttype != TYPE_void) {
     817       20697 :                 b->theap->storage = b->theap->newstorage = STORE_INVALID;
     818       20696 :                 if ((b->batCount == 0 ?
     819        2836 :                      HEAPalloc(b->theap, b->batCapacity, b->twidth) :
     820       23533 :                      HEAPload(b->theap, b->theap->filename, NULL, b->batRestricted == BAT_READ)) != GDK_SUCCEED) {
     821           0 :                         HEAPfree(b->theap, false);
     822           0 :                         return NULL;
     823             :                 }
     824       20696 :                 if (ATOMstorage(b->ttype) == TYPE_msk) {
     825        4813 :                         b->batCapacity = (BUN) (b->theap->size * 8);
     826             :                 } else {
     827       15883 :                         assert(b->theap->size >> b->tshift <= BUN_MAX);
     828       15883 :                         b->batCapacity = (BUN) (b->theap->size >> b->tshift);
     829             :                 }
     830             :         } else {
     831           0 :                 b->theap->base = NULL;
     832             :         }
     833             : 
     834             :         /* LOAD tail heap */
     835       20695 :         if (ATOMvarsized(b->ttype)) {
     836        4748 :                 b->tvheap->storage = b->tvheap->newstorage = STORE_INVALID;
     837        4748 :                 if ((b->tvheap->free == 0 ?
     838         282 :                      ATOMheap(b->ttype, b->tvheap, b->batCapacity) :
     839        5030 :                      HEAPload(b->tvheap, nme, "theap", b->batRestricted == BAT_READ)) != GDK_SUCCEED) {
     840           0 :                         HEAPfree(b->theap, false);
     841           0 :                         HEAPfree(b->tvheap, false);
     842           0 :                         return NULL;
     843             :                 }
     844        4748 :                 if (ATOMstorage(b->ttype) == TYPE_str) {
     845        4613 :                         strCleanHash(b->tvheap, false);      /* ensure consistency */
     846             :                 } else {
     847         135 :                         HEAP_recover(b->tvheap, (const var_t *) Tloc(b, 0),
     848             :                                      BATcount(b));
     849             :                 }
     850             :         }
     851             : 
     852             :         /* initialize descriptor */
     853       20695 :         b->theap->parentid = b->batCacheid;
     854             : 
     855             :         /* load succeeded; register it in BBP */
     856       20695 :         if (BBPcacheit(b, lock) != GDK_SUCCEED) {
     857           0 :                 HEAPfree(b->theap, false);
     858           0 :                 if (b->tvheap)
     859           0 :                         HEAPfree(b->tvheap, false);
     860           0 :                 return NULL;
     861             :         }
     862             :         return b;
     863             : }
     864             : 
     865             : /*
     866             :  * @- BATdelete
     867             :  * The new behavior is to let the routine produce warnings but always
     868             :  * succeed.  Rationale: on a delete, we must get rid of *all* the
     869             :  * files. We do not have to care about preserving them or be overly
     870             :  * concerned if a file that had to be deleted was not found (the end
     871             :  * result is still that it does not exist). The past behavior of
     872             :  * deleting some files and then failing was erroneous. The BAT would
     873             :  * continue to exist with an incorrect disk status, causing havoc
     874             :  * later on.
     875             :  *
     876             :  * NT forces us to close all files before deleting them; in case of
     877             :  * memory mapped files this means that we have to unload the BATs
     878             :  * before deleting. This is enforced now.
     879             :  */
     880             : void
     881    21442059 : BATdelete(BAT *b)
     882             : {
     883    21442059 :         HASHdestroy(b);
     884    21414497 :         IMPSdestroy(b);
     885    21404719 :         OIDXdestroy(b);
     886    21398744 :         PROPdestroy_nolock(b);
     887    21405746 :         STRMPdestroy(b);
     888    21390034 :         RTREEdestroy(b);
     889    21400587 :         if (b->theap) {
     890        3779 :                 HEAPfree(b->theap, true);
     891             :         }
     892    21418761 :         if (b->tvheap) {
     893         190 :                 HEAPfree(b->tvheap, true);
     894             :         }
     895    21418761 :         b->batCopiedtodisk = false;
     896    21418761 : }
     897             : 
     898             : /*
     899             :  * BAT specific printing
     900             :  */
     901             : 
     902             : gdk_return
     903         687 : BATprintcolumns(stream *s, int argc, BAT *argv[])
     904             : {
     905         687 :         int i;
     906         687 :         BUN n, cnt;
     907         687 :         struct colinfo {
     908             :                 ssize_t (*s) (str *, size_t *, const void *, bool);
     909             :                 BATiter i;
     910             :         } *colinfo;
     911         687 :         char *buf;
     912         687 :         size_t buflen = 0;
     913         687 :         ssize_t len;
     914         687 :         gdk_return rc = GDK_SUCCEED;
     915             : 
     916             :         /* error checking */
     917        2128 :         for (i = 0; i < argc; i++) {
     918        1443 :                 if (argv[i] == NULL) {
     919           0 :                         GDKerror("Columns missing\n");
     920           0 :                         return GDK_FAIL;
     921             :                 }
     922        1443 :                 if (BATcount(argv[0]) != BATcount(argv[i])) {
     923           2 :                         GDKerror("Columns must be the same size\n");
     924           2 :                         return GDK_FAIL;
     925             :                 }
     926             :         }
     927             : 
     928         685 :         if ((colinfo = GDKmalloc(argc * sizeof(*colinfo))) == NULL) {
     929           0 :                 GDKerror("Cannot allocate memory\n");
     930           0 :                 return GDK_FAIL;
     931             :         }
     932             : 
     933        2120 :         for (i = 0; i < argc; i++) {
     934        1435 :                 colinfo[i].i = bat_iterator(argv[i]);
     935        1435 :                 colinfo[i].s = BATatoms[argv[i]->ttype].atomToStr;
     936             :         }
     937             : 
     938         685 :         mnstr_write(s, "#--------------------------#\n", 1, 29);
     939         685 :         mnstr_write(s, "# ", 1, 2);
     940        2805 :         for (i = 0; i < argc; i++) {
     941        1435 :                 if (i > 0)
     942         750 :                         mnstr_write(s, "\t", 1, 1);
     943        1435 :                 const char *nm = ATOMname(argv[i]->ttype);
     944        1435 :                 mnstr_write(s, nm, 1, strlen(nm));
     945             :         }
     946         685 :         mnstr_write(s, "  # type\n", 1, 9);
     947         685 :         mnstr_write(s, "#--------------------------#\n", 1, 29);
     948         685 :         buf = NULL;
     949             : 
     950        3896 :         for (n = 0, cnt = BATcount(argv[0]); n < cnt; n++) {
     951        3211 :                 mnstr_write(s, "[ ", 1, 2);
     952       13168 :                 for (i = 0; i < argc; i++) {
     953        6746 :                         len = colinfo[i].s(&buf, &buflen, BUNtail(colinfo[i].i, n), true);
     954        6746 :                         if (len < 0) {
     955           0 :                                 rc = GDK_FAIL;
     956           0 :                                 goto bailout;
     957             :                         }
     958        6746 :                         if (i > 0)
     959        3535 :                                 mnstr_write(s, ",\t", 1, 2);
     960        6746 :                         mnstr_write(s, buf, 1, len);
     961             :                 }
     962        3211 :                 mnstr_write(s, "  ]\n", 1, 4);
     963             :         }
     964             : 
     965         685 :   bailout:
     966        2120 :         for (i = 0; i < argc; i++) {
     967        1435 :                 bat_iterator_end(&colinfo[i].i);
     968             :         }
     969         685 :         GDKfree(buf);
     970         685 :         GDKfree(colinfo);
     971             : 
     972         685 :         return rc;
     973             : }
     974             : 
     975             : gdk_return
     976         634 : BATprint(stream *fdout, BAT *b)
     977             : {
     978         634 :         if (complex_cand(b)) {
     979           0 :                 struct canditer ci;
     980           0 :                 canditer_init(&ci, NULL, b);
     981           0 :                 oid hseq = ci.hseq;
     982             : 
     983           0 :                 mnstr_printf(fdout,
     984             :                              "#--------------------------#\n"
     985             :                              "# void\toid  # type\n"
     986             :                              "#--------------------------#\n");
     987           0 :                 for (BUN i = 0; i < ci.ncand; i++) {
     988           0 :                         oid o = canditer_next(&ci);
     989           0 :                         mnstr_printf(fdout,
     990             :                                      "[ " OIDFMT "@0,\t" OIDFMT "@0  ]\n",
     991             :                                      (oid) (i + hseq), o);
     992             :                 }
     993           0 :                 return GDK_SUCCEED;
     994             :         }
     995             : 
     996         634 :         BAT *argv[2];
     997         634 :         gdk_return ret = GDK_FAIL;
     998             : 
     999         634 :         argv[0] = BATdense(b->hseqbase, b->hseqbase, BATcount(b));
    1000         634 :         if (argv[0]) {
    1001         634 :                 argv[1] = b;
    1002         634 :                 ret = BATprintcolumns(fdout, 2, argv);
    1003         634 :                 BBPunfix(argv[0]->batCacheid);
    1004             :         }
    1005             :         return ret;
    1006             : }

Generated by: LCOV version 1.14