LCOV - code coverage report
Current view: top level - gdk - gdk_heap.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 501 675 74.2 %
Date: 2024-04-25 20:03:45 Functions: 21 21 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : /*
      14             :  * @f gdk_heap
      15             :  * @a Peter Boncz, Wilko Quak
      16             :  * @+ Atom Heaps
      17             :  * Heaps are the basic mass storage structure of Monet. A heap is a
      18             :  * handle to a large, possibly huge, contiguous area of main memory,
      19             :  * that can be allocated in various ways (discriminated by the
      20             :  * heap->storage field):
      21             :  *
      22             :  * @table @code
      23             :  * @item STORE_MEM: malloc-ed memory
      24             :  * small (or rather: not huge) heaps are allocated with GDKmalloc.
      25             :  * Notice that GDKmalloc may redirect big requests to anonymous
      26             :  * virtual memory to prevent @emph{memory fragmentation} in the malloc
      27             :  * library (see gdk_utils.c).
      28             :  *
      29             :  * @item STORE_MMAP: read-only mapped region
      30             :  * this is a file on disk that is mapped into virtual memory.  This is
      31             :  * normally done MAP_SHARED, so we can use msync() to commit dirty
      32             :  * data using the OS virtual memory management.
      33             :  *
      34             :  * @item STORE_PRIV: read-write mapped region
      35             :  * in order to preserve ACID properties, we use a different memory
      36             :  * mapping on virtual memory that is writable. This is because in case
      37             :  * of a crash on a dirty STORE_MMAP heap, the OS may have written some
      38             :  * of the dirty pages to disk and other not (but it is impossible to
      39             :  * determine which).  The OS MAP_PRIVATE mode does not modify the file
      40             :  * on which is being mapped, rather creates substitute pages
      41             :  * dynamically taken from the swap file when modifications occur. This
      42             :  * is the only way to make writing to mmap()-ed regions safe.  To save
      43             :  * changes, we created a new file X.new; as some OS-es do not allow to
      44             :  * write into a file that has a mmap open on it (e.g. Windows).  Such
      45             :  * X.new files take preference over X files when opening them.
      46             :  * @end table
      47             :  * Read also the discussion in BATsetaccess (gdk_bat.c).
      48             :  */
      49             : #include "monetdb_config.h"
      50             : #include "gdk.h"
      51             : #include "gdk_private.h"
      52             : #include "mutils.h"
      53             : 
      54             : static void *
      55        1559 : HEAPcreatefile(int farmid, size_t *maxsz, const char *fn)
      56             : {
      57        1559 :         void *base = NULL;
      58        1559 :         char *path = NULL;
      59        1559 :         int fd;
      60             : 
      61        1559 :         if (farmid != NOFARM) {
      62             :                 /* call GDKfilepath once here instead of twice inside
      63             :                  * the calls to GDKfdlocate and GDKload */
      64        1064 :                 if ((path = GDKfilepath(farmid, BATDIR, fn, NULL)) == NULL)
      65             :                         return NULL;
      66             :                 fn = path;
      67             :         }
      68             :         /* round up to mulitple of GDK_mmap_pagesize */
      69        1557 :         fd = GDKfdlocate(NOFARM, fn, "wb", NULL);
      70        1559 :         if (fd >= 0) {
      71        1559 :                 close(fd);
      72        1559 :                 base = GDKload(NOFARM, fn, NULL, *maxsz, maxsz, STORE_MMAP);
      73             :         }
      74        1559 :         GDKfree(path);
      75        1559 :         return base;
      76             : }
      77             : 
      78             : static char *
      79       45489 : decompose_filename(str nme)
      80             : {
      81       45489 :         char *ext;
      82             : 
      83       45489 :         ext = strchr(nme, '.'); /* extract base and ext from heap file name */
      84       45489 :         if (ext) {
      85       45489 :                 *ext++ = 0;
      86             :         }
      87       45489 :         return ext;
      88             : }
      89             : 
      90             : /* this function is called with the theaplock held */
      91             : gdk_return
      92       43682 : HEAPgrow(Heap **hp, size_t size, bool mayshare)
      93             : {
      94       43682 :         Heap *new;
      95             : 
      96       43682 :         ATOMIC_BASE_TYPE refs = ATOMIC_GET(&(*hp)->refs);
      97       43682 :         if ((refs & HEAPREFS) == 1) {
      98       43614 :                 return HEAPextend((*hp), size, mayshare);
      99             :         }
     100          68 :         new = GDKmalloc(sizeof(Heap));
     101          68 :         if (new != NULL) {
     102          68 :                 Heap *old = *hp;
     103          68 :                 *new = (Heap) {
     104          68 :                         .farmid = old->farmid,
     105             :                         .dirty = true,
     106          68 :                         .parentid = old->parentid,
     107          68 :                         .wasempty = old->wasempty,
     108          68 :                         .hasfile = old->hasfile,
     109             :                 };
     110          68 :                 memcpy(new->filename, old->filename, sizeof(new->filename));
     111          68 :                 if (HEAPalloc(new, size, 1) == GDK_SUCCEED) {
     112          68 :                         ATOMIC_INIT(&new->refs, 1 | (refs & HEAPREMOVE));
     113          68 :                         new->free = old->free;
     114          68 :                         new->cleanhash = old->cleanhash;
     115          68 :                         if (old->free > 0 &&
     116          63 :                             (new->storage == STORE_MEM || old->storage == STORE_MEM))
     117          46 :                                 memcpy(new->base, old->base, old->free);
     118             :                         /* else both are STORE_MMAP and refer to the
     119             :                          * same file and so we don't need to copy */
     120             : 
     121             :                         /* replace old heap with new */
     122          68 :                         HEAPdecref(*hp, false);
     123          68 :                         *hp = new;
     124             :                 } else {
     125           0 :                         GDKfree(new);
     126           0 :                         new = NULL;
     127             :                 }
     128             :         }
     129          68 :         return new ? GDK_SUCCEED : GDK_FAIL;
     130             : }
     131             : 
     132             : /*
     133             :  * @- HEAPalloc
     134             :  *
     135             :  * Normally, we use GDKmalloc for creating a new heap.  Huge heaps,
     136             :  * though, come from memory mapped files that we create with a large
     137             :  * fallocate. This is fast, and leads to files-with-holes on Unixes (on
     138             :  * Windows, it actually always performs I/O which is not nice).
     139             :  */
     140             : gdk_return
     141     7396086 : HEAPalloc(Heap *h, size_t nitems, size_t itemsize)
     142             : {
     143     7396086 :         size_t size = 0;
     144     7396086 :         QryCtx *qc = h->farmid == 1 ? MT_thread_get_qry_ctx() : NULL;
     145             : 
     146     7394607 :         h->base = NULL;
     147     7394607 :         h->size = 1;
     148     7394607 :         if (itemsize) {
     149             :                 /* check for overflow */
     150     7394607 :                 if (nitems > BUN_NONE / itemsize) {
     151           0 :                         GDKerror("allocating more than heap can accomodate\n");
     152           0 :                         return GDK_FAIL;
     153             :                 }
     154     7394607 :                 h->size = MAX(1, nitems) * itemsize;
     155             :         }
     156     7394607 :         h->free = 0;
     157     7394607 :         h->cleanhash = false;
     158             : 
     159             : #ifdef SIZE_CHECK_IN_HEAPS_ONLY
     160     7394607 :         if (GDKvm_cursize() + h->size >= GDK_vm_maxsize &&
     161           0 :             !MT_thread_override_limits()) {
     162           0 :                 GDKerror("allocating too much memory (current: %zu, requested: %zu, limit: %zu)\n", GDKvm_cursize(), h->size, GDK_vm_maxsize);
     163           0 :                 return GDK_FAIL;
     164             :         }
     165             : #endif
     166             : 
     167     7393641 :         size_t allocated;
     168     7393641 :         if (GDKinmemory(h->farmid) ||
     169    14761523 :             ((allocated = GDKmem_cursize()) + h->size < GDK_mem_maxsize &&
     170     7380135 :              h->size < (h->farmid == 0 ? GDK_mmap_minsize_persistent : GDK_mmap_minsize_transient) &&
     171     7380160 :              h->size < ((GDK_mem_maxsize - allocated) >> 6))) {
     172     7391462 :                 h->storage = STORE_MEM;
     173     7391462 :                 size = h->size;
     174     7391462 :                 if (qc != NULL) {
     175     4161873 :                         ATOMIC_BASE_TYPE sz = ATOMIC_ADD(&qc->datasize, size);
     176     4161873 :                         sz += size;
     177     4161873 :                         if (qc->maxmem > 0 && sz > qc->maxmem) {
     178           0 :                                 ATOMIC_SUB(&qc->datasize, size);
     179           0 :                                 GDKerror("Query using too much memory.\n");
     180           0 :                                 return GDK_FAIL;
     181             :                         }
     182             :                 }
     183     7391462 :                 h->base = GDKmalloc(size);
     184     7392616 :                 TRC_DEBUG(HEAP, "%s %zu %p\n", h->filename, size, h->base);
     185     7392616 :                 if (h->base == NULL && qc != NULL)
     186           0 :                         ATOMIC_SUB(&qc->datasize, size);
     187             :         }
     188             : 
     189     7392831 :         if (h->base == NULL && !GDKinmemory(h->farmid)) {
     190         495 :                 char *nme = GDKfilepath(h->farmid, BATDIR, h->filename, NULL);
     191         495 :                 if (nme == NULL)
     192             :                         return GDK_FAIL;
     193         495 :                 h->storage = STORE_MMAP;
     194         495 :                 h->size = (h->size + GDK_mmap_pagesize - 1) & ~(GDK_mmap_pagesize - 1);
     195         495 :                 size = h->size;
     196         495 :                 if (qc != NULL) {
     197          33 :                         ATOMIC_BASE_TYPE sz = ATOMIC_ADD(&qc->datasize, size);
     198          33 :                         sz += size;
     199          33 :                         if (qc->maxmem > 0 && sz > qc->maxmem) {
     200           0 :                                 ATOMIC_SUB(&qc->datasize, size);
     201           0 :                                 GDKerror("Query using too much memory.\n");
     202           0 :                                 return GDK_FAIL;
     203             :                         }
     204             :                 }
     205         495 :                 h->base = HEAPcreatefile(NOFARM, &h->size, nme);
     206         495 :                 h->hasfile = true;
     207         495 :                 if (h->base == NULL) {
     208           0 :                         if (qc != NULL)
     209           0 :                                 ATOMIC_SUB(&qc->datasize, size);
     210             :                         /* remove file we may just have created
     211             :                          * it may or may not exist, depending on what
     212             :                          * failed */
     213           0 :                         (void) MT_remove(nme);
     214           0 :                         GDKfree(nme);
     215           0 :                         h->hasfile = false; /* just removed it */
     216           0 :                         GDKerror("Insufficient space for HEAP of %zu bytes.", h->size);
     217           0 :                         return GDK_FAIL;
     218             :                 }
     219         495 :                 GDKfree(nme);
     220         495 :                 TRC_DEBUG(HEAP, "%s %zu %p (mmap)\n", h->filename, size, h->base);
     221             :         }
     222     7392831 :         h->newstorage = h->storage;
     223     7392831 :         return GDK_SUCCEED;
     224             : }
     225             : 
     226             : /* Extend the allocated space of the heap H to be at least SIZE bytes.
     227             :  * If the heap grows beyond a threshold and a filename is known, the
     228             :  * heap is converted from allocated memory to a memory-mapped file.
     229             :  * When switching from allocated to memory mapped, if MAYSHARE is set,
     230             :  * the heap does not have to be copy-on-write.
     231             :  *
     232             :  * The function returns 0 on success, -1 on failure.
     233             :  *
     234             :  * When extending a memory-mapped heap, we use the function MT_mremap
     235             :  * (which see).  When extending an allocated heap, we use GDKrealloc.
     236             :  * If that fails, we switch to memory mapped, even when the size is
     237             :  * below the threshold.
     238             :  *
     239             :  * When converting from allocated to memory mapped, we try several
     240             :  * strategies.  First we try to create the memory map, and if that
     241             :  * works, copy the data and free the old memory.  If this fails, we
     242             :  * first write the data to disk, free the memory, and then try to
     243             :  * memory map the saved data. */
     244             : gdk_return
     245       59988 : HEAPextend(Heap *h, size_t size, bool mayshare)
     246             : {
     247       59988 :         size_t osize = h->size;
     248       59988 :         size_t xsize;
     249       59988 :         QryCtx *qc = h->farmid == 1 ? MT_thread_get_qry_ctx() : NULL;
     250             : 
     251       59981 :         if (size <= h->size)
     252             :                 return GDK_SUCCEED;     /* nothing to do */
     253             : 
     254       45528 :         char nme[sizeof(h->filename)], *ext;
     255       45528 :         const char *failure = "None";
     256             : 
     257       45528 :         if (GDKinmemory(h->farmid)) {
     258          45 :                 strcpy_len(nme, ":memory:", sizeof(nme));
     259          45 :                 ext = "ext";
     260             :         } else {
     261       45485 :                 strcpy_len(nme, h->filename, sizeof(nme));
     262       45477 :                 ext = decompose_filename(nme);
     263             :         }
     264       45530 :         failure = "size > h->size";
     265             : 
     266             : #ifdef SIZE_CHECK_IN_HEAPS_ONLY
     267       45530 :         if (GDKvm_cursize() + size - h->size >= GDK_vm_maxsize &&
     268           0 :             !MT_thread_override_limits()) {
     269           0 :                 GDKerror("allocating too much memory (current: %zu, requested: %zu, limit: %zu)\n", GDKvm_cursize(), size - h->size, GDK_vm_maxsize);
     270           0 :                 return GDK_FAIL;
     271             :         }
     272             : #endif
     273             : 
     274       45528 :         if (h->storage != STORE_MEM) {
     275         568 :                 char *p;
     276         568 :                 char *path;
     277             : 
     278         568 :                 assert(h->hasfile);
     279         568 :                 TRC_DEBUG(HEAP, "Extending %s mmapped heap (%s)\n", h->storage == STORE_MMAP ? "shared" : "privately", h->filename);
     280             :                 /* extend memory mapped file */
     281         568 :                 if ((path = GDKfilepath(h->farmid, BATDIR, nme, ext)) == NULL) {
     282             :                         return GDK_FAIL;
     283             :                 }
     284         568 :                 size = (size + GDK_mmap_pagesize - 1) & ~(GDK_mmap_pagesize - 1);
     285         568 :                 if (size == 0)
     286           0 :                         size = GDK_mmap_pagesize;
     287             : 
     288         568 :                 xsize = size - osize;
     289             : 
     290         568 :                 if (qc != NULL) {
     291         157 :                         ATOMIC_BASE_TYPE sz = ATOMIC_ADD(&qc->datasize, xsize);
     292         157 :                         sz += size - osize;
     293         157 :                         if (qc->maxmem > 0 && sz > qc->maxmem) {
     294           0 :                                 GDKerror("Query using too much memory.\n");
     295           0 :                                 ATOMIC_SUB(&qc->datasize, xsize);
     296           0 :                                 return GDK_FAIL;
     297             :                         }
     298             :                 }
     299        1136 :                 p = GDKmremap(path,
     300             :                               h->storage == STORE_PRIV ?
     301             :                                 MMAP_COPY | MMAP_READ | MMAP_WRITE :
     302             :                                 MMAP_READ | MMAP_WRITE,
     303             :                               h->base, h->size, &size);
     304         568 :                 GDKfree(path);
     305         568 :                 if (p) {
     306         568 :                         h->size = size;
     307         568 :                         h->base = p;
     308         568 :                         return GDK_SUCCEED; /* success */
     309             :                 }
     310           0 :                 if (qc != NULL)
     311           0 :                         ATOMIC_SUB(&qc->datasize, xsize);
     312             :                 failure = "GDKmremap() failed";
     313             :         } else {
     314             :                 /* extend a malloced heap, possibly switching over to
     315             :                  * file-mapped storage */
     316       44960 :                 Heap bak = *h;
     317       44960 :                 size_t allocated;
     318       44960 :                 bool must_mmap = (!GDKinmemory(h->farmid) &&
     319       44907 :                                    (h->newstorage != STORE_MEM ||
     320       89820 :                                     (allocated = GDKmem_cursize()) + size >= GDK_mem_maxsize ||
     321       44915 :                                     size >= (h->farmid == 0 ? GDK_mmap_minsize_persistent : GDK_mmap_minsize_transient) ||
     322       43829 :                                     size >= ((GDK_mem_maxsize - allocated) >> 6)));
     323             : 
     324       44959 :                 h->size = size;
     325       44959 :                 xsize = size - osize;
     326             : 
     327             :                 /* try GDKrealloc if the heap size stays within
     328             :                  * reasonable limits */
     329       44959 :                 if (!must_mmap) {
     330       43857 :                         if (qc != NULL) {
     331       20292 :                                 ATOMIC_BASE_TYPE sz = ATOMIC_ADD(&qc->datasize, xsize);
     332       20292 :                                 sz += xsize;
     333       20292 :                                 if (qc->maxmem > 0 && sz > qc->maxmem) {
     334           0 :                                         GDKerror("Query using too much memory.\n");
     335           0 :                                         ATOMIC_SUB(&qc->datasize, xsize);
     336           0 :                                         *h = bak;
     337       44965 :                                         return GDK_FAIL;
     338             :                                 }
     339             :                         }
     340       43857 :                         h->newstorage = h->storage = STORE_MEM;
     341       43857 :                         h->base = GDKrealloc(h->base, size);
     342       43869 :                         TRC_DEBUG(HEAP, "Extending malloced heap %s %zu->%zu %p->%p\n", h->filename, bak.size, size, bak.base, h->base);
     343       43869 :                         if (h->base) {
     344             :                                 return GDK_SUCCEED; /* success */
     345             :                         }
     346             :                         /* bak.base is still valid and may get restored */
     347           0 :                         failure = "h->storage == STORE_MEM && !must_map && !h->base";
     348           0 :                         if (qc != NULL)
     349           0 :                                 ATOMIC_SUB(&qc->datasize, xsize);
     350             :                 }
     351             : 
     352        1102 :                 if (!GDKinmemory(h->farmid)) {
     353             :                         /* too big: convert it to a disk-based temporary heap */
     354             : 
     355        1089 :                         assert(h->storage == STORE_MEM);
     356        1089 :                         assert(ext != NULL);
     357             :                         /* if the heap file already exists, we want to switch
     358             :                          * to STORE_PRIV (copy-on-write memory mapped files),
     359             :                          * but if the heap file doesn't exist yet, the BAT is
     360             :                          * new and we can use STORE_MMAP */
     361        1089 :                         int fd = GDKfdlocate(h->farmid, nme, "rb", ext);
     362        1087 :                         if (fd >= 0) {
     363          32 :                                 assert(h->hasfile);
     364          32 :                                 close(fd);
     365          32 :                                 fd = GDKfdlocate(h->farmid, nme, "wb", ext);
     366          32 :                                 if (fd >= 0) {
     367          32 :                                         gdk_return rc = GDKextendf(fd, size, nme);
     368          32 :                                         close(fd);
     369          32 :                                         if (rc != GDK_SUCCEED) {
     370           0 :                                                 failure = "h->storage == STORE_MEM && can_map && fd >= 0 && GDKextendf() != GDK_SUCCEED";
     371           0 :                                                 goto failed;
     372             :                                         }
     373          32 :                                         h->storage = h->newstorage == STORE_MMAP && !mayshare ? STORE_PRIV : h->newstorage;
     374             :                                         /* make sure we really MMAP */
     375          32 :                                         if (must_mmap && h->newstorage == STORE_MEM)
     376          32 :                                                 h->storage = STORE_MMAP;
     377          32 :                                         h->newstorage = h->storage;
     378             : 
     379          32 :                                         h->base = NULL;
     380          32 :                                         TRC_DEBUG(HEAP, "Converting malloced to %s mmapped heap %s\n", h->newstorage == STORE_MMAP ? "shared" : "privately", h->filename);
     381             :                                         /* try to allocate a memory-mapped based
     382             :                                          * heap */
     383          32 :                                         if (HEAPload(h, nme, ext, false) == GDK_SUCCEED) {
     384             :                                                 /* copy data to heap and free old
     385             :                                                  * memory */
     386          32 :                                                 memcpy(h->base, bak.base, bak.free);
     387          32 :                                                 HEAPfree(&bak, false);
     388          32 :                                                 return GDK_SUCCEED;
     389             :                                         }
     390             :                                         failure = "h->storage == STORE_MEM && can_map && fd >= 0 && HEAPload() != GDK_SUCCEED";
     391             :                                         /* we failed */
     392             :                                 } else {
     393             :                                         failure = "h->storage == STORE_MEM && can_map && fd < 0";
     394             :                                 }
     395             :                         } else {
     396             :                                 /* no pre-existing heap file, so create a new
     397             :                                  * one */
     398        1055 :                                 if (qc != NULL) {
     399          12 :                                         h->size = (h->size + GDK_mmap_pagesize - 1) & ~(GDK_mmap_pagesize - 1);
     400          12 :                                         xsize = h->size;
     401          12 :                                         ATOMIC_BASE_TYPE sz = ATOMIC_ADD(&qc->datasize, xsize);
     402          12 :                                         sz += h->size;
     403          12 :                                         if (qc->maxmem > 0 && sz > qc->maxmem) {
     404           0 :                                                 GDKerror("Query using too much memory.\n");
     405           0 :                                                 sz = ATOMIC_ADD(&qc->datasize, xsize);
     406           0 :                                                 *h = bak;
     407           0 :                                                 return GDK_FAIL;
     408             :                                         }
     409             :                                 }
     410        1055 :                                 h->base = HEAPcreatefile(h->farmid, &h->size, h->filename);
     411        1064 :                                 h->hasfile = true;
     412        1064 :                                 if (h->base) {
     413        1064 :                                         h->newstorage = h->storage = STORE_MMAP;
     414        1064 :                                         if (bak.free > 0)
     415          41 :                                                 memcpy(h->base, bak.base, bak.free);
     416        1064 :                                         HEAPfree(&bak, false);
     417        1064 :                                         return GDK_SUCCEED;
     418             :                                 }
     419           0 :                                 failure = "h->storage == STORE_MEM && can_map && fd >= 0 && HEAPcreatefile() == NULL";
     420           0 :                                 if (qc != NULL)
     421           0 :                                         ATOMIC_SUB(&qc->datasize, xsize);
     422             :                         }
     423             :                 }
     424           0 :           failed:
     425           0 :                 if (h->hasfile && !bak.hasfile) {
     426           0 :                         char *path = GDKfilepath(h->farmid, BATDIR, nme, ext);
     427           0 :                         if (path) {
     428           0 :                                 MT_remove(path);
     429           0 :                                 GDKfree(path);
     430             :                         } else {
     431             :                                 /* couldn't remove, so now we have a file */
     432           0 :                                 bak.hasfile = true;
     433             :                         }
     434             :                 }
     435           0 :                 *h = bak;
     436             :         }
     437           0 :         GDKerror("failed to extend to %zu for %s%s%s: %s\n",
     438             :                  size, nme, ext ? "." : "", ext ? ext : "", failure);
     439           0 :         return GDK_FAIL;
     440             : }
     441             : 
     442             : /* grow the string offset heap so that the value v fits (i.e. wide
     443             :  * enough to fit the value), and it has space for at least cap elements;
     444             :  * copy ncopy BUNs, or up to the heap size, whichever is smaller */
     445             : gdk_return
     446       99878 : GDKupgradevarheap(BAT *b, var_t v, BUN cap, BUN ncopy)
     447             : {
     448       99878 :         uint8_t shift = b->tshift;
     449       99878 :         uint16_t width = b->twidth;
     450       99878 :         uint8_t *pc;
     451       99878 :         uint16_t *ps;
     452       99878 :         uint32_t *pi;
     453             : #if SIZEOF_VAR_T == 8
     454       99878 :         uint64_t *pl;
     455             : #endif
     456       99878 :         size_t i, n;
     457       99878 :         size_t newsize;
     458       99878 :         bat bid = b->batCacheid;
     459       99878 :         Heap *old, *new;
     460             : 
     461       99878 :         old = b->theap;
     462             : //      assert(old->storage == STORE_MEM);
     463       99878 :         assert(old->parentid == b->batCacheid);
     464       99878 :         assert(b->tbaseoff == 0);
     465       99878 :         assert(width != 0);
     466       99878 :         assert(v >= GDK_VAROFFSET);
     467             : 
     468      129703 :         while (width < SIZEOF_VAR_T && (width <= 2 ? v - GDK_VAROFFSET : v) >= ((var_t) 1 << (8 * width))) {
     469       29825 :                 width <<= 1;
     470       29825 :                 shift++;
     471             :         }
     472             :         /* if cap(acity) given (we check whether it is larger than the
     473             :          * old), then grow to cap */
     474       99878 :         if (cap > (old->size >> b->tshift))
     475        4546 :                 newsize = cap << shift;
     476             :         else
     477       95332 :                 newsize = (old->size >> b->tshift) << shift;
     478       99878 :         if (b->twidth == width) {
     479       71348 :                 if (newsize <= old->size) {
     480             :                         /* nothing to do */
     481       68174 :                         if (cap > b->batCapacity)
     482           0 :                                 BATsetcapacity(b, cap);
     483       68174 :                         return GDK_SUCCEED;
     484             :                 }
     485        3174 :                 return BATextend(b, newsize >> shift);
     486             :         }
     487             : 
     488       28530 :         n = MIN(ncopy, old->size >> b->tshift);
     489             : 
     490       44485 :         MT_thread_setalgorithm(n ? "widen offset heap" : "widen empty offset heap");
     491             : 
     492       28528 :         new = GDKmalloc(sizeof(Heap));
     493       28528 :         if (new == NULL)
     494             :                 return GDK_FAIL;
     495       28528 :         *new = (Heap) {
     496       28528 :                 .farmid = old->farmid,
     497             :                 .dirty = true,
     498       28528 :                 .parentid = old->parentid,
     499       28528 :                 .wasempty = old->wasempty,
     500             :         };
     501       28528 :         settailname(new, BBP_physical(b->batCacheid), b->ttype, width);
     502       28533 :         if (HEAPalloc(new, newsize, 1) != GDK_SUCCEED) {
     503           0 :                 GDKfree(new);
     504           0 :                 return GDK_FAIL;
     505             :         }
     506             :         /* HEAPalloc initialized .free, so we need to set it after */
     507       28533 :         new->free = old->free << (shift - b->tshift);
     508       28533 :         ATOMIC_INIT(&new->refs, 1 | (ATOMIC_GET(&old->refs) & HEAPREMOVE));
     509             :         /* per the above, width > b->twidth, so certain combinations are
     510             :          * impossible */
     511       28533 :         switch (width) {
     512       26139 :         case 2:
     513       26139 :                 ps = (uint16_t *) new->base;
     514       26139 :                 switch (b->twidth) {
     515       26139 :                 case 1:
     516       26139 :                         pc = (uint8_t *) old->base;
     517     1284937 :                         for (i = 0; i < n; i++)
     518     1258798 :                                 ps[i] = pc[i];
     519       26139 :                         break;
     520             :                 default:
     521           0 :                         MT_UNREACHABLE();
     522             :                 }
     523             : #ifndef NDEBUG
     524             :                 /* valgrind */
     525       26139 :                 memset(ps + n, 0, new->size - n * 2);
     526             : #endif
     527       26139 :                 break;
     528        2394 :         case 4:
     529        2394 :                 pi = (uint32_t *) new->base;
     530        2394 :                 switch (b->twidth) {
     531        1339 :                 case 1:
     532        1339 :                         pc = (uint8_t *) old->base;
     533        1342 :                         for (i = 0; i < n; i++)
     534           3 :                                 pi[i] = pc[i] + GDK_VAROFFSET;
     535             :                         break;
     536        1055 :                 case 2:
     537        1055 :                         ps = (uint16_t *) old->base;
     538     1539287 :                         for (i = 0; i < n; i++)
     539     1538232 :                                 pi[i] = ps[i] + GDK_VAROFFSET;
     540             :                         break;
     541             :                 default:
     542           0 :                         MT_UNREACHABLE();
     543             :                 }
     544             : #ifndef NDEBUG
     545             :                 /* valgrind */
     546        2394 :                 memset(pi + n, 0, new->size - n * 4);
     547             : #endif
     548        2394 :                 break;
     549             : #if SIZEOF_VAR_T == 8
     550           0 :         case 8:
     551           0 :                 pl = (uint64_t *) new->base;
     552           0 :                 switch (b->twidth) {
     553           0 :                 case 1:
     554           0 :                         pc = (uint8_t *) old->base;
     555           0 :                         for (i = 0; i < n; i++)
     556           0 :                                 pl[i] = pc[i] + GDK_VAROFFSET;
     557             :                         break;
     558           0 :                 case 2:
     559           0 :                         ps = (uint16_t *) old->base;
     560           0 :                         for (i = 0; i < n; i++)
     561           0 :                                 pl[i] = ps[i] + GDK_VAROFFSET;
     562             :                         break;
     563           0 :                 case 4:
     564           0 :                         pi = (uint32_t *) old->base;
     565           0 :                         for (i = 0; i < n; i++)
     566           0 :                                 pl[i] = pi[i];
     567             :                         break;
     568             :                 default:
     569           0 :                         MT_UNREACHABLE();
     570             :                 }
     571             : #ifndef NDEBUG
     572             :                 /* valgrind */
     573           0 :                 memset(pl + n, 0, new->size - n * 8);
     574             : #endif
     575           0 :                 break;
     576             : #endif
     577             :         default:
     578           0 :                 MT_UNREACHABLE();
     579             :         }
     580       28533 :         MT_lock_set(&b->theaplock);
     581       28522 :         b->tshift = shift;
     582       28522 :         b->twidth = width;
     583       28522 :         if (cap > BATcapacity(b))
     584        1372 :                 BATsetcapacity(b, cap);
     585       28522 :         b->theap = new;
     586       28522 :         if (BBP_status(bid) & (BBPEXISTING|BBPDELETED) && b->oldtail == NULL) {
     587        2019 :                 b->oldtail = old;
     588        2019 :                 if ((ATOMIC_OR(&old->refs, DELAYEDREMOVE) & HEAPREFS) == 1) {
     589             :                         /* we have the only reference, we can free the
     590             :                          * memory */
     591        2019 :                         HEAPfree(old, false);
     592             :                 }
     593             :         } else {
     594       26503 :                 ValPtr p = BATgetprop_nolock(b, (enum prop_t) 20);
     595       53016 :                 HEAPdecref(old, p == NULL || strcmp(((Heap*) p->val.pval)->filename, old->filename) != 0);
     596             :         }
     597       28533 :         MT_lock_unset(&b->theaplock);
     598       28532 :         return GDK_SUCCEED;
     599             : }
     600             : 
     601             : /*
     602             :  * @- HEAPcopy
     603             :  * simple: alloc and copy. Notice that we suppose a preallocated
     604             :  * dst->filename (or NULL), which might be used in HEAPalloc().
     605             :  */
     606             : gdk_return
     607        3474 : HEAPcopy(Heap *dst, Heap *src, size_t offset)
     608             : {
     609        3474 :         if (offset > src->free)
     610             :                 offset = src->free;
     611        3474 :         if (HEAPalloc(dst, src->free - offset, 1) == GDK_SUCCEED) {
     612        3475 :                 dst->free = src->free - offset;
     613        3475 :                 memcpy(dst->base, src->base + offset, src->free - offset);
     614        3475 :                 dst->cleanhash = src->cleanhash;
     615        3475 :                 dst->dirty = true;
     616        3475 :                 return GDK_SUCCEED;
     617             :         }
     618             :         return GDK_FAIL;
     619             : }
     620             : 
     621             : /* Free the memory associated with the heap H.
     622             :  * Unlinks (removes) the associated file if the rmheap flag is set. */
     623             : void
     624    12777702 : HEAPfree(Heap *h, bool rmheap)
     625             : {
     626    12777702 :         if (h->base) {
     627     7419587 :                 if (h->farmid == 1 && (h->storage == STORE_MEM || h->storage == STORE_MMAP || h->storage == STORE_PRIV)) {
     628     6879130 :                         QryCtx *qc = MT_thread_get_qry_ctx();
     629     6879313 :                         if (qc)
     630     4163695 :                                 ATOMIC_SUB(&qc->datasize, h->size);
     631             :                 }
     632     7419770 :                 if (h->storage == STORE_MEM) {       /* plain memory */
     633     7417170 :                         TRC_DEBUG(HEAP, "HEAPfree %s %zu %p\n", h->filename, h->size, h->base);
     634     7417170 :                         GDKfree(h->base);
     635        2600 :                 } else if (h->storage == STORE_CMEM) {
     636             :                         //heap is stored in regular C memory rather than GDK memory,so we call free()
     637          47 :                         free(h->base);
     638        2553 :                 } else if (h->storage != STORE_NOWN) {       /* mapped file, or STORE_PRIV */
     639        5106 :                         gdk_return ret = GDKmunmap(h->base,
     640             :                                                    h->storage == STORE_PRIV ?
     641             :                                                    MMAP_COPY | MMAP_READ | MMAP_WRITE :
     642             :                                                    MMAP_READ | MMAP_WRITE,
     643             :                                                    h->size);
     644             : 
     645        2554 :                         if (ret != GDK_SUCCEED) {
     646           0 :                                 GDKsyserror("HEAPfree: %s was not mapped\n",
     647             :                                             h->filename);
     648           0 :                                 assert(0);
     649             :                         }
     650        2554 :                         TRC_DEBUG(HEAP, "munmap(base=%p, size=%zu) = %d\n",
     651             :                                   (void *)h->base, h->size, (int) ret);
     652             :                 }
     653             :         }
     654    12778194 :         h->base = NULL;
     655    12778194 :         if (rmheap && !GDKinmemory(h->farmid)) {
     656    12248318 :                 if (h->hasfile) {
     657       30919 :                         char *path = GDKfilepath(h->farmid, BATDIR, h->filename, NULL);
     658       30919 :                         if (path) {
     659       30919 :                                 int ret = MT_remove(path);
     660       30919 :                                 if (ret == -1) {
     661             :                                         /* unexpectedly not present */
     662           0 :                                         perror(path);
     663             :                                 }
     664       30919 :                                 assert(ret == 0);
     665       30919 :                                 GDKfree(path);
     666       30919 :                                 h->hasfile = false;
     667             :                         }
     668       30919 :                         path = GDKfilepath(h->farmid, BATDIR, h->filename, "new");
     669       30919 :                         if (path) {
     670             :                                 /* in practice, should never be present */
     671       30919 :                                 int ret = MT_remove(path);
     672       30919 :                                 if (ret == -1 && errno != ENOENT)
     673           0 :                                         perror(path);
     674       30919 :                                 assert(ret == -1 && errno == ENOENT);
     675       30919 :                                 GDKfree(path);
     676             :                         }
     677             : #ifndef NDEBUG
     678             :                 } else {
     679    12217399 :                         char *path = GDKfilepath(h->farmid, BATDIR, h->filename, NULL);
     680    12218195 :                         if (path) {
     681             :                                 /* should not be present */
     682    12218195 :                                 struct stat st;
     683    12218195 :                                 assert(stat(path, &st) == -1 && errno == ENOENT);
     684    12218720 :                                 GDKfree(path);
     685             :                         }
     686             : #endif
     687             :                 }
     688             :         }
     689    12780061 : }
     690             : 
     691             : void
     692    52486768 : HEAPdecref(Heap *h, bool remove)
     693             : {
     694    52486768 :         if (remove)
     695     9251022 :                 ATOMIC_OR(&h->refs, HEAPREMOVE);
     696    52486768 :         ATOMIC_BASE_TYPE refs = ATOMIC_DEC(&h->refs);
     697             :         //printf("dec ref(%d) %p %d\n", (int)h->refs, h, h->parentid);
     698    52486768 :         switch (refs & HEAPREFS) {
     699     9272693 :         case 0:
     700     9272693 :                 ATOMIC_DESTROY(&h->refs);
     701     9272693 :                 HEAPfree(h, (bool) (refs & HEAPREMOVE));
     702     9274056 :                 GDKfree(h);
     703     9274056 :                 break;
     704    19087009 :         case 1:
     705    19087009 :                 if (refs & DELAYEDREMOVE) {
     706             :                         /* only reference left is b->oldtail */
     707        1685 :                         HEAPfree(h, false);
     708             :                 }
     709             :                 break;
     710             :         default:
     711             :                 break;
     712             :         }
     713    52487757 : }
     714             : 
     715             : void
     716    43209639 : HEAPincref(Heap *h)
     717             : {
     718             :         //printf("inc ref(%d) %p %d\n", (int)h->refs, h, h->parentid);
     719    43209639 :         ATOMIC_INC(&h->refs);
     720    43209639 : }
     721             : 
     722             : /*
     723             :  * @- HEAPload
     724             :  *
     725             :  * If we find file X.new, we move it over X (if present) and open it.
     726             :  */
     727             : static gdk_return
     728       22723 : HEAPload_intern(Heap *h, const char *nme, const char *ext, const char *suffix, bool trunc)
     729             : {
     730       22723 :         size_t minsize;
     731       22723 :         int ret = 0;
     732       22723 :         char *srcpath, *dstpath;
     733       22723 :         lng t0;
     734             : 
     735       22723 :         if (h->storage == STORE_INVALID || h->newstorage == STORE_INVALID) {
     736       22691 :                 size_t allocated;
     737       45382 :                 h->storage = h->newstorage = h->size < (h->farmid == 0 ? GDK_mmap_minsize_persistent : GDK_mmap_minsize_transient) &&
     738       21728 :                         (allocated = GDKmem_cursize()) < GDK_mem_maxsize &&
     739       44419 :                         h->size < ((GDK_mem_maxsize - allocated) >> 6) ? STORE_MEM : STORE_MMAP;
     740             :         }
     741             : 
     742       22723 :         minsize = (h->size + GDK_mmap_pagesize - 1) & ~(GDK_mmap_pagesize - 1);
     743       22723 :         if (h->storage != STORE_MEM && minsize != h->size)
     744         121 :                 h->size = minsize;
     745             : 
     746             :         /* when a bat is made read-only, we can truncate any unused
     747             :          * space at the end of the heap */
     748       22723 :         if (trunc) {
     749             :                 /* round up mmap heap sizes to GDK_mmap_pagesize
     750             :                  * segments, also add some slack */
     751       21472 :                 int fd;
     752             : 
     753       21472 :                 if (minsize == 0)
     754           0 :                         minsize = GDK_mmap_pagesize; /* minimum of one page */
     755       21472 :                 if ((fd = GDKfdlocate(h->farmid, nme, "rb+", ext)) >= 0) {
     756       21472 :                         struct stat stb;
     757       21472 :                         if (fstat(fd, &stb) == 0 &&
     758       21472 :                             stb.st_size > (off_t) minsize) {
     759           0 :                                 ret = ftruncate(fd, minsize);
     760           0 :                                 TRC_DEBUG(HEAP,
     761             :                                           "ftruncate(file=%s.%s, size=%zu) = %d\n",
     762             :                                           nme, ext, minsize, ret);
     763           0 :                                 if (ret == 0) {
     764           0 :                                         h->size = minsize;
     765             :                                 }
     766             :                         }
     767       21471 :                         close(fd);
     768             :                 }
     769             :         }
     770             : 
     771       22722 :         TRC_DEBUG(HEAP, "%s%s%s,storage=%d,free=%zu,size=%zu\n",
     772             :                   nme, ext ? "." : "", ext ? ext : "",
     773             :                   (int) h->storage, h->free, h->size);
     774             : 
     775             :         /* On some OSs (WIN32,Solaris), it is prohibited to write to a
     776             :          * file that is open in MAP_PRIVATE (FILE_MAP_COPY) solution:
     777             :          * we write to a file named .ext.new.  This file, if present,
     778             :          * takes precedence. */
     779       22722 :         dstpath = GDKfilepath(h->farmid, BATDIR, nme, ext);
     780       22723 :         if (dstpath == NULL)
     781             :                 return GDK_FAIL;
     782       22723 :         minsize = strlen(dstpath) + strlen(suffix) + 1;
     783       22723 :         srcpath = GDKmalloc(minsize);
     784       22723 :         if (srcpath == NULL) {
     785           0 :                 GDKfree(dstpath);
     786           0 :                 return GDK_FAIL;
     787             :         }
     788       22723 :         strconcat_len(srcpath, minsize, dstpath, suffix, NULL);
     789             : 
     790       22723 :         t0 = GDKusec();
     791       22723 :         ret = MT_rename(srcpath, dstpath);
     792       22723 :         TRC_DEBUG(HEAP, "rename %s %s = %d %s ("LLFMT"usec)\n",
     793             :                   srcpath, dstpath, ret, ret < 0 ? GDKstrerror(errno, (char[128]){0}, 128) : "",
     794             :                   GDKusec() - t0);
     795       22723 :         GDKfree(srcpath);
     796       22723 :         GDKfree(dstpath);
     797             : 
     798             : #ifdef SIZE_CHECK_IN_HEAPS_ONLY
     799       22723 :         if (GDKvm_cursize() + h->size >= GDK_vm_maxsize &&
     800           0 :             !MT_thread_override_limits()) {
     801           0 :                 GDKerror("allocating too much memory (current: %zu, requested: %zu, limit: %zu)\n", GDKvm_cursize(), h->size, GDK_vm_maxsize);
     802           0 :                 return GDK_FAIL;
     803             :         }
     804             : #endif
     805             : 
     806       22723 :         size_t size = h->size;
     807       22723 :         QryCtx *qc = NULL;
     808       22723 :         if (h->storage != STORE_MEM)
     809         995 :                 size = (size + GDK_mmap_pagesize - 1) & ~(GDK_mmap_pagesize - 1);
     810       22723 :         if (h->farmid == 1 && (qc = MT_thread_get_qry_ctx()) != NULL) {
     811           0 :                 ATOMIC_BASE_TYPE sz = 0;
     812           0 :                 sz = ATOMIC_ADD(&qc->datasize, size);
     813           0 :                 sz += h->size;
     814           0 :                 if (qc->maxmem > 0 && sz > qc->maxmem) {
     815           0 :                         ATOMIC_SUB(&qc->datasize, size);
     816           0 :                         GDKerror("Query using too much memory.\n");
     817           0 :                         return GDK_FAIL;
     818             :                 }
     819             :         }
     820       22723 :         if (h->storage == STORE_MEM && h->free == 0) {
     821           0 :                 h->base = GDKmalloc(h->size);
     822           0 :                 h->wasempty = true;
     823             :         } else {
     824       22723 :                 if (h->free == 0) {
     825           0 :                         int fd = GDKfdlocate(h->farmid, nme, "wb", ext);
     826           0 :                         if (fd >= 0)
     827           0 :                                 close(fd);
     828           0 :                         h->wasempty = true;
     829             :                 }
     830       22723 :                 h->base = GDKload(h->farmid, nme, ext, h->free, &h->size, h->storage);
     831             :         }
     832       22723 :         if (h->base == NULL) {
     833           0 :                 if (qc != NULL)
     834           0 :                         ATOMIC_SUB(&qc->datasize, size);
     835           0 :                 return GDK_FAIL; /* file could  not be read satisfactorily */
     836             :         }
     837             : 
     838       22723 :         h->dirty = false;    /* we just read it, so it's clean */
     839       22723 :         return GDK_SUCCEED;
     840             : }
     841             : 
     842             : gdk_return
     843       22723 : HEAPload(Heap *h, const char *nme, const char *ext, bool trunc)
     844             : {
     845       22723 :         return HEAPload_intern(h, nme, ext, ".new", trunc);
     846             : }
     847             : 
     848             : /*
     849             :  * @- HEAPsave
     850             :  *
     851             :  * Saving STORE_MEM will do a write(fd, buf, size) in GDKsave
     852             :  * (explicit IO).
     853             :  *
     854             :  * Saving a STORE_PRIV heap X means that we must actually write to
     855             :  * X.new, thus we convert the mode passed to GDKsave to STORE_MEM.
     856             :  *
     857             :  * Saving STORE_MMAP will do a msync(buf, MSSYNC) in GDKsave (implicit
     858             :  * IO).
     859             :  *
     860             :  * After GDKsave returns successfully (>=0), we assume the heaps are
     861             :  * safe on stable storage.
     862             :  */
     863             : static gdk_return
     864      330587 : HEAPsave_intern(Heap *h, const char *nme, const char *ext, const char *suffix, bool dosync, BUN free, MT_Lock *lock)
     865             : {
     866      330587 :         storage_t store = h->newstorage;
     867      330587 :         long_str extension;
     868      330587 :         gdk_return rc;
     869             : 
     870      330587 :         if (h->base == NULL) {
     871           0 :                 GDKerror("no heap to save\n");
     872           0 :                 return GDK_FAIL;
     873             :         }
     874      330587 :         if (free == 0) {
     875             :                 /* nothing to see, please move on */
     876        1336 :                 if (lock)
     877        1336 :                         MT_lock_set(lock);
     878        1336 :                 h->wasempty = true;
     879           0 :                 if (lock)
     880        1336 :                         MT_lock_unset(lock);
     881        1336 :                 TRC_DEBUG(HEAP,
     882             :                           "not saving: "
     883             :                           "(%s.%s,storage=%d,free=%zu,size=%zu,dosync=%s)\n",
     884             :                           nme?nme:"", ext, (int) h->newstorage, free, h->size,
     885             :                           dosync?"true":"false");
     886        1336 :                 return GDK_SUCCEED;
     887             :         }
     888      329251 :         if (h->storage != STORE_MEM && store == STORE_PRIV) {
     889             :                 /* anonymous or private VM is saved as if it were malloced */
     890           0 :                 store = STORE_MEM;
     891           0 :                 assert(strlen(ext) + strlen(suffix) < sizeof(extension));
     892           0 :                 strconcat_len(extension, sizeof(extension), ext, suffix, NULL);
     893           0 :                 ext = extension;
     894      329251 :         } else if (store != STORE_MEM) {
     895        1030 :                 store = h->storage;
     896             :         }
     897      329251 :         TRC_DEBUG(HEAP,
     898             :                   "(%s.%s,storage=%d,free=%zu,size=%zu,dosync=%s)\n",
     899             :                   nme?nme:"", ext, (int) h->newstorage, free, h->size,
     900             :                   dosync?"true":"false");
     901      329251 :         rc = GDKsave(h->farmid, nme, ext, h->base, free, store, dosync);
     902      329251 :         if (lock)
     903      303883 :                 MT_lock_set(lock);
     904      329251 :         if (rc == GDK_SUCCEED) {
     905      329251 :                 h->hasfile = true;
     906      329251 :                 h->dirty = free != h->free;
     907      329251 :                 h->wasempty = false;
     908             :         } else {
     909           0 :                 h->dirty = true;
     910           0 :                 if (store != STORE_MMAP)
     911           0 :                         h->hasfile = false;
     912             :         }
     913      329251 :         if (lock)
     914      303883 :                 MT_lock_unset(lock);
     915             :         return rc;
     916             : }
     917             : 
     918             : gdk_return
     919      330587 : HEAPsave(Heap *h, const char *nme, const char *ext, bool dosync, BUN free, MT_Lock *lock)
     920             : {
     921      330587 :         return HEAPsave_intern(h, nme, ext, ".new", dosync, free, lock);
     922             : }
     923             : 
     924             : 
     925             : /* Return the (virtual) size of the heap. */
     926             : size_t
     927      321306 : HEAPvmsize(Heap *h)
     928             : {
     929      321306 :         if (h && h->base && h->free)
     930      150704 :                 return h->size;
     931             :         return 0;
     932             : }
     933             : 
     934             : /* Return the allocated size of the heap, i.e. if the heap is memory
     935             :  * mapped and not copy-on-write (privately mapped), return 0. */
     936             : size_t
     937      321306 : HEAPmemsize(Heap *h)
     938             : {
     939      321306 :         if (h && h->base && h->free && h->storage != STORE_MMAP)
     940      150348 :                 return h->size;
     941             :         return 0;
     942             : }
     943             : 
     944             : 
     945             : /*
     946             :  * @+ Standard Heap Library
     947             :  * This library contains some routines which implement a @emph{
     948             :  * malloc} and @emph{ free} function on the Monet @emph{Heap}
     949             :  * structure. They are useful when implementing a new @emph{
     950             :  * variable-size} atomic data type, or for implementing new search
     951             :  * accelerators.  All functions start with the prefix @emph{HEAP_}. T
     952             :  *
     953             :  * Due to non-careful design, the HEADER field was found to be
     954             :  * 32/64-bit dependent.  As we do not (yet) want to change the BAT
     955             :  * image on disk, This is now fixed by switching on-the-fly between
     956             :  * two representations. We ensure that the 64-bit memory
     957             :  * representation is just as long as the 32-bits version (20 bytes) so
     958             :  * the rest of the heap never needs to shift. The function
     959             :  * HEAP_checkformat converts at load time dynamically between the
     960             :  * layout found on disk and the memory format.  Recognition of the
     961             :  * header mode is done by looking at the first two ints: alignment
     962             :  * must be 4 or 8, and head can never be 4 or eight.
     963             :  *
     964             :  * TODO: user HEADER64 for both 32 and 64 bits (requires BAT format
     965             :  * change)
     966             :  */
     967             : 
     968             : #define HEAPVERSION     20030408
     969             : 
     970             : typedef struct heapheader {
     971             :         size_t head;            /* index to first free block */
     972             :         int alignment;          /* alignment of objects on heap */
     973             :         size_t firstblock;      /* first block in heap */
     974             :         int version;
     975             :         int (*sizefcn)(const void *);   /* ADT function to ask length */
     976             : } HEADER32;
     977             : 
     978             : typedef struct {
     979             :         int version;
     980             :         int alignment;
     981             :         size_t head;
     982             :         size_t firstblock;
     983             :         int (*sizefcn)(const void *);
     984             : } HEADER64;
     985             : 
     986             : #if SIZEOF_SIZE_T==8
     987             : typedef HEADER64 HEADER;
     988             : typedef HEADER32 HEADER_OTHER;
     989             : #else
     990             : typedef HEADER32 HEADER;
     991             : typedef HEADER64 HEADER_OTHER;
     992             : #endif
     993             : typedef struct hfblock {
     994             :         size_t size;            /* Size of this block in freelist */
     995             :         size_t next;            /* index of next block */
     996             : } CHUNK;
     997             : 
     998             : #define roundup_8(x)    (((x)+7)&~7)
     999             : #define roundup_4(x)    (((x)+3)&~3)
    1000             : #define blocksize(h,p)  ((p)->size)
    1001             : 
    1002             : static inline size_t
    1003        2183 : roundup_num(size_t number, int alignment)
    1004             : {
    1005        2183 :         size_t rval;
    1006             : 
    1007        2183 :         rval = number + (size_t) alignment - 1;
    1008        2183 :         rval -= (rval % (size_t) alignment);
    1009        2183 :         return rval;
    1010             : }
    1011             : 
    1012             : #define HEAP_index(HEAP,INDEX,TYPE)     ((TYPE *)((char *) (HEAP)->base + (INDEX)))
    1013             : 
    1014             : 
    1015             : static void
    1016        2183 : HEAP_empty(Heap *heap, size_t nprivate, int alignment)
    1017             : {
    1018             :         /* Find position of header block. */
    1019        2183 :         HEADER *hheader = HEAP_index(heap, 0, HEADER);
    1020             : 
    1021             :         /* Calculate position of first and only free block. */
    1022        2183 :         size_t head = roundup_num((size_t) (roundup_8(sizeof(HEADER)) + roundup_8(nprivate)), alignment);
    1023        2183 :         CHUNK *headp = HEAP_index(heap, head, CHUNK);
    1024             : 
    1025        2183 :         assert(roundup_8(sizeof(HEADER)) + roundup_8(nprivate) <= VAR_MAX);
    1026             : 
    1027             :         /* Fill header block. */
    1028        2183 :         hheader->head = head;
    1029        2183 :         hheader->sizefcn = NULL;
    1030        2183 :         hheader->alignment = alignment;
    1031        2183 :         hheader->firstblock = head;
    1032        2183 :         hheader->version = HEAPVERSION;
    1033             : 
    1034             :         /* Fill first free block. */
    1035        2183 :         assert(heap->size - head <= VAR_MAX);
    1036        2183 :         headp->size = (size_t) (heap->size - head);
    1037        2183 :         headp->next = 0;
    1038             : 
    1039        2183 :         heap->dirty = true;
    1040        2183 : }
    1041             : 
    1042             : gdk_return
    1043        2183 : HEAP_initialize(Heap *heap, size_t nbytes, size_t nprivate, int alignment)
    1044             : {
    1045             :         /* For now we know about two alignments. */
    1046        2183 :         if (alignment != 8) {
    1047             :                 alignment = 4;
    1048             :         }
    1049        2183 :         if ((size_t) alignment < sizeof(size_t))
    1050             :                 alignment = (int) sizeof(size_t);
    1051             : 
    1052             :         /* Calculate number of bytes needed for heap + structures. */
    1053             :         {
    1054        2183 :                 size_t total = 100 + nbytes + nprivate + sizeof(HEADER) + sizeof(CHUNK);
    1055             : 
    1056        2183 :                 total = roundup_8(total);
    1057        2183 :                 if (HEAPalloc(heap, total, 1) != GDK_SUCCEED)
    1058             :                         return GDK_FAIL;
    1059        2183 :                 heap->free = heap->size;
    1060             :         }
    1061             : 
    1062             :         /* initialize heap as empty */
    1063        2183 :         HEAP_empty(heap, nprivate, alignment);
    1064        2183 :         return GDK_SUCCEED;
    1065             : }
    1066             : 
    1067             : 
    1068             : var_t
    1069     7353224 : HEAP_malloc(BAT *b, size_t nbytes)
    1070             : {
    1071     7353224 :         Heap *heap = b->tvheap;
    1072     7353224 :         size_t block, trail, ttrail;
    1073     7353224 :         CHUNK *blockp;
    1074     7353224 :         CHUNK *trailp;
    1075     7353224 :         HEADER *hheader = HEAP_index(heap, 0, HEADER);
    1076             : 
    1077             :         /* add space for size field */
    1078     7353224 :         nbytes += hheader->alignment;
    1079     7353224 :         nbytes = roundup_8(nbytes);
    1080     7353224 :         if (nbytes < sizeof(CHUNK))
    1081             :                 nbytes = (size_t) sizeof(CHUNK);
    1082             : 
    1083             :         /* block  -- points to block with acceptable size (if available).
    1084             :          * trail  -- points to predecessor of block.
    1085             :          * ttrail -- points to predecessor of trail.
    1086             :          */
    1087     7353224 :         ttrail = 0;
    1088     7353224 :         trail = 0;
    1089     7353345 :         for (block = hheader->head; block != 0; block = blockp->next) {
    1090     7353190 :                 blockp = HEAP_index(heap, block, CHUNK);
    1091             : 
    1092     7353190 :                 assert(trail == 0 || block > trail);
    1093     7353190 :                 if (trail != 0 && block <= trail) {
    1094           0 :                         GDKerror("Free list is not orderered\n");
    1095           0 :                         return (var_t) -1;
    1096             :                 }
    1097             : 
    1098     7353190 :                 if (blockp->size >= nbytes)
    1099             :                         break;
    1100         121 :                 ttrail = trail;
    1101         121 :                 trail = block;
    1102             :         }
    1103             : 
    1104             :         /* If no block of acceptable size is found we try to enlarge
    1105             :          * the heap. */
    1106     7353224 :         if (block == 0) {
    1107         155 :                 size_t newsize;
    1108             : 
    1109         155 :                 assert(heap->free + MAX(heap->free, nbytes) <= VAR_MAX);
    1110             : #if SIZEOF_SIZE_T == 4
    1111             :                 newsize = MIN(heap->free, (size_t) 1 << 20);
    1112             : #else
    1113         155 :                 newsize = MIN(heap->free, (size_t) 1 << 30);
    1114             : #endif
    1115         155 :                 newsize = (size_t) roundup_8(heap->free + MAX(newsize, nbytes));
    1116         155 :                 assert(heap->free <= VAR_MAX);
    1117         155 :                 block = (size_t) heap->free; /* current end-of-heap */
    1118             : 
    1119             :                 /* Increase the size of the heap. */
    1120         155 :                 TRC_DEBUG(HEAP, "HEAPextend in HEAP_malloc %s %zu %zu\n", heap->filename, heap->size, newsize);
    1121         155 :                 if (HEAPgrow(&b->tvheap, newsize, false) != GDK_SUCCEED) {
    1122             :                         return (var_t) -1;
    1123             :                 }
    1124         155 :                 heap = b->tvheap;
    1125         155 :                 heap->free = newsize;
    1126         155 :                 heap->dirty = true;
    1127         155 :                 hheader = HEAP_index(heap, 0, HEADER);
    1128             : 
    1129         155 :                 blockp = HEAP_index(heap, block, CHUNK);
    1130         155 :                 trailp = HEAP_index(heap, trail, CHUNK);
    1131             : 
    1132         155 :                 blockp->next = 0;
    1133         155 :                 assert(heap->free - block <= VAR_MAX);
    1134         155 :                 blockp->size = (size_t) (heap->free - block);     /* determine size of allocated block */
    1135             : 
    1136             :                 /* Try to join the last block in the freelist and the
    1137             :                  * newly allocated memory */
    1138         155 :                 if ((trail != 0) && (trail + trailp->size == block)) {
    1139         121 :                         trailp->size += blockp->size;
    1140         121 :                         trailp->next = blockp->next;
    1141             : 
    1142         121 :                         block = trail;
    1143         121 :                         trail = ttrail;
    1144             :                 }
    1145             :         }
    1146             : 
    1147             :         /* Now we have found a block which is big enough in block.
    1148             :          * The predecessor of this block is in trail. */
    1149     7353224 :         blockp = HEAP_index(heap, block, CHUNK);
    1150             : 
    1151             :         /* If selected block is bigger than block needed split block
    1152             :          * in two.
    1153             :          * TUNE: use different amount than 2*sizeof(CHUNK) */
    1154     7353224 :         if (blockp->size >= nbytes + 2 * sizeof(CHUNK)) {
    1155     7353188 :                 size_t newblock = block + nbytes;
    1156     7353188 :                 CHUNK *newblockp = HEAP_index(heap, newblock, CHUNK);
    1157             : 
    1158     7353188 :                 newblockp->size = blockp->size - nbytes;
    1159     7353188 :                 newblockp->next = blockp->next;
    1160             : 
    1161     7353188 :                 blockp->next = newblock;
    1162     7353188 :                 blockp->size = nbytes;
    1163             :         }
    1164             : 
    1165             :         /* Delete block from freelist */
    1166     7353224 :         if (trail == 0) {
    1167     7353224 :                 hheader->head = blockp->next;
    1168             :         } else {
    1169           0 :                 trailp = HEAP_index(heap, trail, CHUNK);
    1170             : 
    1171           0 :                 trailp->next = blockp->next;
    1172             :         }
    1173             : 
    1174     7353224 :         block += hheader->alignment;
    1175     7353224 :         return (var_t) block;
    1176             : }
    1177             : 
    1178             : void
    1179          35 : HEAP_free(Heap *heap, var_t mem)
    1180             : {
    1181             : /* we cannot free pieces of the heap because we may have used
    1182             :  * append_varsized_bat to create the heap; if we did, there may be
    1183             :  * multiple locations in the offset heap that refer to the same entry in
    1184             :  * the vheap, so we cannot free any until we free all */
    1185          35 :         (void) heap;
    1186          35 :         (void) mem;
    1187             : #if 0
    1188             :         HEADER *hheader = HEAP_index(heap, 0, HEADER);
    1189             :         CHUNK *beforep;
    1190             :         CHUNK *blockp;
    1191             :         CHUNK *afterp;
    1192             :         size_t after, before, block = mem;
    1193             : 
    1194             :         assert(hheader->alignment == 8 || hheader->alignment == 4);
    1195             :         if (hheader->alignment != 8 && hheader->alignment != 4) {
    1196             :                 GDKerror("Heap structure corrupt\n");
    1197             :                 return;
    1198             :         }
    1199             : 
    1200             :         block -= hheader->alignment;
    1201             :         blockp = HEAP_index(heap, block, CHUNK);
    1202             : 
    1203             :         /* block   -- block which we want to free
    1204             :          * before  -- first free block before block
    1205             :          * after   -- first free block after block
    1206             :          */
    1207             : 
    1208             :         before = 0;
    1209             :         for (after = hheader->head; after != 0; after = HEAP_index(heap, after, CHUNK)->next) {
    1210             :                 if (after > block)
    1211             :                         break;
    1212             :                 before = after;
    1213             :         }
    1214             : 
    1215             :         beforep = HEAP_index(heap, before, CHUNK);
    1216             :         afterp = HEAP_index(heap, after, CHUNK);
    1217             : 
    1218             :         /* If it is not the last free block. */
    1219             :         if (after != 0) {
    1220             :                 /*
    1221             :                  * If this block and the block after are consecutive.
    1222             :                  */
    1223             :                 if (block + blockp->size == after) {
    1224             :                         /*
    1225             :                          * We unite them.
    1226             :                          */
    1227             :                         blockp->size += afterp->size;
    1228             :                         blockp->next = afterp->next;
    1229             :                 } else
    1230             :                         blockp->next = after;
    1231             :         } else {
    1232             :                 /*
    1233             :                  * It is the last block in the freelist.
    1234             :                  */
    1235             :                 blockp->next = 0;
    1236             :         }
    1237             : 
    1238             :         /*
    1239             :          * If it is not the first block in the list.
    1240             :          */
    1241             :         if (before != 0) {
    1242             :                 /*
    1243             :                  * If the before block and this block are consecutive.
    1244             :                  */
    1245             :                 if (before + beforep->size == block) {
    1246             :                         /*
    1247             :                          * We unite them.
    1248             :                          */
    1249             :                         beforep->size += blockp->size;
    1250             :                         beforep->next = blockp->next;
    1251             :                 } else
    1252             :                         beforep->next = block;
    1253             :         } else {
    1254             :                 /*
    1255             :                  * Add block at head of free list.
    1256             :                  */
    1257             :                 hheader->head = block;
    1258             :         }
    1259             : #endif
    1260          35 : }
    1261             : 
    1262             : void
    1263         135 : HEAP_recover(Heap *h, const var_t *offsets, BUN noffsets)
    1264             : {
    1265         135 :         HEADER *hheader;
    1266         135 :         CHUNK *blockp;
    1267         135 :         size_t dirty = 0;
    1268         135 :         var_t maxoff = 0;
    1269         135 :         BUN i;
    1270             : 
    1271         135 :         if (!h->cleanhash)
    1272             :                 return;
    1273         135 :         hheader = HEAP_index(h, 0, HEADER);
    1274         135 :         assert(h->free >= sizeof(HEADER));
    1275         135 :         assert(hheader->version == HEAPVERSION);
    1276         135 :         assert(h->size >= hheader->firstblock);
    1277         414 :         for (i = 0; i < noffsets; i++)
    1278         279 :                 if (offsets[i] > maxoff)
    1279             :                         maxoff = offsets[i];
    1280         135 :         assert(maxoff < h->free);
    1281         135 :         if (maxoff == 0) {
    1282           0 :                 if (hheader->head != hheader->firstblock) {
    1283           0 :                         hheader->head = hheader->firstblock;
    1284           0 :                         dirty = sizeof(HEADER);
    1285             :                 }
    1286           0 :                 blockp = HEAP_index(h, hheader->firstblock, CHUNK);
    1287           0 :                 if (blockp->next != 0 ||
    1288           0 :                     blockp->size != h->size - hheader->head) {
    1289           0 :                         blockp->size = (size_t) (h->size - hheader->head);
    1290           0 :                         blockp->next = 0;
    1291           0 :                         dirty = hheader->firstblock + sizeof(CHUNK);
    1292             :                 }
    1293             :         } else {
    1294         135 :                 size_t block = maxoff - hheader->alignment;
    1295         135 :                 size_t end = block + *HEAP_index(h, block, size_t);
    1296         135 :                 size_t trail;
    1297             : 
    1298         135 :                 assert(end <= h->free);
    1299         135 :                 if (end + sizeof(CHUNK) <= h->free) {
    1300         135 :                         blockp = HEAP_index(h, end, CHUNK);
    1301         135 :                         if (hheader->head <= end &&
    1302         135 :                             blockp->next == 0 &&
    1303         135 :                             blockp->size == h->free - end)
    1304             :                                 return;
    1305           0 :                 } else if (hheader->head == 0) {
    1306             :                         /* no free space after last allocated block
    1307             :                          * and no free list */
    1308             :                         return;
    1309             :                 }
    1310           0 :                 block = hheader->head;
    1311           0 :                 trail = 0;
    1312           0 :                 while (block < maxoff && block != 0) {
    1313           0 :                         blockp = HEAP_index(h, block, CHUNK);
    1314           0 :                         trail = block;
    1315           0 :                         block = blockp->next;
    1316             :                 }
    1317           0 :                 if (trail == 0) {
    1318             :                         /* no free list */
    1319           0 :                         if (end + sizeof(CHUNK) > h->free) {
    1320             :                                 /* no free space after last allocated
    1321             :                                  * block */
    1322           0 :                                 if (hheader->head != 0) {
    1323           0 :                                         hheader->head = 0;
    1324           0 :                                         dirty = sizeof(HEADER);
    1325             :                                 }
    1326             :                         } else {
    1327             :                                 /* there is free space after last
    1328             :                                  * allocated block */
    1329           0 :                                 if (hheader->head != end) {
    1330           0 :                                         hheader->head = end;
    1331           0 :                                         dirty = sizeof(HEADER);
    1332             :                                 }
    1333           0 :                                 blockp = HEAP_index(h, end, CHUNK);
    1334           0 :                                 if (blockp->next != 0 ||
    1335           0 :                                     blockp->size != h->free - end) {
    1336           0 :                                         blockp->next = 0;
    1337           0 :                                         blockp->size = h->free - end;
    1338           0 :                                         dirty = end + sizeof(CHUNK);
    1339             :                                 }
    1340             :                         }
    1341             :                 } else {
    1342             :                         /* there is a free list */
    1343           0 :                         blockp = HEAP_index(h, trail, CHUNK);
    1344           0 :                         if (end + sizeof(CHUNK) > h->free) {
    1345             :                                 /* no free space after last allocated
    1346             :                                  * block */
    1347           0 :                                 if (blockp->next != 0) {
    1348           0 :                                         blockp->next = 0;
    1349           0 :                                         dirty = trail + sizeof(CHUNK);
    1350             :                                 }
    1351             :                         } else {
    1352             :                                 /* there is free space after last
    1353             :                                  * allocated block */
    1354           0 :                                 if (blockp->next != end) {
    1355           0 :                                         blockp->next = end;
    1356           0 :                                         dirty = trail + sizeof(CHUNK);
    1357             :                                 }
    1358           0 :                                 blockp = HEAP_index(h, end, CHUNK);
    1359           0 :                                 if (blockp->next != 0 ||
    1360           0 :                                     blockp->size != h->free - end) {
    1361           0 :                                         blockp->next = 0;
    1362           0 :                                         blockp->size = h->free - end;
    1363           0 :                                         dirty = end + sizeof(CHUNK);
    1364             :                                 }
    1365             :                         }
    1366             :                 }
    1367             :         }
    1368           0 :         h->cleanhash = false;
    1369           0 :         if (dirty) {
    1370           0 :                 if (h->storage == STORE_MMAP) {
    1371           0 :                         if (!(ATOMIC_GET(&GDKdebug) & NOSYNCMASK))
    1372           0 :                                 (void) MT_msync(h->base, dirty);
    1373             :                         else
    1374           0 :                                 h->dirty = true;
    1375             :                 } else
    1376           0 :                         h->dirty = true;
    1377             :         }
    1378             : }

Generated by: LCOV version 1.14