LCOV - code coverage report
Current view: top level - gdk - gdk_align.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 127 200 63.5 %
Date: 2024-04-25 20:03:45 Functions: 4 6 66.7 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : /*
      14             :  * @a Peter Boncz, Niels Nes
      15             :  * @* BAT Alignment
      16             :  * For BATs that result from a n-ary relational scheme it may help to
      17             :  * align the BATs on their head value. In particular, it permits
      18             :  * replacing a hash-join by a merge-join, which is significantly
      19             :  * faster on large tables. Especially if the BATs involved cause page
      20             :  * activity or when you can not afford the large hash structures to
      21             :  * speed-up processing.
      22             :  *
      23             :  * For orthogonality, we support alignment between arbitrary columns
      24             :  * (head or tail).
      25             :  *
      26             :  * All standard GDK set-calls update the alignment info in their
      27             :  * respective ways. For example, the routine @emph{BUNclustercopy}
      28             :  * shuffles the first argument, such that the BUNs are in the same
      29             :  * order as those in the second argument.  This operation will mark
      30             :  * both columns of the first @emph{BAT} as synced with the second
      31             :  * (likewise, @emph{Colcopy()}, which makes a copy, instead of
      32             :  * in-place shuffling, has the same alignment effect.
      33             :  *
      34             :  * Each alignment sequence is given a unique identifier, so as to
      35             :  * easily detect this situation. It is retained in the @emph{BAT
      36             :  * descriptor}.
      37             :  * @+ Alignment Design Considerations
      38             :  * Alignment primitives require the right hooks to be inserted in
      39             :  * several places in the GDK, apart form this file:
      40             :  * @itemize
      41             :  * @item @emph{ BUN update operations}.
      42             :  * The updated BATs have to be marked as un-aligned.
      43             :  * @item @emph{ set operations}.
      44             :  * For most relational operations some statements can be made about
      45             :  * the size and order of the BATs they produce. This information can
      46             :  * be formalized by indicating alignment information automatically.
      47             :  * @item @emph{ transaction operations}.
      48             :  * Alignment statuses must be kept consistent under database commits
      49             :  * and aborts.
      50             :  * @end itemize
      51             :  */
      52             : #include "monetdb_config.h"
      53             : #include "gdk.h"
      54             : #include "gdk_private.h"
      55             : 
      56             : /* Return TRUE if the two BATs are aligned (same size, same
      57             :  * hseqbase). */
      58             : int
      59           0 : ALIGNsynced(BAT *b1, BAT *b2)
      60             : {
      61           0 :         if (b1 == NULL || b2 == NULL)
      62             :                 return 0;
      63             : 
      64           0 :         assert(!is_oid_nil(b1->hseqbase));
      65           0 :         assert(!is_oid_nil(b2->hseqbase));
      66             : 
      67           0 :         return BATcount(b1) == BATcount(b2) && b1->hseqbase == b2->hseqbase;
      68             : }
      69             : 
      70             : /*
      71             :  * @+ View BATS
      72             :  * The general routine for getting a 'view' BAT upon another BAT is
      73             :  * @emph{VIEWcreate}. On this @emph{#read-only} BAT (there is kernel
      74             :  * support for this), you can then make vertical slices.
      75             :  *
      76             :  * It is possible to create a view on a writable BAT. Updates in the
      77             :  * parent are then automatically reflected in the VIEW.  Note that the
      78             :  * VIEW bat itself can never be modified.
      79             :  *
      80             :  * Horizontal views should only be given out on a view BAT, but only
      81             :  * if it is dead sure the parent BAT is read-only.  This because they
      82             :  * cannot physically share the batBuns heap with the parent, as they
      83             :  * need a modified version.
      84             :  */
      85             : BAT *
      86     6810544 : VIEWcreate(oid seq, BAT *b)
      87             : {
      88     6810544 :         BAT *bn;
      89     6810544 :         bat tp = 0;
      90             : 
      91     6810544 :         BATcheck(b, NULL);
      92             : 
      93     6810544 :         if (b->ttype == TYPE_void) {
      94             :                 /* we don't do views on void bats */
      95        2221 :                 return BATdense(seq, b->tseqbase, b->batCount);
      96             :         }
      97             : 
      98     6808323 :         bn = BATcreatedesc(seq, b->ttype, false, TRANSIENT, 0);
      99     6807865 :         if (bn == NULL)
     100             :                 return NULL;
     101     6807865 :         assert(bn->theap == NULL);
     102             : 
     103     6807865 :         MT_lock_set(&b->theaplock);
     104     6808183 :         bn->batInserted = 0;
     105     6808183 :         bn->batCount = b->batCount;
     106     6808183 :         bn->batCapacity = b->batCapacity;
     107     6808183 :         bn->batRestricted = BAT_READ;
     108             : 
     109             :         /* the T column descriptor is fully copied except for the
     110             :          * accelerator data. We need copies because in case of a mark,
     111             :          * we are going to override a column with a void. */
     112     6808183 :         bn->tkey = b->tkey;
     113     6808183 :         bn->tseqbase = b->tseqbase;
     114     6808183 :         bn->tsorted = b->tsorted;
     115     6808183 :         bn->trevsorted = b->trevsorted;
     116     6808183 :         bn->twidth = b->twidth;
     117     6808183 :         bn->tshift = b->tshift;
     118     6808183 :         bn->tnonil = b->tnonil;
     119     6808183 :         bn->tnil = b->tnil;
     120     6808183 :         bn->tnokey[0] = b->tnokey[0];
     121     6808183 :         bn->tnokey[1] = b->tnokey[1];
     122     6808183 :         bn->tnosorted = b->tnosorted;
     123     6808183 :         bn->tnorevsorted = b->tnorevsorted;
     124     6808183 :         bn->tminpos = b->tminpos;
     125     6808183 :         bn->tmaxpos = b->tmaxpos;
     126     6808183 :         bn->tunique_est = b->tunique_est;
     127     6808183 :         bn->theap = b->theap;
     128     6808183 :         bn->tbaseoff = b->tbaseoff;
     129     6808183 :         bn->tvheap = b->tvheap;
     130             : 
     131     6808183 :         tp = VIEWtparent(b);
     132     6808183 :         if (tp == 0 && b->ttype != TYPE_void)
     133     5740491 :                 tp = b->batCacheid;
     134     6808183 :         assert(b->ttype != TYPE_void || !tp);
     135     6808183 :         HEAPincref(b->theap);
     136     6808134 :         if (b->tvheap)
     137     1471430 :                 HEAPincref(b->tvheap);
     138     6808149 :         MT_lock_unset(&b->theaplock);
     139             : 
     140     6807499 :         if (BBPcacheit(bn, true) != GDK_SUCCEED) {      /* enter in BBP */
     141           0 :                 if (bn->tvheap)
     142           0 :                         HEAPdecref(bn->tvheap, false);
     143           0 :                 HEAPdecref(bn->theap, false);
     144           0 :                 MT_lock_destroy(&bn->theaplock);
     145           0 :                 MT_lock_destroy(&bn->batIdxLock);
     146           0 :                 MT_rwlock_destroy(&bn->thashlock);
     147           0 :                 GDKfree(bn);
     148           0 :                 return NULL;
     149             :         }
     150     6807723 :         BBPretain(bn->theap->parentid);
     151     6807437 :         if (bn->tvheap)
     152     1471382 :                 BBPretain(bn->tvheap->parentid);
     153     6807160 :         TRC_DEBUG(ALGO, ALGOBATFMT " -> " ALGOBATFMT "\n",
     154             :                   ALGOBATPAR(b), ALGOBATPAR(bn));
     155             :         return bn;
     156             : }
     157             : 
     158             : /*
     159             :  * The BATmaterialize routine produces in-place materialized version
     160             :  * of a void bat (which should not be a VIEW) (later we should add the
     161             :  * code for VIEWs).
     162             :  */
     163             : 
     164             : gdk_return
     165       13355 : BATmaterialize(BAT *b, BUN cap)
     166             : {
     167       13355 :         Heap *tail;
     168       13355 :         Heap *h, *vh = NULL;
     169       13355 :         BUN p, q;
     170       13355 :         oid t, *x;
     171             : 
     172       13355 :         BATcheck(b, GDK_FAIL);
     173       13355 :         assert(!isVIEW(b));
     174       13355 :         if (cap == BUN_NONE || cap < BATcapacity(b))
     175       12221 :                 cap = BATcapacity(b);
     176       13355 :         if (b->ttype != TYPE_void) {
     177             :                 /* no voids; just call BATextend to make sure of capacity */
     178       12221 :                 return BATextend(b, cap);
     179             :         }
     180             : 
     181        1134 :         if ((tail = GDKmalloc(sizeof(Heap))) == NULL)
     182             :                 return GDK_FAIL;
     183        1134 :         p = 0;
     184        1134 :         q = BATcount(b);
     185        1134 :         assert(cap >= q - p);
     186        1134 :         TRC_DEBUG(ALGO, "BATmaterialize(" ALGOBATFMT ")\n", ALGOBATPAR(b));
     187             : 
     188             :         /* cleanup possible ACC's */
     189        1134 :         HASHdestroy(b);
     190        1134 :         IMPSdestroy(b);
     191        1134 :         OIDXdestroy(b);
     192             : 
     193        2268 :         *tail = (Heap) {
     194        1134 :                 .farmid = BBPselectfarm(b->batRole, TYPE_oid, offheap),
     195        1134 :                 .parentid = b->batCacheid,
     196             :                 .dirty = true,
     197             :         };
     198        1134 :         settailname(tail, BBP_physical(b->batCacheid), TYPE_oid, 0);
     199        1134 :         if (HEAPalloc(tail, cap, sizeof(oid)) != GDK_SUCCEED) {
     200           0 :                 GDKfree(tail);
     201           0 :                 return GDK_FAIL;
     202             :         }
     203        1134 :         x = (oid *) tail->base;
     204        1134 :         t = b->tseqbase;
     205        1134 :         if (is_oid_nil(t)) {
     206           0 :                 for (p = 0; p < q; p++)
     207           0 :                         x[p] = oid_nil;
     208             :         } else {
     209     4855504 :                 for (p = 0; p < q; p++)
     210     4854370 :                         x[p] = t++;
     211             :         }
     212        1134 :         ATOMIC_INIT(&tail->refs, 1);
     213             :         /* point of no return */
     214        1134 :         MT_lock_set(&b->theaplock);
     215        1134 :         assert((ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 0);
     216             :         /* can only look at tvheap when lock is held */
     217        1134 :         if (complex_cand(b)) {
     218           0 :                 assert(b->batRole == TRANSIENT);
     219           0 :                 if (negoid_cand(b)) {
     220           0 :                         assert(ccand_free(b) % SIZEOF_OID == 0);
     221           0 :                         BUN nexc = (BUN) (ccand_free(b) / SIZEOF_OID);
     222           0 :                         const oid *exc = (const oid *) ccand_first(b);
     223           0 :                         BUN i;
     224           0 :                         for (p = 0, i = 0; p < q; p++) {
     225           0 :                                 while (i < nexc && t == exc[i]) {
     226           0 :                                         i++;
     227           0 :                                         t++;
     228             :                                 }
     229           0 :                                 x[p] = t++;
     230             :                         }
     231             :                 } else {
     232           0 :                         assert(mask_cand(b));
     233           0 :                         BUN nmsk = (BUN) (ccand_free(b) / sizeof(uint32_t));
     234           0 :                         const uint32_t *src = (const uint32_t *) ccand_first(b);
     235           0 :                         BUN n = 0;
     236           0 :                         t -= (oid) CCAND(b)->firstbit;
     237           0 :                         for (p = 0; p < nmsk; p++) {
     238           0 :                                 uint32_t val = src[p];
     239           0 :                                 if (val == 0)
     240           0 :                                         continue;
     241           0 :                                 for (uint32_t i = 0; i < 32; i++) {
     242           0 :                                         if (val & (1U << i)) {
     243           0 :                                                 assert(n < q);
     244           0 :                                                 x[n++] = t + p * 32 + i;
     245             :                                         }
     246             :                                 }
     247             :                         }
     248           0 :                         assert(n == q);
     249             :                 }
     250           0 :                 vh = b->tvheap;
     251           0 :                 b->tvheap = NULL;
     252             :         }
     253        1134 :         h = b->theap;
     254        1134 :         b->theap = tail;
     255        1134 :         b->tbaseoff = 0;
     256        1134 :         b->theap->dirty = true;
     257        1134 :         b->tunique_est = is_oid_nil(t) ? 1.0 : (double) b->batCount;
     258        1134 :         b->ttype = TYPE_oid;
     259        1134 :         BATsetdims(b, 0);
     260        1134 :         BATsetcount(b, b->batCount);
     261        1134 :         BATsetcapacity(b, cap);
     262        1134 :         MT_lock_unset(&b->theaplock);
     263        1134 :         if (h->parentid != b->batCacheid)
     264           0 :                 BBPrelease(h->parentid);
     265        1134 :         HEAPdecref(h, false);
     266        1134 :         if (vh) {
     267           0 :                 if (vh->parentid != b->batCacheid)
     268           0 :                         BBPrelease(vh->parentid);
     269           0 :                 HEAPdecref(vh, true);
     270             :         }
     271             : 
     272             :         return GDK_SUCCEED;
     273             : }
     274             : 
     275             : /*
     276             :  * The remainder are utilities to manipulate the BAT view and not to
     277             :  * forget some details in the process.  It expects a position range in
     278             :  * the underlying BAT and compensates for outliers.
     279             :  */
     280             : void
     281     6795756 : VIEWboundsbi(BATiter *bi, BAT *view, BUN l, BUN h)
     282             : {
     283     6795756 :         BUN cnt;
     284     6795756 :         BUN baseoff;
     285             : 
     286     6795756 :         if (bi == NULL || view == NULL)
     287             :                 return;
     288     6795756 :         if (h > bi->count)
     289             :                 h = bi->count;
     290     6795756 :         baseoff = bi->baseoff;
     291     6795756 :         if (h < l)
     292             :                 h = l;
     293     6795756 :         cnt = h - l;
     294     6795756 :         view->batInserted = 0;
     295     6795756 :         if (view->ttype != TYPE_void) {
     296     6794128 :                 view->tbaseoff = baseoff + l;
     297             :         }
     298     6795756 :         BATsetcount(view, cnt);
     299     6795052 :         BATsetcapacity(view, cnt);
     300     6795683 :         if (view->tnosorted > l && view->tnosorted < l + cnt)
     301     3319203 :                 view->tnosorted -= l;
     302             :         else
     303     3476480 :                 view->tnosorted = 0;
     304     6795683 :         if (view->tnorevsorted > l && view->tnorevsorted < l + cnt)
     305     4207838 :                 view->tnorevsorted -= l;
     306             :         else
     307     2587845 :                 view->tnorevsorted = 0;
     308     6795683 :         if (view->tnokey[0] >= l && view->tnokey[0] < l + cnt &&
     309     6065290 :             view->tnokey[1] >= l && view->tnokey[1] < l + cnt &&
     310             :             view->tnokey[0] != view->tnokey[1]) {
     311     2436529 :                 view->tnokey[0] -= l;
     312     2436529 :                 view->tnokey[1] -= l;
     313             :         } else {
     314     4359154 :                 view->tnokey[0] = view->tnokey[1] = 0;
     315             :         }
     316     6795683 :         if (view->tminpos >= l && view->tminpos < l + cnt)
     317     5400041 :                 view->tminpos -= l;
     318             :         else
     319     1395642 :                 view->tminpos = BUN_NONE;
     320     6795683 :         if (view->tmaxpos >= l && view->tmaxpos < l + cnt)
     321     5284286 :                 view->tmaxpos -= l;
     322             :         else
     323     1511397 :                 view->tmaxpos = BUN_NONE;
     324     6795683 :         view->tkey |= cnt <= 1;
     325             : }
     326             : void
     327           3 : VIEWbounds(BAT *b, BAT *view, BUN l, BUN h)
     328             : {
     329           3 :         BATiter bi = bat_iterator(b);
     330           3 :         VIEWboundsbi(&bi, view, l, h);
     331           3 :         bat_iterator_end(&bi);
     332           3 : }
     333             : 
     334             : /*
     335             :  * Destroy a view.
     336             :  */
     337             : void
     338           0 : VIEWdestroy(BAT *b)
     339             : {
     340           0 :         assert(isVIEW(b));
     341           0 :         bat tp = 0, tvp = 0;
     342             : 
     343             :         /* remove any leftover private hash structures */
     344           0 :         HASHdestroy(b);
     345           0 :         IMPSdestroy(b);
     346           0 :         OIDXdestroy(b);
     347           0 :         STRMPdestroy(b);
     348           0 :         RTREEdestroy(b);
     349             : 
     350           0 :         MT_lock_set(&b->theaplock);
     351           0 :         PROPdestroy_nolock(b);
     352             :         /* heaps that are left after VIEWunlink are ours, so need to be
     353             :          * destroyed (and files deleted) */
     354           0 :         if (b->theap) {
     355           0 :                 tp = b->theap->parentid;
     356           0 :                 HEAPdecref(b->theap, tp == b->batCacheid);
     357           0 :                 b->theap = NULL;
     358             :         }
     359           0 :         if (b->tvheap) {
     360             :                 /* should never happen: if this heap exists, then it was
     361             :                  * our own (not a view), and then it doesn't make sense
     362             :                  * that the offset heap was a view (at least one of them
     363             :                  * had to be) */
     364           0 :                 tvp = b->tvheap->parentid;
     365           0 :                 HEAPdecref(b->tvheap, tvp == b->batCacheid);
     366           0 :                 b->tvheap = NULL;
     367             :         }
     368           0 :         MT_lock_unset(&b->theaplock);
     369           0 :         if (tp != 0 && tp != b->batCacheid)
     370           0 :                 BBPrelease(tp);
     371           0 :         if (tvp != 0 && tvp != b->batCacheid)
     372           0 :                 BBPrelease(tvp);
     373           0 :         BATfree(b);
     374           0 : }

Generated by: LCOV version 1.14