LCOV - code coverage report
Current view: top level - gdk - gdk_align.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 132 209 63.2 %
Date: 2024-11-14 20:04:02 Functions: 4 6 66.7 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : /*
      14             :  * @a Peter Boncz, Niels Nes
      15             :  * @* BAT Alignment
      16             :  * For BATs that result from a n-ary relational scheme it may help to
      17             :  * align the BATs on their head value. In particular, it permits
      18             :  * replacing a hash-join by a merge-join, which is significantly
      19             :  * faster on large tables. Especially if the BATs involved cause page
      20             :  * activity or when you can not afford the large hash structures to
      21             :  * speed-up processing.
      22             :  *
      23             :  * For orthogonality, we support alignment between arbitrary columns
      24             :  * (head or tail).
      25             :  *
      26             :  * All standard GDK set-calls update the alignment info in their
      27             :  * respective ways. For example, the routine @emph{BUNclustercopy}
      28             :  * shuffles the first argument, such that the BUNs are in the same
      29             :  * order as those in the second argument.  This operation will mark
      30             :  * both columns of the first @emph{BAT} as synced with the second
      31             :  * (likewise, @emph{Colcopy()}, which makes a copy, instead of
      32             :  * in-place shuffling, has the same alignment effect.
      33             :  *
      34             :  * Each alignment sequence is given a unique identifier, so as to
      35             :  * easily detect this situation. It is retained in the @emph{BAT
      36             :  * descriptor}.
      37             :  * @+ Alignment Design Considerations
      38             :  * Alignment primitives require the right hooks to be inserted in
      39             :  * several places in the GDK, apart form this file:
      40             :  * @itemize
      41             :  * @item @emph{ BUN update operations}.
      42             :  * The updated BATs have to be marked as un-aligned.
      43             :  * @item @emph{ set operations}.
      44             :  * For most relational operations some statements can be made about
      45             :  * the size and order of the BATs they produce. This information can
      46             :  * be formalized by indicating alignment information automatically.
      47             :  * @item @emph{ transaction operations}.
      48             :  * Alignment statuses must be kept consistent under database commits
      49             :  * and aborts.
      50             :  * @end itemize
      51             :  */
      52             : #include "monetdb_config.h"
      53             : #include "gdk.h"
      54             : #include "gdk_private.h"
      55             : 
      56             : /* Return TRUE if the two BATs are aligned (same size, same
      57             :  * hseqbase). */
      58             : int
      59           0 : ALIGNsynced(BAT *b1, BAT *b2)
      60             : {
      61           0 :         if (b1 == NULL || b2 == NULL)
      62             :                 return 0;
      63             : 
      64           0 :         assert(!is_oid_nil(b1->hseqbase));
      65           0 :         assert(!is_oid_nil(b2->hseqbase));
      66             : 
      67           0 :         return BATcount(b1) == BATcount(b2) && b1->hseqbase == b2->hseqbase;
      68             : }
      69             : 
      70             : /*
      71             :  * @+ View BATS
      72             :  * The general routine for getting a 'view' BAT upon another BAT is
      73             :  * @emph{VIEWcreate}. On this @emph{#read-only} BAT (there is kernel
      74             :  * support for this), you can then make vertical slices.
      75             :  *
      76             :  * It is possible to create a view on a writable BAT. Updates in the
      77             :  * parent are then automatically reflected in the VIEW.  Note that the
      78             :  * VIEW bat itself can never be modified.
      79             :  *
      80             :  * Horizontal views should only be given out on a view BAT, but only
      81             :  * if it is dead sure the parent BAT is read-only.  This because they
      82             :  * cannot physically share the batBuns heap with the parent, as they
      83             :  * need a modified version.
      84             :  */
      85             : static void
      86     1885641 : VIEWboundsbi(BATiter *bi, BAT *view, BUN l, BUN h)
      87             : {
      88     1885641 :         BUN cnt;
      89     1885641 :         BUN baseoff;
      90             : 
      91     1885641 :         if (bi == NULL || view == NULL)
      92             :                 return;
      93     1885641 :         if (h > bi->count)
      94             :                 h = bi->count;
      95     1885641 :         baseoff = bi->baseoff;
      96     1885641 :         if (h < l)
      97             :                 h = l;
      98     1885641 :         cnt = h - l;
      99     1885641 :         if (view->ttype != TYPE_void) {
     100     1885665 :                 view->tbaseoff = baseoff + l;
     101             :         }
     102     1885641 :         if (!is_oid_nil(view->tseqbase))
     103          61 :                 view->tseqbase += l;
     104     1885641 :         BATsetcount(view, cnt);
     105     1885411 :         BATsetcapacity(view, cnt);
     106     1885061 :         if (view->tnosorted > l && view->tnosorted < l + cnt)
     107      434621 :                 view->tnosorted -= l;
     108             :         else
     109     1450440 :                 view->tnosorted = 0;
     110     1885061 :         if (view->tnorevsorted > l && view->tnorevsorted < l + cnt)
     111      503261 :                 view->tnorevsorted -= l;
     112             :         else
     113     1381800 :                 view->tnorevsorted = 0;
     114     1885061 :         if (view->tnokey[0] >= l && view->tnokey[0] < l + cnt &&
     115     1229815 :             view->tnokey[1] >= l && view->tnokey[1] < l + cnt &&
     116             :             view->tnokey[0] != view->tnokey[1]) {
     117      853046 :                 view->tnokey[0] -= l;
     118      853046 :                 view->tnokey[1] -= l;
     119             :         } else {
     120     1032015 :                 view->tnokey[0] = view->tnokey[1] = 0;
     121             :         }
     122     1885061 :         if (view->tminpos >= l && view->tminpos < l + cnt)
     123      875492 :                 view->tminpos -= l;
     124             :         else
     125     1009569 :                 view->tminpos = BUN_NONE;
     126     1885061 :         if (view->tmaxpos >= l && view->tmaxpos < l + cnt)
     127      734911 :                 view->tmaxpos -= l;
     128             :         else
     129     1150150 :                 view->tmaxpos = BUN_NONE;
     130     1885061 :         view->tkey |= cnt <= 1;
     131     1885061 :         view->tnil = false;  /* we don't know */
     132             : }
     133             : 
     134             : void
     135           2 : VIEWbounds(BAT *b, BAT *view, BUN l, BUN h)
     136             : {
     137           2 :         BATiter bi = bat_iterator(b);
     138           2 :         VIEWboundsbi(&bi, view, l, h);
     139           2 :         bat_iterator_end(&bi);
     140           2 : }
     141             : 
     142             : BAT *
     143     9924187 : VIEWcreate(oid seq, BAT *b, BUN l, BUN h)
     144             : {
     145     9924187 :         BAT *bn;
     146     9924187 :         bat tp = 0;
     147             : 
     148     9924187 :         BATcheck(b, NULL);
     149             : 
     150     9924187 :         if (b->ttype == TYPE_void) {
     151             :                 /* we don't do views on void bats */
     152        1897 :                 if (h > b->batCount)
     153             :                         h = b->batCount;
     154        1897 :                 if (l > h)
     155           0 :                         l = h = 0;
     156        1897 :                 return BATdense(seq, b->tseqbase + l, h - l);
     157             :         }
     158             : 
     159     9922290 :         bn = BATcreatedesc(seq, b->ttype, false, TRANSIENT, 0);
     160     9922155 :         if (bn == NULL)
     161             :                 return NULL;
     162     9922155 :         assert(bn->theap == NULL);
     163             : 
     164     9922155 :         MT_lock_set(&b->theaplock);
     165     9922790 :         BATiter bi = bat_iterator_nolock(b);
     166     9922790 :         bn->batInserted = 0;
     167     9922790 :         bn->batCount = bi.count;
     168     9922790 :         bn->batCapacity = b->batCapacity;
     169     9922790 :         bn->batRestricted = BAT_READ;
     170             : 
     171             :         /* the T column descriptor is fully copied except for the
     172             :          * accelerator data. We need copies because in case of a mark,
     173             :          * we are going to override a column with a void. */
     174     9922790 :         bn->tkey = bi.key;
     175     9922790 :         bn->tseqbase = bi.tseq;
     176     9922790 :         bn->tsorted = bi.sorted;
     177     9922790 :         bn->trevsorted = bi.revsorted;
     178     9922790 :         bn->twidth = bi.width;
     179     9922790 :         bn->tshift = bi.shift;
     180     9922790 :         bn->tnonil = bi.nonil;
     181     9922790 :         bn->tnil = bi.nil;
     182     9922790 :         bn->tascii = bi.ascii;
     183     9922790 :         bn->tnokey[0] = bi.nokey[0];
     184     9922790 :         bn->tnokey[1] = bi.nokey[1];
     185     9922790 :         bn->tnosorted = bi.nosorted;
     186     9922790 :         bn->tnorevsorted = bi.norevsorted;
     187     9922790 :         bn->tminpos = bi.minpos;
     188     9922790 :         bn->tmaxpos = bi.maxpos;
     189     9922790 :         bn->tunique_est = bi.unique_est;
     190     9922790 :         bn->theap = bi.h;
     191     9922790 :         bn->tbaseoff = bi.baseoff;
     192     9922790 :         bn->tvheap = bi.vh;
     193             : 
     194     9922790 :         tp = VIEWtparent(b);
     195     9922790 :         if (tp == 0 && b->ttype != TYPE_void)
     196     8797836 :                 tp = b->batCacheid;
     197     9922790 :         assert(b->ttype != TYPE_void || !tp);
     198     9922790 :         HEAPincref(bi.h);
     199     9922259 :         if (bi.vh)
     200     2217738 :                 HEAPincref(bi.vh);
     201     9922486 :         if (l != 0 || h < bi.count)
     202     1885877 :                 VIEWboundsbi(&bi, bn, l, h);
     203     9921291 :         MT_lock_unset(&b->theaplock);
     204             : 
     205     9921353 :         if (BBPcacheit(bn, true) != GDK_SUCCEED) {      /* enter in BBP */
     206           0 :                 if (bn->tvheap)
     207           0 :                         HEAPdecref(bn->tvheap, false);
     208           0 :                 HEAPdecref(bn->theap, false);
     209           0 :                 MT_lock_destroy(&bn->theaplock);
     210           0 :                 MT_lock_destroy(&bn->batIdxLock);
     211           0 :                 MT_rwlock_destroy(&bn->thashlock);
     212           0 :                 GDKfree(bn);
     213           0 :                 return NULL;
     214             :         }
     215     9922032 :         BBPretain(bn->theap->parentid);
     216     9922425 :         if (bn->tvheap)
     217     2217746 :                 BBPretain(bn->tvheap->parentid);
     218     9922242 :         TRC_DEBUG(ALGO, ALGOBATFMT " " BUNFMT "," BUNFMT " -> " ALGOBATFMT "\n",
     219             :                   ALGOBATPAR(b), l, h, ALGOBATPAR(bn));
     220             :         return bn;
     221             : }
     222             : 
     223             : /*
     224             :  * The BATmaterialize routine produces in-place materialized version
     225             :  * of a void bat (which should not be a VIEW) (later we should add the
     226             :  * code for VIEWs).
     227             :  */
     228             : 
     229             : gdk_return
     230       18570 : BATmaterialize(BAT *b, BUN cap)
     231             : {
     232       18570 :         Heap *tail;
     233       18570 :         Heap *h, *vh = NULL;
     234       18570 :         BUN p, q;
     235       18570 :         oid t, *x;
     236             : 
     237       18570 :         BATcheck(b, GDK_FAIL);
     238       18570 :         assert(!isVIEW(b));
     239       18570 :         if (cap == BUN_NONE || cap < BATcapacity(b))
     240       12450 :                 cap = BATcapacity(b);
     241       18570 :         MT_lock_set(&b->theaplock);
     242       18570 :         if (b->ttype != TYPE_void) {
     243             :                 /* no voids; just call BATextend to make sure of capacity */
     244       12450 :                 MT_lock_unset(&b->theaplock);
     245       12450 :                 return BATextend(b, cap);
     246             :         }
     247             : 
     248        6120 :         if ((tail = GDKmalloc(sizeof(Heap))) == NULL) {
     249           0 :                 MT_lock_unset(&b->theaplock);
     250           0 :                 return GDK_FAIL;
     251             :         }
     252        6120 :         p = 0;
     253        6120 :         q = BATcount(b);
     254        6120 :         assert(cap >= q - p);
     255        6120 :         TRC_DEBUG(ALGO, "BATmaterialize(" ALGOBATFMT ")\n", ALGOBATPAR(b));
     256             : 
     257       12240 :         *tail = (Heap) {
     258        6120 :                 .farmid = BBPselectfarm(b->batRole, TYPE_oid, offheap),
     259        6120 :                 .parentid = b->batCacheid,
     260             :                 .dirty = true,
     261             :                 .refs = ATOMIC_VAR_INIT(1),
     262             :         };
     263        6120 :         settailname(tail, BBP_physical(b->batCacheid), TYPE_oid, 0);
     264        6120 :         if (HEAPalloc(tail, cap, sizeof(oid)) != GDK_SUCCEED) {
     265           0 :                 MT_lock_unset(&b->theaplock);
     266           0 :                 GDKfree(tail);
     267           0 :                 return GDK_FAIL;
     268             :         }
     269        6120 :         x = (oid *) tail->base;
     270        6120 :         t = b->tseqbase;
     271        6120 :         if (is_oid_nil(t)) {
     272           0 :                 for (p = 0; p < q; p++)
     273           0 :                         x[p] = oid_nil;
     274             :         } else {
     275    34617189 :                 for (p = 0; p < q; p++)
     276    34611069 :                         x[p] = t++;
     277             :         }
     278             :         /* point of no return */
     279        6120 :         assert((ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 0);
     280             :         /* can only look at tvheap when lock is held */
     281        6120 :         if (complex_cand(b)) {
     282           0 :                 assert(b->batRole == TRANSIENT);
     283           0 :                 if (negoid_cand(b)) {
     284           0 :                         assert(ccand_free(b) % SIZEOF_OID == 0);
     285           0 :                         BUN nexc = (BUN) (ccand_free(b) / SIZEOF_OID);
     286           0 :                         const oid *exc = (const oid *) ccand_first(b);
     287           0 :                         BUN i;
     288           0 :                         for (p = 0, i = 0; p < q; p++) {
     289           0 :                                 while (i < nexc && t == exc[i]) {
     290           0 :                                         i++;
     291           0 :                                         t++;
     292             :                                 }
     293           0 :                                 x[p] = t++;
     294             :                         }
     295             :                 } else {
     296           0 :                         assert(mask_cand(b));
     297           0 :                         BUN nmsk = (BUN) (ccand_free(b) / sizeof(uint32_t));
     298           0 :                         const uint32_t *src = (const uint32_t *) ccand_first(b);
     299           0 :                         BUN n = 0;
     300           0 :                         t -= (oid) CCAND(b)->firstbit;
     301           0 :                         for (p = 0; p < nmsk; p++) {
     302           0 :                                 uint32_t val = src[p];
     303           0 :                                 if (val == 0)
     304           0 :                                         continue;
     305           0 :                                 for (uint32_t i = 0; i < 32; i++) {
     306           0 :                                         if (val & (1U << i)) {
     307           0 :                                                 assert(n < q);
     308           0 :                                                 x[n++] = t + p * 32 + i;
     309             :                                         }
     310             :                                 }
     311             :                         }
     312           0 :                         assert(n == q);
     313             :                 }
     314           0 :                 vh = b->tvheap;
     315           0 :                 b->tvheap = NULL;
     316             :         }
     317        6120 :         h = b->theap;
     318        6120 :         b->theap = tail;
     319        6120 :         b->tbaseoff = 0;
     320        6120 :         b->theap->dirty = true;
     321        6120 :         b->tunique_est = is_oid_nil(t) ? 1.0 : (double) b->batCount;
     322        6120 :         b->ttype = TYPE_oid;
     323        6120 :         BATsetdims(b, 0);
     324        6120 :         BATsetcount(b, b->batCount);
     325        6120 :         BATsetcapacity(b, cap);
     326        6120 :         MT_lock_unset(&b->theaplock);
     327        6120 :         if (h->parentid != b->batCacheid)
     328           0 :                 BBPrelease(h->parentid);
     329        6120 :         HEAPdecref(h, false);
     330        6120 :         if (vh) {
     331           0 :                 if (vh->parentid != b->batCacheid)
     332           0 :                         BBPrelease(vh->parentid);
     333           0 :                 HEAPdecref(vh, true);
     334             :         }
     335             : 
     336             :         return GDK_SUCCEED;
     337             : }
     338             : 
     339             : /*
     340             :  * Destroy a view.
     341             :  */
     342             : void
     343           0 : VIEWdestroy(BAT *b)
     344             : {
     345           0 :         assert(isVIEW(b));
     346           0 :         bat tp = 0, tvp = 0;
     347             : 
     348             :         /* remove any leftover private hash structures */
     349           0 :         HASHdestroy(b);
     350           0 :         IMPSdestroy(b);
     351           0 :         OIDXdestroy(b);
     352           0 :         STRMPdestroy(b);
     353           0 :         RTREEdestroy(b);
     354             : 
     355           0 :         MT_lock_set(&b->theaplock);
     356           0 :         PROPdestroy_nolock(b);
     357             :         /* heaps that are left after VIEWunlink are ours, so need to be
     358             :          * destroyed (and files deleted) */
     359           0 :         if (b->theap) {
     360           0 :                 tp = b->theap->parentid;
     361           0 :                 HEAPdecref(b->theap, tp == b->batCacheid);
     362           0 :                 b->theap = NULL;
     363             :         }
     364           0 :         if (b->tvheap) {
     365             :                 /* should never happen: if this heap exists, then it was
     366             :                  * our own (not a view), and then it doesn't make sense
     367             :                  * that the offset heap was a view (at least one of them
     368             :                  * had to be) */
     369           0 :                 tvp = b->tvheap->parentid;
     370           0 :                 HEAPdecref(b->tvheap, tvp == b->batCacheid);
     371           0 :                 b->tvheap = NULL;
     372             :         }
     373           0 :         MT_lock_unset(&b->theaplock);
     374           0 :         if (tp != 0 && tp != b->batCacheid)
     375           0 :                 BBPrelease(tp);
     376           0 :         if (tvp != 0 && tvp != b->batCacheid)
     377           0 :                 BBPrelease(tvp);
     378           0 :         BATfree(b);
     379           0 : }

Generated by: LCOV version 1.14