LCOV - code coverage report
Current view: top level - gdk - gdk_align.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 132 208 63.5 %
Date: 2024-12-20 21:24:02 Functions: 4 6 66.7 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : /*
      14             :  * @a Peter Boncz, Niels Nes
      15             :  * @* BAT Alignment
      16             :  * For BATs that result from a n-ary relational scheme it may help to
      17             :  * align the BATs on their head value. In particular, it permits
      18             :  * replacing a hash-join by a merge-join, which is significantly
      19             :  * faster on large tables. Especially if the BATs involved cause page
      20             :  * activity or when you can not afford the large hash structures to
      21             :  * speed-up processing.
      22             :  *
      23             :  * For orthogonality, we support alignment between arbitrary columns
      24             :  * (head or tail).
      25             :  *
      26             :  * All standard GDK set-calls update the alignment info in their
      27             :  * respective ways. For example, the routine @emph{BUNclustercopy}
      28             :  * shuffles the first argument, such that the BUNs are in the same
      29             :  * order as those in the second argument.  This operation will mark
      30             :  * both columns of the first @emph{BAT} as synced with the second
      31             :  * (likewise, @emph{Colcopy()}, which makes a copy, instead of
      32             :  * in-place shuffling, has the same alignment effect.
      33             :  *
      34             :  * Each alignment sequence is given a unique identifier, so as to
      35             :  * easily detect this situation. It is retained in the @emph{BAT
      36             :  * descriptor}.
      37             :  * @+ Alignment Design Considerations
      38             :  * Alignment primitives require the right hooks to be inserted in
      39             :  * several places in the GDK, apart form this file:
      40             :  * @itemize
      41             :  * @item @emph{ BUN update operations}.
      42             :  * The updated BATs have to be marked as un-aligned.
      43             :  * @item @emph{ set operations}.
      44             :  * For most relational operations some statements can be made about
      45             :  * the size and order of the BATs they produce. This information can
      46             :  * be formalized by indicating alignment information automatically.
      47             :  * @item @emph{ transaction operations}.
      48             :  * Alignment statuses must be kept consistent under database commits
      49             :  * and aborts.
      50             :  * @end itemize
      51             :  */
      52             : #include "monetdb_config.h"
      53             : #include "gdk.h"
      54             : #include "gdk_private.h"
      55             : 
      56             : /* Return TRUE if the two BATs are aligned (same size, same
      57             :  * hseqbase). */
      58             : int
      59           0 : ALIGNsynced(BAT *b1, BAT *b2)
      60             : {
      61           0 :         if (b1 == NULL || b2 == NULL)
      62             :                 return 0;
      63             : 
      64           0 :         assert(!is_oid_nil(b1->hseqbase));
      65           0 :         assert(!is_oid_nil(b2->hseqbase));
      66             : 
      67           0 :         return BATcount(b1) == BATcount(b2) && b1->hseqbase == b2->hseqbase;
      68             : }
      69             : 
      70             : /*
      71             :  * @+ View BATS
      72             :  * The general routine for getting a 'view' BAT upon another BAT is
      73             :  * @emph{VIEWcreate}. On this @emph{#read-only} BAT (there is kernel
      74             :  * support for this), you can then make vertical slices.
      75             :  *
      76             :  * It is possible to create a view on a writable BAT. Updates in the
      77             :  * parent are then automatically reflected in the VIEW.  Note that the
      78             :  * VIEW bat itself can never be modified.
      79             :  *
      80             :  * Horizontal views should only be given out on a view BAT, but only
      81             :  * if it is dead sure the parent BAT is read-only.  This because they
      82             :  * cannot physically share the batBuns heap with the parent, as they
      83             :  * need a modified version.
      84             :  */
      85             : static void
      86     3074387 : VIEWboundsbi(BATiter *bi, BAT *view, BUN l, BUN h)
      87             : {
      88     3074387 :         BUN cnt;
      89     3074387 :         BUN baseoff;
      90             : 
      91     3074387 :         if (bi == NULL || view == NULL)
      92             :                 return;
      93     3074387 :         if (h > bi->count)
      94             :                 h = bi->count;
      95     3074387 :         baseoff = bi->baseoff;
      96     3074387 :         if (h < l)
      97             :                 h = l;
      98     3074387 :         cnt = h - l;
      99     3074387 :         if (view->ttype != TYPE_void) {
     100     3074709 :                 view->tbaseoff = baseoff + l;
     101             :         }
     102     3074387 :         if (!is_oid_nil(view->tseqbase))
     103          62 :                 view->tseqbase += l;
     104     3074387 :         BATsetcount(view, cnt);
     105     3056445 :         BATsetcapacity(view, cnt);
     106     3055756 :         if (view->tnosorted > l && view->tnosorted < l + cnt)
     107      528850 :                 view->tnosorted -= l;
     108             :         else
     109     2526906 :                 view->tnosorted = 0;
     110     3055756 :         if (view->tnorevsorted > l && view->tnorevsorted < l + cnt)
     111      617937 :                 view->tnorevsorted -= l;
     112             :         else
     113     2437819 :                 view->tnorevsorted = 0;
     114     3055756 :         if (view->tnokey[0] >= l && view->tnokey[0] < l + cnt &&
     115     1721977 :             view->tnokey[1] >= l && view->tnokey[1] < l + cnt &&
     116             :             view->tnokey[0] != view->tnokey[1]) {
     117     1267960 :                 view->tnokey[0] -= l;
     118     1267960 :                 view->tnokey[1] -= l;
     119             :         } else {
     120     1787796 :                 view->tnokey[0] = view->tnokey[1] = 0;
     121             :         }
     122     3055756 :         if (view->tminpos >= l && view->tminpos < l + cnt)
     123     1309236 :                 view->tminpos -= l;
     124             :         else
     125     1746520 :                 view->tminpos = BUN_NONE;
     126     3055756 :         if (view->tmaxpos >= l && view->tmaxpos < l + cnt)
     127     1151452 :                 view->tmaxpos -= l;
     128             :         else
     129     1904304 :                 view->tmaxpos = BUN_NONE;
     130     3055756 :         view->tkey |= cnt <= 1;
     131     3055756 :         view->tnil = false;  /* we don't know */
     132             : }
     133             : 
     134             : void
     135           2 : VIEWbounds(BAT *b, BAT *view, BUN l, BUN h)
     136             : {
     137           2 :         BATiter bi = bat_iterator(b);
     138           2 :         VIEWboundsbi(&bi, view, l, h);
     139           2 :         bat_iterator_end(&bi);
     140           2 : }
     141             : 
     142             : BAT *
     143    12047362 : VIEWcreate(oid seq, BAT *b, BUN l, BUN h)
     144             : {
     145    12047362 :         BAT *bn;
     146    12047362 :         bat tp = 0;
     147             : 
     148    12047362 :         BATcheck(b, NULL);
     149             : 
     150    12047362 :         if (b->ttype == TYPE_void) {
     151             :                 /* we don't do views on void bats */
     152        1906 :                 if (h > b->batCount)
     153             :                         h = b->batCount;
     154        1906 :                 if (l > h)
     155           0 :                         l = h = 0;
     156        1906 :                 return BATdense(seq, b->tseqbase + l, h - l);
     157             :         }
     158             : 
     159    12045456 :         bn = BATcreatedesc(seq, b->ttype, false, TRANSIENT, 0);
     160    12005262 :         if (bn == NULL)
     161             :                 return NULL;
     162    12005262 :         assert(bn->theap == NULL);
     163             : 
     164    12005262 :         MT_lock_set(&b->theaplock);
     165    12033122 :         BATiter bi = bat_iterator_nolock(b);
     166    12033122 :         bn->batInserted = 0;
     167    12033122 :         bn->batCount = bi.count;
     168    12033122 :         bn->batCapacity = b->batCapacity;
     169    12033122 :         bn->batRestricted = BAT_READ;
     170             : 
     171             :         /* the T column descriptor is fully copied except for the
     172             :          * accelerator data. We need copies because in case of a mark,
     173             :          * we are going to override a column with a void. */
     174    12033122 :         bn->tkey = bi.key;
     175    12033122 :         bn->tseqbase = bi.tseq;
     176    12033122 :         bn->tsorted = bi.sorted;
     177    12033122 :         bn->trevsorted = bi.revsorted;
     178    12033122 :         bn->twidth = bi.width;
     179    12033122 :         bn->tshift = bi.shift;
     180    12033122 :         bn->tnonil = bi.nonil;
     181    12033122 :         bn->tnil = bi.nil;
     182    12033122 :         bn->tascii = bi.ascii;
     183    12033122 :         bn->tnokey[0] = bi.nokey[0];
     184    12033122 :         bn->tnokey[1] = bi.nokey[1];
     185    12033122 :         bn->tnosorted = bi.nosorted;
     186    12033122 :         bn->tnorevsorted = bi.norevsorted;
     187    12033122 :         bn->tminpos = bi.minpos;
     188    12033122 :         bn->tmaxpos = bi.maxpos;
     189    12033122 :         bn->tunique_est = bi.unique_est;
     190    12033122 :         bn->theap = bi.h;
     191    12033122 :         bn->tbaseoff = bi.baseoff;
     192    12033122 :         bn->tvheap = bi.vh;
     193             : 
     194    12033122 :         tp = VIEWtparent(b);
     195    12033122 :         if (tp == 0 && b->ttype != TYPE_void)
     196    10066114 :                 tp = b->batCacheid;
     197    12033122 :         assert(b->ttype != TYPE_void || !tp);
     198    12033122 :         HEAPincref(bi.h);
     199    12066680 :         if (bi.vh)
     200     2541219 :                 HEAPincref(bi.vh);
     201    12065498 :         if (l != 0 || h < bi.count)
     202     3079240 :                 VIEWboundsbi(&bi, bn, l, h);
     203    12049270 :         MT_lock_unset(&b->theaplock);
     204             : 
     205    12067941 :         if (BBPcacheit(bn, true) != GDK_SUCCEED) {      /* enter in BBP */
     206           0 :                 if (bn->tvheap)
     207           0 :                         HEAPdecref(bn->tvheap, false);
     208           0 :                 HEAPdecref(bn->theap, false);
     209           0 :                 MT_lock_destroy(&bn->theaplock);
     210           0 :                 MT_lock_destroy(&bn->batIdxLock);
     211           0 :                 MT_rwlock_destroy(&bn->thashlock);
     212           0 :                 GDKfree(bn);
     213           0 :                 return NULL;
     214             :         }
     215    12066260 :         BBPretain(bn->theap->parentid);
     216    12064398 :         if (bn->tvheap)
     217     2541209 :                 BBPretain(bn->tvheap->parentid);
     218    12065049 :         TRC_DEBUG(ALGO, ALGOBATFMT " " BUNFMT "," BUNFMT " -> " ALGOBATFMT "\n",
     219             :                   ALGOBATPAR(b), l, h, ALGOBATPAR(bn));
     220             :         return bn;
     221             : }
     222             : 
     223             : /*
     224             :  * The BATmaterialize routine produces in-place materialized version
     225             :  * of a void bat (which should not be a VIEW) (later we should add the
     226             :  * code for VIEWs).
     227             :  */
     228             : 
     229             : gdk_return
     230       17508 : BATmaterialize(BAT *b, BUN cap)
     231             : {
     232       17508 :         Heap *tail;
     233       17508 :         Heap *h, *vh = NULL;
     234       17508 :         BUN p, q;
     235       17508 :         oid t, *x;
     236             : 
     237       17508 :         BATcheck(b, GDK_FAIL);
     238       17508 :         assert(!isVIEW(b));
     239       17508 :         if (cap == BUN_NONE || cap < BATcapacity(b))
     240       13660 :                 cap = BATcapacity(b);
     241       17508 :         MT_lock_set(&b->theaplock);
     242       17517 :         if (b->ttype != TYPE_void) {
     243             :                 /* no voids; just call BATextend to make sure of capacity */
     244       13669 :                 MT_lock_unset(&b->theaplock);
     245       13671 :                 return BATextend(b, cap);
     246             :         }
     247             : 
     248        3848 :         if ((tail = GDKmalloc(sizeof(Heap))) == NULL) {
     249           0 :                 MT_lock_unset(&b->theaplock);
     250           0 :                 return GDK_FAIL;
     251             :         }
     252        3848 :         p = 0;
     253        3848 :         q = BATcount(b);
     254        3848 :         assert(cap >= q - p);
     255        3848 :         TRC_DEBUG(ALGO, "BATmaterialize(" ALGOBATFMT ")\n", ALGOBATPAR(b));
     256             : 
     257        7695 :         *tail = (Heap) {
     258        3848 :                 .farmid = BBPselectfarm(b->batRole, TYPE_oid, offheap),
     259        3847 :                 .parentid = b->batCacheid,
     260             :                 .dirty = true,
     261             :                 .refs = ATOMIC_VAR_INIT(1),
     262             :         };
     263        3847 :         settailname(tail, BBP_physical(b->batCacheid), TYPE_oid, 0);
     264        3848 :         if (HEAPalloc(tail, cap, sizeof(oid)) != GDK_SUCCEED) {
     265           0 :                 MT_lock_unset(&b->theaplock);
     266           0 :                 GDKfree(tail);
     267           0 :                 return GDK_FAIL;
     268             :         }
     269        3848 :         x = (oid *) tail->base;
     270        3848 :         t = b->tseqbase;
     271        3848 :         if (is_oid_nil(t)) {
     272           0 :                 for (p = 0; p < q; p++)
     273           0 :                         x[p] = oid_nil;
     274             :         } else {
     275    23815433 :                 for (p = 0; p < q; p++)
     276    23811585 :                         x[p] = t++;
     277             :         }
     278             :         /* point of no return */
     279        3848 :         assert((ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 0);
     280             :         /* can only look at tvheap when lock is held */
     281        3848 :         if (complex_cand(b)) {
     282           0 :                 assert(b->batRole == TRANSIENT);
     283           0 :                 if (negoid_cand(b)) {
     284           0 :                         assert(ccand_free(b) % SIZEOF_OID == 0);
     285           0 :                         BUN nexc = (BUN) (ccand_free(b) / SIZEOF_OID);
     286           0 :                         const oid *exc = (const oid *) ccand_first(b);
     287           0 :                         BUN i;
     288           0 :                         for (p = 0, i = 0; p < q; p++) {
     289           0 :                                 while (i < nexc && t == exc[i]) {
     290           0 :                                         i++;
     291           0 :                                         t++;
     292             :                                 }
     293           0 :                                 x[p] = t++;
     294             :                         }
     295             :                 } else {
     296           0 :                         assert(mask_cand(b));
     297           0 :                         BUN nmsk = (BUN) (ccand_free(b) / sizeof(uint32_t));
     298           0 :                         const uint32_t *src = (const uint32_t *) ccand_first(b);
     299           0 :                         BUN n = 0;
     300           0 :                         t -= (oid) CCAND(b)->firstbit;
     301           0 :                         for (p = 0; p < nmsk; p++) {
     302           0 :                                 uint32_t val = src[p];
     303           0 :                                 if (val == 0)
     304           0 :                                         continue;
     305           0 :                                 for (uint32_t i = 0; i < 32; i++) {
     306           0 :                                         if (val & (1U << i)) {
     307           0 :                                                 assert(n < q);
     308           0 :                                                 x[n++] = t + p * 32 + i;
     309             :                                         }
     310             :                                 }
     311             :                         }
     312           0 :                         assert(n == q);
     313             :                 }
     314           0 :                 vh = b->tvheap;
     315           0 :                 b->tvheap = NULL;
     316             :         }
     317        3848 :         h = b->theap;
     318        3848 :         b->theap = tail;
     319        3848 :         b->tbaseoff = 0;
     320        3848 :         b->theap->dirty = true;
     321        3848 :         b->tunique_est = is_oid_nil(t) ? 1.0 : (double) b->batCount;
     322        3848 :         b->ttype = TYPE_oid;
     323        3848 :         BATsetdims(b, 0);
     324        3848 :         BATsetcount(b, b->batCount);
     325        3847 :         BATsetcapacity(b, cap);
     326        3847 :         MT_lock_unset(&b->theaplock);
     327        3848 :         if (h->parentid != b->batCacheid)
     328           0 :                 BBPrelease(h->parentid);
     329        3848 :         HEAPdecref(h, false);
     330        3848 :         if (vh) {
     331           0 :                 if (vh->parentid != b->batCacheid)
     332           0 :                         BBPrelease(vh->parentid);
     333           0 :                 HEAPdecref(vh, true);
     334             :         }
     335             : 
     336             :         return GDK_SUCCEED;
     337             : }
     338             : 
     339             : /*
     340             :  * Destroy a view.
     341             :  */
     342             : void
     343           0 : VIEWdestroy(BAT *b)
     344             : {
     345           0 :         assert(isVIEW(b));
     346           0 :         bat tp = 0, tvp = 0;
     347             : 
     348             :         /* remove any leftover private hash structures */
     349           0 :         HASHdestroy(b);
     350           0 :         OIDXdestroy(b);
     351           0 :         STRMPdestroy(b);
     352           0 :         RTREEdestroy(b);
     353             : 
     354           0 :         MT_lock_set(&b->theaplock);
     355           0 :         PROPdestroy_nolock(b);
     356             :         /* heaps that are left after VIEWunlink are ours, so need to be
     357             :          * destroyed (and files deleted) */
     358           0 :         if (b->theap) {
     359           0 :                 tp = b->theap->parentid;
     360           0 :                 HEAPdecref(b->theap, tp == b->batCacheid);
     361           0 :                 b->theap = NULL;
     362             :         }
     363           0 :         if (b->tvheap) {
     364             :                 /* should never happen: if this heap exists, then it was
     365             :                  * our own (not a view), and then it doesn't make sense
     366             :                  * that the offset heap was a view (at least one of them
     367             :                  * had to be) */
     368           0 :                 tvp = b->tvheap->parentid;
     369           0 :                 HEAPdecref(b->tvheap, tvp == b->batCacheid);
     370           0 :                 b->tvheap = NULL;
     371             :         }
     372           0 :         MT_lock_unset(&b->theaplock);
     373           0 :         if (tp != 0 && tp != b->batCacheid)
     374           0 :                 BBPrelease(tp);
     375           0 :         if (tvp != 0 && tvp != b->batCacheid)
     376           0 :                 BBPrelease(tvp);
     377           0 :         BATfree(b);
     378           0 : }

Generated by: LCOV version 1.14