Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * @a Peter Boncz, Niels Nes
15 : * @* BAT Alignment
16 : * For BATs that result from a n-ary relational scheme it may help to
17 : * align the BATs on their head value. In particular, it permits
18 : * replacing a hash-join by a merge-join, which is significantly
19 : * faster on large tables. Especially if the BATs involved cause page
20 : * activity or when you can not afford the large hash structures to
21 : * speed-up processing.
22 : *
23 : * For orthogonality, we support alignment between arbitrary columns
24 : * (head or tail).
25 : *
26 : * All standard GDK set-calls update the alignment info in their
27 : * respective ways. For example, the routine @emph{BUNclustercopy}
28 : * shuffles the first argument, such that the BUNs are in the same
29 : * order as those in the second argument. This operation will mark
30 : * both columns of the first @emph{BAT} as synced with the second
31 : * (likewise, @emph{Colcopy()}, which makes a copy, instead of
32 : * in-place shuffling, has the same alignment effect.
33 : *
34 : * Each alignment sequence is given a unique identifier, so as to
35 : * easily detect this situation. It is retained in the @emph{BAT
36 : * descriptor}.
37 : * @+ Alignment Design Considerations
38 : * Alignment primitives require the right hooks to be inserted in
39 : * several places in the GDK, apart form this file:
40 : * @itemize
41 : * @item @emph{ BUN update operations}.
42 : * The updated BATs have to be marked as un-aligned.
43 : * @item @emph{ set operations}.
44 : * For most relational operations some statements can be made about
45 : * the size and order of the BATs they produce. This information can
46 : * be formalized by indicating alignment information automatically.
47 : * @item @emph{ transaction operations}.
48 : * Alignment statuses must be kept consistent under database commits
49 : * and aborts.
50 : * @end itemize
51 : */
52 : #include "monetdb_config.h"
53 : #include "gdk.h"
54 : #include "gdk_private.h"
55 :
56 : /* Return TRUE if the two BATs are aligned (same size, same
57 : * hseqbase). */
58 : int
59 0 : ALIGNsynced(BAT *b1, BAT *b2)
60 : {
61 0 : if (b1 == NULL || b2 == NULL)
62 : return 0;
63 :
64 0 : assert(!is_oid_nil(b1->hseqbase));
65 0 : assert(!is_oid_nil(b2->hseqbase));
66 :
67 0 : return BATcount(b1) == BATcount(b2) && b1->hseqbase == b2->hseqbase;
68 : }
69 :
70 : /*
71 : * @+ View BATS
72 : * The general routine for getting a 'view' BAT upon another BAT is
73 : * @emph{VIEWcreate}. On this @emph{#read-only} BAT (there is kernel
74 : * support for this), you can then make vertical slices.
75 : *
76 : * It is possible to create a view on a writable BAT. Updates in the
77 : * parent are then automatically reflected in the VIEW. Note that the
78 : * VIEW bat itself can never be modified.
79 : *
80 : * Horizontal views should only be given out on a view BAT, but only
81 : * if it is dead sure the parent BAT is read-only. This because they
82 : * cannot physically share the batBuns heap with the parent, as they
83 : * need a modified version.
84 : */
85 : static void
86 2917606 : VIEWboundsbi(BATiter *bi, BAT *view, BUN l, BUN h)
87 : {
88 2917606 : BUN cnt;
89 2917606 : BUN baseoff;
90 :
91 2917606 : if (bi == NULL || view == NULL)
92 : return;
93 2917606 : if (h > bi->count)
94 : h = bi->count;
95 2917606 : baseoff = bi->baseoff;
96 2917606 : if (h < l)
97 : h = l;
98 2917606 : cnt = h - l;
99 2917606 : if (view->ttype != TYPE_void) {
100 2917641 : view->tbaseoff = baseoff + l;
101 : }
102 2917606 : if (!is_oid_nil(view->tseqbase))
103 62 : view->tseqbase += l;
104 2917606 : BATsetcount(view, cnt);
105 2906759 : BATsetcapacity(view, cnt);
106 2907104 : if (view->tnosorted > l && view->tnosorted < l + cnt)
107 509092 : view->tnosorted -= l;
108 : else
109 2398012 : view->tnosorted = 0;
110 2907104 : if (view->tnorevsorted > l && view->tnorevsorted < l + cnt)
111 594607 : view->tnorevsorted -= l;
112 : else
113 2312497 : view->tnorevsorted = 0;
114 2907104 : if (view->tnokey[0] >= l && view->tnokey[0] < l + cnt &&
115 1681594 : view->tnokey[1] >= l && view->tnokey[1] < l + cnt &&
116 : view->tnokey[0] != view->tnokey[1]) {
117 1243887 : view->tnokey[0] -= l;
118 1243887 : view->tnokey[1] -= l;
119 : } else {
120 1663217 : view->tnokey[0] = view->tnokey[1] = 0;
121 : }
122 2907104 : if (view->tminpos >= l && view->tminpos < l + cnt)
123 1293946 : view->tminpos -= l;
124 : else
125 1613158 : view->tminpos = BUN_NONE;
126 2907104 : if (view->tmaxpos >= l && view->tmaxpos < l + cnt)
127 1136752 : view->tmaxpos -= l;
128 : else
129 1770352 : view->tmaxpos = BUN_NONE;
130 2907104 : view->tkey |= cnt <= 1;
131 2907104 : view->tnil = false; /* we don't know */
132 : }
133 :
134 : void
135 2 : VIEWbounds(BAT *b, BAT *view, BUN l, BUN h)
136 : {
137 2 : BATiter bi = bat_iterator(b);
138 2 : VIEWboundsbi(&bi, view, l, h);
139 2 : bat_iterator_end(&bi);
140 2 : }
141 :
142 : BAT *
143 10503081 : VIEWcreate(oid seq, BAT *b, BUN l, BUN h)
144 : {
145 10503081 : BAT *bn;
146 10503081 : bat tp = 0;
147 :
148 10503081 : BATcheck(b, NULL);
149 :
150 10503081 : if (b->ttype == TYPE_void) {
151 : /* we don't do views on void bats */
152 1815 : if (h > b->batCount)
153 : h = b->batCount;
154 1815 : if (l > h)
155 0 : l = h = 0;
156 1815 : return BATdense(seq, b->tseqbase + l, h - l);
157 : }
158 :
159 10501266 : bn = BATcreatedesc(seq, b->ttype, false, TRANSIENT, 0);
160 10481587 : if (bn == NULL)
161 : return NULL;
162 10481587 : assert(bn->theap == NULL);
163 :
164 10481587 : MT_lock_set(&b->theaplock);
165 10498784 : BATiter bi = bat_iterator_nolock(b);
166 10498784 : bn->batInserted = 0;
167 10498784 : bn->batCount = bi.count;
168 10498784 : bn->batCapacity = b->batCapacity;
169 10498784 : bn->batRestricted = BAT_READ;
170 :
171 : /* the T column descriptor is fully copied except for the
172 : * accelerator data. We need copies because in case of a mark,
173 : * we are going to override a column with a void. */
174 10498784 : bn->tkey = bi.key;
175 10498784 : bn->tseqbase = bi.tseq;
176 10498784 : bn->tsorted = bi.sorted;
177 10498784 : bn->trevsorted = bi.revsorted;
178 10498784 : bn->twidth = bi.width;
179 10498784 : bn->tshift = bi.shift;
180 10498784 : bn->tnonil = bi.nonil;
181 10498784 : bn->tnil = bi.nil;
182 10498784 : bn->tascii = bi.ascii;
183 10498784 : bn->tnokey[0] = bi.nokey[0];
184 10498784 : bn->tnokey[1] = bi.nokey[1];
185 10498784 : bn->tnosorted = bi.nosorted;
186 10498784 : bn->tnorevsorted = bi.norevsorted;
187 10498784 : bn->tminpos = bi.minpos;
188 10498784 : bn->tmaxpos = bi.maxpos;
189 10498784 : bn->tunique_est = bi.unique_est;
190 10498784 : bn->theap = bi.h;
191 10498784 : bn->tbaseoff = bi.baseoff;
192 10498784 : bn->tvheap = bi.vh;
193 :
194 10498784 : tp = VIEWtparent(b);
195 10498784 : if (tp == 0 && b->ttype != TYPE_void)
196 8688873 : tp = b->batCacheid;
197 10498784 : assert(b->ttype != TYPE_void || !tp);
198 10498784 : HEAPincref(bi.h);
199 10516472 : if (bi.vh)
200 2091215 : HEAPincref(bi.vh);
201 10517414 : if (l != 0 || h < bi.count)
202 2921920 : VIEWboundsbi(&bi, bn, l, h);
203 10501840 : MT_lock_unset(&b->theaplock);
204 :
205 10518268 : if (BBPcacheit(bn, true) != GDK_SUCCEED) { /* enter in BBP */
206 0 : if (bn->tvheap)
207 0 : HEAPdecref(bn->tvheap, false);
208 0 : HEAPdecref(bn->theap, false);
209 0 : MT_lock_destroy(&bn->theaplock);
210 0 : MT_lock_destroy(&bn->batIdxLock);
211 0 : MT_rwlock_destroy(&bn->thashlock);
212 0 : GDKfree(bn);
213 0 : return NULL;
214 : }
215 10518586 : BBPretain(bn->theap->parentid);
216 10517021 : if (bn->tvheap)
217 2091208 : BBPretain(bn->tvheap->parentid);
218 10518419 : TRC_DEBUG(ALGO, ALGOBATFMT " " BUNFMT "," BUNFMT " -> " ALGOBATFMT "\n",
219 : ALGOBATPAR(b), l, h, ALGOBATPAR(bn));
220 : return bn;
221 : }
222 :
223 : /*
224 : * The BATmaterialize routine produces in-place materialized version
225 : * of a void bat (which should not be a VIEW) (later we should add the
226 : * code for VIEWs).
227 : */
228 :
229 : gdk_return
230 14345 : BATmaterialize(BAT *b, BUN cap)
231 : {
232 14345 : Heap *tail;
233 14345 : Heap *h, *vh = NULL;
234 14345 : BUN p, q;
235 14345 : oid t, *x;
236 :
237 14345 : BATcheck(b, GDK_FAIL);
238 14345 : assert(!isVIEW(b));
239 14345 : if (cap == BUN_NONE || cap < BATcapacity(b))
240 10410 : cap = BATcapacity(b);
241 14345 : MT_lock_set(&b->theaplock);
242 14347 : if (b->ttype != TYPE_void) {
243 : /* no voids; just call BATextend to make sure of capacity */
244 10412 : MT_lock_unset(&b->theaplock);
245 10413 : return BATextend(b, cap);
246 : }
247 :
248 3935 : if ((tail = GDKmalloc(sizeof(Heap))) == NULL) {
249 0 : MT_lock_unset(&b->theaplock);
250 0 : return GDK_FAIL;
251 : }
252 3935 : p = 0;
253 3935 : q = BATcount(b);
254 3935 : assert(cap >= q - p);
255 3935 : TRC_DEBUG(ALGO, "BATmaterialize(" ALGOBATFMT ")\n", ALGOBATPAR(b));
256 :
257 7870 : *tail = (Heap) {
258 3935 : .farmid = BBPselectfarm(b->batRole, TYPE_oid, offheap),
259 3935 : .parentid = b->batCacheid,
260 : .dirty = true,
261 : .refs = ATOMIC_VAR_INIT(1),
262 : };
263 3935 : settailname(tail, BBP_physical(b->batCacheid), TYPE_oid, 0);
264 3935 : if (HEAPalloc(tail, cap, sizeof(oid)) != GDK_SUCCEED) {
265 0 : MT_lock_unset(&b->theaplock);
266 0 : GDKfree(tail);
267 0 : return GDK_FAIL;
268 : }
269 3935 : x = (oid *) tail->base;
270 3935 : t = b->tseqbase;
271 3935 : if (is_oid_nil(t)) {
272 0 : for (p = 0; p < q; p++)
273 0 : x[p] = oid_nil;
274 : } else {
275 25619714 : for (p = 0; p < q; p++)
276 25615779 : x[p] = t++;
277 : }
278 : /* point of no return */
279 3935 : assert((ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 0);
280 : /* can only look at tvheap when lock is held */
281 3935 : if (complex_cand(b)) {
282 0 : assert(b->batRole == TRANSIENT);
283 0 : if (negoid_cand(b)) {
284 0 : assert(ccand_free(b) % SIZEOF_OID == 0);
285 0 : BUN nexc = (BUN) (ccand_free(b) / SIZEOF_OID);
286 0 : const oid *exc = (const oid *) ccand_first(b);
287 0 : BUN i;
288 0 : for (p = 0, i = 0; p < q; p++) {
289 0 : while (i < nexc && t == exc[i]) {
290 0 : i++;
291 0 : t++;
292 : }
293 0 : x[p] = t++;
294 : }
295 : } else {
296 0 : assert(mask_cand(b));
297 0 : BUN nmsk = (BUN) (ccand_free(b) / sizeof(uint32_t));
298 0 : const uint32_t *src = (const uint32_t *) ccand_first(b);
299 0 : BUN n = 0;
300 0 : t -= (oid) CCAND(b)->firstbit;
301 0 : for (p = 0; p < nmsk; p++) {
302 0 : uint32_t val = src[p];
303 0 : if (val == 0)
304 0 : continue;
305 0 : for (uint32_t i = 0; i < 32; i++) {
306 0 : if (val & (1U << i)) {
307 0 : assert(n < q);
308 0 : x[n++] = t + p * 32 + i;
309 : }
310 : }
311 : }
312 0 : assert(n == q);
313 : }
314 0 : vh = b->tvheap;
315 0 : b->tvheap = NULL;
316 : }
317 3935 : h = b->theap;
318 3935 : b->theap = tail;
319 3935 : b->tbaseoff = 0;
320 3935 : b->theap->dirty = true;
321 3935 : b->tunique_est = is_oid_nil(t) ? 1.0 : (double) b->batCount;
322 3935 : b->ttype = TYPE_oid;
323 3935 : BATsetdims(b, 0);
324 3935 : BATsetcount(b, b->batCount);
325 3935 : BATsetcapacity(b, cap);
326 3935 : MT_lock_unset(&b->theaplock);
327 3934 : if (h->parentid != b->batCacheid)
328 0 : BBPrelease(h->parentid);
329 3934 : HEAPdecref(h, false);
330 3935 : if (vh) {
331 0 : if (vh->parentid != b->batCacheid)
332 0 : BBPrelease(vh->parentid);
333 0 : HEAPdecref(vh, true);
334 : }
335 :
336 : return GDK_SUCCEED;
337 : }
338 :
339 : /*
340 : * Destroy a view.
341 : */
342 : void
343 0 : VIEWdestroy(BAT *b)
344 : {
345 0 : assert(isVIEW(b));
346 0 : bat tp = 0, tvp = 0;
347 :
348 : /* remove any leftover private hash structures */
349 0 : HASHdestroy(b);
350 0 : OIDXdestroy(b);
351 0 : STRMPdestroy(b);
352 0 : RTREEdestroy(b);
353 :
354 0 : MT_lock_set(&b->theaplock);
355 0 : PROPdestroy_nolock(b);
356 : /* heaps that are left after VIEWunlink are ours, so need to be
357 : * destroyed (and files deleted) */
358 0 : if (b->theap) {
359 0 : tp = b->theap->parentid;
360 0 : HEAPdecref(b->theap, tp == b->batCacheid);
361 0 : b->theap = NULL;
362 : }
363 0 : if (b->tvheap) {
364 : /* should never happen: if this heap exists, then it was
365 : * our own (not a view), and then it doesn't make sense
366 : * that the offset heap was a view (at least one of them
367 : * had to be) */
368 0 : tvp = b->tvheap->parentid;
369 0 : HEAPdecref(b->tvheap, tvp == b->batCacheid);
370 0 : b->tvheap = NULL;
371 : }
372 0 : MT_lock_unset(&b->theaplock);
373 0 : if (tp != 0 && tp != b->batCacheid)
374 0 : BBPrelease(tp);
375 0 : if (tvp != 0 && tvp != b->batCacheid)
376 0 : BBPrelease(tvp);
377 0 : BATfree(b);
378 0 : }
|