Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * @a Peter Boncz, Niels Nes
15 : * @* BAT Alignment
16 : * For BATs that result from a n-ary relational scheme it may help to
17 : * align the BATs on their head value. In particular, it permits
18 : * replacing a hash-join by a merge-join, which is significantly
19 : * faster on large tables. Especially if the BATs involved cause page
20 : * activity or when you can not afford the large hash structures to
21 : * speed-up processing.
22 : *
23 : * For orthogonality, we support alignment between arbitrary columns
24 : * (head or tail).
25 : *
26 : * All standard GDK set-calls update the alignment info in their
27 : * respective ways. For example, the routine @emph{BUNclustercopy}
28 : * shuffles the first argument, such that the BUNs are in the same
29 : * order as those in the second argument. This operation will mark
30 : * both columns of the first @emph{BAT} as synced with the second
31 : * (likewise, @emph{Colcopy()}, which makes a copy, instead of
32 : * in-place shuffling, has the same alignment effect.
33 : *
34 : * Each alignment sequence is given a unique identifier, so as to
35 : * easily detect this situation. It is retained in the @emph{BAT
36 : * descriptor}.
37 : * @+ Alignment Design Considerations
38 : * Alignment primitives require the right hooks to be inserted in
39 : * several places in the GDK, apart form this file:
40 : * @itemize
41 : * @item @emph{ BUN update operations}.
42 : * The updated BATs have to be marked as un-aligned.
43 : * @item @emph{ set operations}.
44 : * For most relational operations some statements can be made about
45 : * the size and order of the BATs they produce. This information can
46 : * be formalized by indicating alignment information automatically.
47 : * @item @emph{ transaction operations}.
48 : * Alignment statuses must be kept consistent under database commits
49 : * and aborts.
50 : * @end itemize
51 : */
52 : #include "monetdb_config.h"
53 : #include "gdk.h"
54 : #include "gdk_private.h"
55 :
56 : /* Return TRUE if the two BATs are aligned (same size, same
57 : * hseqbase). */
58 : int
59 0 : ALIGNsynced(BAT *b1, BAT *b2)
60 : {
61 0 : if (b1 == NULL || b2 == NULL)
62 : return 0;
63 :
64 0 : assert(!is_oid_nil(b1->hseqbase));
65 0 : assert(!is_oid_nil(b2->hseqbase));
66 :
67 0 : return BATcount(b1) == BATcount(b2) && b1->hseqbase == b2->hseqbase;
68 : }
69 :
70 : /*
71 : * @+ View BATS
72 : * The general routine for getting a 'view' BAT upon another BAT is
73 : * @emph{VIEWcreate}. On this @emph{#read-only} BAT (there is kernel
74 : * support for this), you can then make vertical slices.
75 : *
76 : * It is possible to create a view on a writable BAT. Updates in the
77 : * parent are then automatically reflected in the VIEW. Note that the
78 : * VIEW bat itself can never be modified.
79 : *
80 : * Horizontal views should only be given out on a view BAT, but only
81 : * if it is dead sure the parent BAT is read-only. This because they
82 : * cannot physically share the batBuns heap with the parent, as they
83 : * need a modified version.
84 : */
85 : static void
86 2881811 : VIEWboundsbi(BATiter *bi, BAT *view, BUN l, BUN h)
87 : {
88 2881811 : BUN cnt;
89 2881811 : BUN baseoff;
90 :
91 2881811 : if (bi == NULL || view == NULL)
92 : return;
93 2881811 : if (h > bi->count)
94 : h = bi->count;
95 2881811 : baseoff = bi->baseoff;
96 2881811 : if (h < l)
97 : h = l;
98 2881811 : cnt = h - l;
99 2881811 : if (view->ttype != TYPE_void) {
100 2881395 : view->tbaseoff = baseoff + l;
101 : }
102 2881811 : if (!is_oid_nil(view->tseqbase))
103 62 : view->tseqbase += l;
104 2881811 : BATsetcount(view, cnt);
105 2869868 : BATsetcapacity(view, cnt);
106 2870440 : if (view->tnosorted > l && view->tnosorted < l + cnt)
107 454461 : view->tnosorted -= l;
108 : else
109 2415979 : view->tnosorted = 0;
110 2870440 : if (view->tnorevsorted > l && view->tnorevsorted < l + cnt)
111 539088 : view->tnorevsorted -= l;
112 : else
113 2331352 : view->tnorevsorted = 0;
114 2870440 : if (view->tnokey[0] >= l && view->tnokey[0] < l + cnt &&
115 1595036 : view->tnokey[1] >= l && view->tnokey[1] < l + cnt &&
116 : view->tnokey[0] != view->tnokey[1]) {
117 1187146 : view->tnokey[0] -= l;
118 1187146 : view->tnokey[1] -= l;
119 : } else {
120 1683294 : view->tnokey[0] = view->tnokey[1] = 0;
121 : }
122 2870440 : if (view->tminpos >= l && view->tminpos < l + cnt)
123 1211969 : view->tminpos -= l;
124 : else
125 1658471 : view->tminpos = BUN_NONE;
126 2870440 : if (view->tmaxpos >= l && view->tmaxpos < l + cnt)
127 1066412 : view->tmaxpos -= l;
128 : else
129 1804028 : view->tmaxpos = BUN_NONE;
130 2870440 : view->tkey |= cnt <= 1;
131 2870440 : view->tnil = false; /* we don't know */
132 : }
133 :
134 : void
135 2 : VIEWbounds(BAT *b, BAT *view, BUN l, BUN h)
136 : {
137 2 : BATiter bi = bat_iterator(b);
138 2 : VIEWboundsbi(&bi, view, l, h);
139 2 : bat_iterator_end(&bi);
140 2 : }
141 :
142 : BAT *
143 11357384 : VIEWcreate(oid seq, BAT *b, BUN l, BUN h)
144 : {
145 11357384 : BAT *bn;
146 11357384 : bat tp = 0;
147 :
148 11357384 : BATcheck(b, NULL);
149 :
150 11357384 : if (b->ttype == TYPE_void) {
151 : /* we don't do views on void bats */
152 2042 : if (h > b->batCount)
153 : h = b->batCount;
154 2042 : if (l > h)
155 0 : l = h = 0;
156 2042 : return BATdense(seq, b->tseqbase + l, h - l);
157 : }
158 :
159 11355342 : bn = BATcreatedesc(seq, b->ttype, false, TRANSIENT, 0);
160 11319171 : if (bn == NULL)
161 : return NULL;
162 11319171 : assert(bn->theap == NULL);
163 :
164 11319171 : MT_lock_set(&b->theaplock);
165 11360784 : BATiter bi = bat_iterator_nolock(b);
166 11360784 : bn->batInserted = 0;
167 11360784 : bn->batCount = bi.count;
168 11360784 : bn->batCapacity = b->batCapacity;
169 11360784 : bn->batRestricted = BAT_READ;
170 :
171 : /* the T column descriptor is fully copied except for the
172 : * accelerator data. We need copies because in case of a mark,
173 : * we are going to override a column with a void. */
174 11360784 : bn->tkey = bi.key;
175 11360784 : bn->tseqbase = bi.tseq;
176 11360784 : bn->tsorted = bi.sorted;
177 11360784 : bn->trevsorted = bi.revsorted;
178 11360784 : bn->twidth = bi.width;
179 11360784 : bn->tshift = bi.shift;
180 11360784 : bn->tnonil = bi.nonil;
181 11360784 : bn->tnil = bi.nil;
182 11360784 : bn->tnokey[0] = bi.nokey[0];
183 11360784 : bn->tnokey[1] = bi.nokey[1];
184 11360784 : bn->tnosorted = bi.nosorted;
185 11360784 : bn->tnorevsorted = bi.norevsorted;
186 11360784 : bn->tminpos = bi.minpos;
187 11360784 : bn->tmaxpos = bi.maxpos;
188 11360784 : bn->tunique_est = bi.unique_est;
189 11360784 : bn->theap = bi.h;
190 11360784 : bn->tbaseoff = bi.baseoff;
191 11360784 : bn->tvheap = bi.vh;
192 :
193 11360784 : tp = VIEWtparent(b);
194 11360784 : if (tp == 0 && b->ttype != TYPE_void)
195 9506757 : tp = b->batCacheid;
196 11360784 : assert(b->ttype != TYPE_void || !tp);
197 11360784 : HEAPincref(bi.h);
198 11380813 : if (bi.vh)
199 2386868 : HEAPincref(bi.vh);
200 11376334 : if (l != 0 || h < bi.count)
201 2887253 : VIEWboundsbi(&bi, bn, l, h);
202 11359231 : MT_lock_unset(&b->theaplock);
203 :
204 11377448 : if (BBPcacheit(bn, true) != GDK_SUCCEED) { /* enter in BBP */
205 0 : if (bn->tvheap)
206 0 : HEAPdecref(bn->tvheap, false);
207 0 : HEAPdecref(bn->theap, false);
208 0 : MT_lock_destroy(&bn->theaplock);
209 0 : MT_lock_destroy(&bn->batIdxLock);
210 0 : MT_rwlock_destroy(&bn->thashlock);
211 0 : GDKfree(bn);
212 0 : return NULL;
213 : }
214 11378765 : BBPretain(bn->theap->parentid);
215 11375137 : if (bn->tvheap)
216 2386627 : BBPretain(bn->tvheap->parentid);
217 11377111 : TRC_DEBUG(ALGO, ALGOBATFMT " " BUNFMT "," BUNFMT " -> " ALGOBATFMT "\n",
218 : ALGOBATPAR(b), l, h, ALGOBATPAR(bn));
219 : return bn;
220 : }
221 :
222 : /*
223 : * The BATmaterialize routine produces in-place materialized version
224 : * of a void bat (which should not be a VIEW) (later we should add the
225 : * code for VIEWs).
226 : */
227 :
228 : gdk_return
229 13122 : BATmaterialize(BAT *b, BUN cap)
230 : {
231 13122 : Heap *tail;
232 13122 : Heap *h, *vh = NULL;
233 13122 : BUN p, q;
234 13122 : oid t, *x;
235 :
236 13122 : BATcheck(b, GDK_FAIL);
237 13122 : assert(!isVIEW(b));
238 13122 : if (cap == BUN_NONE || cap < BATcapacity(b))
239 12408 : cap = BATcapacity(b);
240 13122 : if (b->ttype != TYPE_void) {
241 : /* no voids; just call BATextend to make sure of capacity */
242 12408 : return BATextend(b, cap);
243 : }
244 :
245 714 : if ((tail = GDKmalloc(sizeof(Heap))) == NULL)
246 : return GDK_FAIL;
247 714 : p = 0;
248 714 : q = BATcount(b);
249 714 : assert(cap >= q - p);
250 714 : TRC_DEBUG(ALGO, "BATmaterialize(" ALGOBATFMT ")\n", ALGOBATPAR(b));
251 :
252 : /* cleanup possible ACC's */
253 714 : HASHdestroy(b);
254 714 : IMPSdestroy(b);
255 714 : OIDXdestroy(b);
256 :
257 1428 : *tail = (Heap) {
258 714 : .farmid = BBPselectfarm(b->batRole, TYPE_oid, offheap),
259 714 : .parentid = b->batCacheid,
260 : .dirty = true,
261 : .refs = ATOMIC_VAR_INIT(1),
262 : };
263 714 : settailname(tail, BBP_physical(b->batCacheid), TYPE_oid, 0);
264 714 : if (HEAPalloc(tail, cap, sizeof(oid)) != GDK_SUCCEED) {
265 0 : GDKfree(tail);
266 0 : return GDK_FAIL;
267 : }
268 714 : x = (oid *) tail->base;
269 714 : t = b->tseqbase;
270 714 : if (is_oid_nil(t)) {
271 0 : for (p = 0; p < q; p++)
272 0 : x[p] = oid_nil;
273 : } else {
274 2180501 : for (p = 0; p < q; p++)
275 2179787 : x[p] = t++;
276 : }
277 : /* point of no return */
278 714 : MT_lock_set(&b->theaplock);
279 714 : assert((ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 0);
280 : /* can only look at tvheap when lock is held */
281 714 : if (complex_cand(b)) {
282 0 : assert(b->batRole == TRANSIENT);
283 0 : if (negoid_cand(b)) {
284 0 : assert(ccand_free(b) % SIZEOF_OID == 0);
285 0 : BUN nexc = (BUN) (ccand_free(b) / SIZEOF_OID);
286 0 : const oid *exc = (const oid *) ccand_first(b);
287 0 : BUN i;
288 0 : for (p = 0, i = 0; p < q; p++) {
289 0 : while (i < nexc && t == exc[i]) {
290 0 : i++;
291 0 : t++;
292 : }
293 0 : x[p] = t++;
294 : }
295 : } else {
296 0 : assert(mask_cand(b));
297 0 : BUN nmsk = (BUN) (ccand_free(b) / sizeof(uint32_t));
298 0 : const uint32_t *src = (const uint32_t *) ccand_first(b);
299 0 : BUN n = 0;
300 0 : t -= (oid) CCAND(b)->firstbit;
301 0 : for (p = 0; p < nmsk; p++) {
302 0 : uint32_t val = src[p];
303 0 : if (val == 0)
304 0 : continue;
305 0 : for (uint32_t i = 0; i < 32; i++) {
306 0 : if (val & (1U << i)) {
307 0 : assert(n < q);
308 0 : x[n++] = t + p * 32 + i;
309 : }
310 : }
311 : }
312 0 : assert(n == q);
313 : }
314 0 : vh = b->tvheap;
315 0 : b->tvheap = NULL;
316 : }
317 714 : h = b->theap;
318 714 : b->theap = tail;
319 714 : b->tbaseoff = 0;
320 714 : b->theap->dirty = true;
321 714 : b->tunique_est = is_oid_nil(t) ? 1.0 : (double) b->batCount;
322 714 : b->ttype = TYPE_oid;
323 714 : BATsetdims(b, 0);
324 714 : BATsetcount(b, b->batCount);
325 714 : BATsetcapacity(b, cap);
326 714 : MT_lock_unset(&b->theaplock);
327 714 : if (h->parentid != b->batCacheid)
328 0 : BBPrelease(h->parentid);
329 714 : HEAPdecref(h, false);
330 714 : if (vh) {
331 0 : if (vh->parentid != b->batCacheid)
332 0 : BBPrelease(vh->parentid);
333 0 : HEAPdecref(vh, true);
334 : }
335 :
336 : return GDK_SUCCEED;
337 : }
338 :
339 : /*
340 : * Destroy a view.
341 : */
342 : void
343 0 : VIEWdestroy(BAT *b)
344 : {
345 0 : assert(isVIEW(b));
346 0 : bat tp = 0, tvp = 0;
347 :
348 : /* remove any leftover private hash structures */
349 0 : HASHdestroy(b);
350 0 : IMPSdestroy(b);
351 0 : OIDXdestroy(b);
352 0 : STRMPdestroy(b);
353 0 : RTREEdestroy(b);
354 :
355 0 : MT_lock_set(&b->theaplock);
356 0 : PROPdestroy_nolock(b);
357 : /* heaps that are left after VIEWunlink are ours, so need to be
358 : * destroyed (and files deleted) */
359 0 : if (b->theap) {
360 0 : tp = b->theap->parentid;
361 0 : HEAPdecref(b->theap, tp == b->batCacheid);
362 0 : b->theap = NULL;
363 : }
364 0 : if (b->tvheap) {
365 : /* should never happen: if this heap exists, then it was
366 : * our own (not a view), and then it doesn't make sense
367 : * that the offset heap was a view (at least one of them
368 : * had to be) */
369 0 : tvp = b->tvheap->parentid;
370 0 : HEAPdecref(b->tvheap, tvp == b->batCacheid);
371 0 : b->tvheap = NULL;
372 : }
373 0 : MT_lock_unset(&b->theaplock);
374 0 : if (tp != 0 && tp != b->batCacheid)
375 0 : BBPrelease(tp);
376 0 : if (tvp != 0 && tvp != b->batCacheid)
377 0 : BBPrelease(tvp);
378 0 : BATfree(b);
379 0 : }
|