Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * @a Peter Boncz, Niels Nes
15 : * @* BAT Alignment
16 : * For BATs that result from a n-ary relational scheme it may help to
17 : * align the BATs on their head value. In particular, it permits
18 : * replacing a hash-join by a merge-join, which is significantly
19 : * faster on large tables. Especially if the BATs involved cause page
20 : * activity or when you can not afford the large hash structures to
21 : * speed-up processing.
22 : *
23 : * For orthogonality, we support alignment between arbitrary columns
24 : * (head or tail).
25 : *
26 : * All standard GDK set-calls update the alignment info in their
27 : * respective ways. For example, the routine @emph{BUNclustercopy}
28 : * shuffles the first argument, such that the BUNs are in the same
29 : * order as those in the second argument. This operation will mark
30 : * both columns of the first @emph{BAT} as synced with the second
31 : * (likewise, @emph{Colcopy()}, which makes a copy, instead of
32 : * in-place shuffling, has the same alignment effect.
33 : *
34 : * Each alignment sequence is given a unique identifier, so as to
35 : * easily detect this situation. It is retained in the @emph{BAT
36 : * descriptor}.
37 : * @+ Alignment Design Considerations
38 : * Alignment primitives require the right hooks to be inserted in
39 : * several places in the GDK, apart form this file:
40 : * @itemize
41 : * @item @emph{ BUN update operations}.
42 : * The updated BATs have to be marked as un-aligned.
43 : * @item @emph{ set operations}.
44 : * For most relational operations some statements can be made about
45 : * the size and order of the BATs they produce. This information can
46 : * be formalized by indicating alignment information automatically.
47 : * @item @emph{ transaction operations}.
48 : * Alignment statuses must be kept consistent under database commits
49 : * and aborts.
50 : * @end itemize
51 : */
52 : #include "monetdb_config.h"
53 : #include "gdk.h"
54 : #include "gdk_private.h"
55 :
56 : /* Return TRUE if the two BATs are aligned (same size, same
57 : * hseqbase). */
58 : int
59 0 : ALIGNsynced(BAT *b1, BAT *b2)
60 : {
61 0 : if (b1 == NULL || b2 == NULL)
62 : return 0;
63 :
64 0 : assert(!is_oid_nil(b1->hseqbase));
65 0 : assert(!is_oid_nil(b2->hseqbase));
66 :
67 0 : return BATcount(b1) == BATcount(b2) && b1->hseqbase == b2->hseqbase;
68 : }
69 :
70 : /*
71 : * @+ View BATS
72 : * The general routine for getting a 'view' BAT upon another BAT is
73 : * @emph{VIEWcreate}. On this @emph{#read-only} BAT (there is kernel
74 : * support for this), you can then make vertical slices.
75 : *
76 : * It is possible to create a view on a writable BAT. Updates in the
77 : * parent are then automatically reflected in the VIEW. Note that the
78 : * VIEW bat itself can never be modified.
79 : *
80 : * Horizontal views should only be given out on a view BAT, but only
81 : * if it is dead sure the parent BAT is read-only. This because they
82 : * cannot physically share the batBuns heap with the parent, as they
83 : * need a modified version.
84 : */
85 : BAT *
86 6810544 : VIEWcreate(oid seq, BAT *b)
87 : {
88 6810544 : BAT *bn;
89 6810544 : bat tp = 0;
90 :
91 6810544 : BATcheck(b, NULL);
92 :
93 6810544 : if (b->ttype == TYPE_void) {
94 : /* we don't do views on void bats */
95 2221 : return BATdense(seq, b->tseqbase, b->batCount);
96 : }
97 :
98 6808323 : bn = BATcreatedesc(seq, b->ttype, false, TRANSIENT, 0);
99 6807865 : if (bn == NULL)
100 : return NULL;
101 6807865 : assert(bn->theap == NULL);
102 :
103 6807865 : MT_lock_set(&b->theaplock);
104 6808183 : bn->batInserted = 0;
105 6808183 : bn->batCount = b->batCount;
106 6808183 : bn->batCapacity = b->batCapacity;
107 6808183 : bn->batRestricted = BAT_READ;
108 :
109 : /* the T column descriptor is fully copied except for the
110 : * accelerator data. We need copies because in case of a mark,
111 : * we are going to override a column with a void. */
112 6808183 : bn->tkey = b->tkey;
113 6808183 : bn->tseqbase = b->tseqbase;
114 6808183 : bn->tsorted = b->tsorted;
115 6808183 : bn->trevsorted = b->trevsorted;
116 6808183 : bn->twidth = b->twidth;
117 6808183 : bn->tshift = b->tshift;
118 6808183 : bn->tnonil = b->tnonil;
119 6808183 : bn->tnil = b->tnil;
120 6808183 : bn->tnokey[0] = b->tnokey[0];
121 6808183 : bn->tnokey[1] = b->tnokey[1];
122 6808183 : bn->tnosorted = b->tnosorted;
123 6808183 : bn->tnorevsorted = b->tnorevsorted;
124 6808183 : bn->tminpos = b->tminpos;
125 6808183 : bn->tmaxpos = b->tmaxpos;
126 6808183 : bn->tunique_est = b->tunique_est;
127 6808183 : bn->theap = b->theap;
128 6808183 : bn->tbaseoff = b->tbaseoff;
129 6808183 : bn->tvheap = b->tvheap;
130 :
131 6808183 : tp = VIEWtparent(b);
132 6808183 : if (tp == 0 && b->ttype != TYPE_void)
133 5740491 : tp = b->batCacheid;
134 6808183 : assert(b->ttype != TYPE_void || !tp);
135 6808183 : HEAPincref(b->theap);
136 6808134 : if (b->tvheap)
137 1471430 : HEAPincref(b->tvheap);
138 6808149 : MT_lock_unset(&b->theaplock);
139 :
140 6807499 : if (BBPcacheit(bn, true) != GDK_SUCCEED) { /* enter in BBP */
141 0 : if (bn->tvheap)
142 0 : HEAPdecref(bn->tvheap, false);
143 0 : HEAPdecref(bn->theap, false);
144 0 : MT_lock_destroy(&bn->theaplock);
145 0 : MT_lock_destroy(&bn->batIdxLock);
146 0 : MT_rwlock_destroy(&bn->thashlock);
147 0 : GDKfree(bn);
148 0 : return NULL;
149 : }
150 6807723 : BBPretain(bn->theap->parentid);
151 6807437 : if (bn->tvheap)
152 1471382 : BBPretain(bn->tvheap->parentid);
153 6807160 : TRC_DEBUG(ALGO, ALGOBATFMT " -> " ALGOBATFMT "\n",
154 : ALGOBATPAR(b), ALGOBATPAR(bn));
155 : return bn;
156 : }
157 :
158 : /*
159 : * The BATmaterialize routine produces in-place materialized version
160 : * of a void bat (which should not be a VIEW) (later we should add the
161 : * code for VIEWs).
162 : */
163 :
164 : gdk_return
165 13355 : BATmaterialize(BAT *b, BUN cap)
166 : {
167 13355 : Heap *tail;
168 13355 : Heap *h, *vh = NULL;
169 13355 : BUN p, q;
170 13355 : oid t, *x;
171 :
172 13355 : BATcheck(b, GDK_FAIL);
173 13355 : assert(!isVIEW(b));
174 13355 : if (cap == BUN_NONE || cap < BATcapacity(b))
175 12221 : cap = BATcapacity(b);
176 13355 : if (b->ttype != TYPE_void) {
177 : /* no voids; just call BATextend to make sure of capacity */
178 12221 : return BATextend(b, cap);
179 : }
180 :
181 1134 : if ((tail = GDKmalloc(sizeof(Heap))) == NULL)
182 : return GDK_FAIL;
183 1134 : p = 0;
184 1134 : q = BATcount(b);
185 1134 : assert(cap >= q - p);
186 1134 : TRC_DEBUG(ALGO, "BATmaterialize(" ALGOBATFMT ")\n", ALGOBATPAR(b));
187 :
188 : /* cleanup possible ACC's */
189 1134 : HASHdestroy(b);
190 1134 : IMPSdestroy(b);
191 1134 : OIDXdestroy(b);
192 :
193 2268 : *tail = (Heap) {
194 1134 : .farmid = BBPselectfarm(b->batRole, TYPE_oid, offheap),
195 1134 : .parentid = b->batCacheid,
196 : .dirty = true,
197 : };
198 1134 : settailname(tail, BBP_physical(b->batCacheid), TYPE_oid, 0);
199 1134 : if (HEAPalloc(tail, cap, sizeof(oid)) != GDK_SUCCEED) {
200 0 : GDKfree(tail);
201 0 : return GDK_FAIL;
202 : }
203 1134 : x = (oid *) tail->base;
204 1134 : t = b->tseqbase;
205 1134 : if (is_oid_nil(t)) {
206 0 : for (p = 0; p < q; p++)
207 0 : x[p] = oid_nil;
208 : } else {
209 4855504 : for (p = 0; p < q; p++)
210 4854370 : x[p] = t++;
211 : }
212 1134 : ATOMIC_INIT(&tail->refs, 1);
213 : /* point of no return */
214 1134 : MT_lock_set(&b->theaplock);
215 1134 : assert((ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 0);
216 : /* can only look at tvheap when lock is held */
217 1134 : if (complex_cand(b)) {
218 0 : assert(b->batRole == TRANSIENT);
219 0 : if (negoid_cand(b)) {
220 0 : assert(ccand_free(b) % SIZEOF_OID == 0);
221 0 : BUN nexc = (BUN) (ccand_free(b) / SIZEOF_OID);
222 0 : const oid *exc = (const oid *) ccand_first(b);
223 0 : BUN i;
224 0 : for (p = 0, i = 0; p < q; p++) {
225 0 : while (i < nexc && t == exc[i]) {
226 0 : i++;
227 0 : t++;
228 : }
229 0 : x[p] = t++;
230 : }
231 : } else {
232 0 : assert(mask_cand(b));
233 0 : BUN nmsk = (BUN) (ccand_free(b) / sizeof(uint32_t));
234 0 : const uint32_t *src = (const uint32_t *) ccand_first(b);
235 0 : BUN n = 0;
236 0 : t -= (oid) CCAND(b)->firstbit;
237 0 : for (p = 0; p < nmsk; p++) {
238 0 : uint32_t val = src[p];
239 0 : if (val == 0)
240 0 : continue;
241 0 : for (uint32_t i = 0; i < 32; i++) {
242 0 : if (val & (1U << i)) {
243 0 : assert(n < q);
244 0 : x[n++] = t + p * 32 + i;
245 : }
246 : }
247 : }
248 0 : assert(n == q);
249 : }
250 0 : vh = b->tvheap;
251 0 : b->tvheap = NULL;
252 : }
253 1134 : h = b->theap;
254 1134 : b->theap = tail;
255 1134 : b->tbaseoff = 0;
256 1134 : b->theap->dirty = true;
257 1134 : b->tunique_est = is_oid_nil(t) ? 1.0 : (double) b->batCount;
258 1134 : b->ttype = TYPE_oid;
259 1134 : BATsetdims(b, 0);
260 1134 : BATsetcount(b, b->batCount);
261 1134 : BATsetcapacity(b, cap);
262 1134 : MT_lock_unset(&b->theaplock);
263 1134 : if (h->parentid != b->batCacheid)
264 0 : BBPrelease(h->parentid);
265 1134 : HEAPdecref(h, false);
266 1134 : if (vh) {
267 0 : if (vh->parentid != b->batCacheid)
268 0 : BBPrelease(vh->parentid);
269 0 : HEAPdecref(vh, true);
270 : }
271 :
272 : return GDK_SUCCEED;
273 : }
274 :
275 : /*
276 : * The remainder are utilities to manipulate the BAT view and not to
277 : * forget some details in the process. It expects a position range in
278 : * the underlying BAT and compensates for outliers.
279 : */
280 : void
281 6795756 : VIEWboundsbi(BATiter *bi, BAT *view, BUN l, BUN h)
282 : {
283 6795756 : BUN cnt;
284 6795756 : BUN baseoff;
285 :
286 6795756 : if (bi == NULL || view == NULL)
287 : return;
288 6795756 : if (h > bi->count)
289 : h = bi->count;
290 6795756 : baseoff = bi->baseoff;
291 6795756 : if (h < l)
292 : h = l;
293 6795756 : cnt = h - l;
294 6795756 : view->batInserted = 0;
295 6795756 : if (view->ttype != TYPE_void) {
296 6794128 : view->tbaseoff = baseoff + l;
297 : }
298 6795756 : BATsetcount(view, cnt);
299 6795052 : BATsetcapacity(view, cnt);
300 6795683 : if (view->tnosorted > l && view->tnosorted < l + cnt)
301 3319203 : view->tnosorted -= l;
302 : else
303 3476480 : view->tnosorted = 0;
304 6795683 : if (view->tnorevsorted > l && view->tnorevsorted < l + cnt)
305 4207838 : view->tnorevsorted -= l;
306 : else
307 2587845 : view->tnorevsorted = 0;
308 6795683 : if (view->tnokey[0] >= l && view->tnokey[0] < l + cnt &&
309 6065290 : view->tnokey[1] >= l && view->tnokey[1] < l + cnt &&
310 : view->tnokey[0] != view->tnokey[1]) {
311 2436529 : view->tnokey[0] -= l;
312 2436529 : view->tnokey[1] -= l;
313 : } else {
314 4359154 : view->tnokey[0] = view->tnokey[1] = 0;
315 : }
316 6795683 : if (view->tminpos >= l && view->tminpos < l + cnt)
317 5400041 : view->tminpos -= l;
318 : else
319 1395642 : view->tminpos = BUN_NONE;
320 6795683 : if (view->tmaxpos >= l && view->tmaxpos < l + cnt)
321 5284286 : view->tmaxpos -= l;
322 : else
323 1511397 : view->tmaxpos = BUN_NONE;
324 6795683 : view->tkey |= cnt <= 1;
325 : }
326 : void
327 3 : VIEWbounds(BAT *b, BAT *view, BUN l, BUN h)
328 : {
329 3 : BATiter bi = bat_iterator(b);
330 3 : VIEWboundsbi(&bi, view, l, h);
331 3 : bat_iterator_end(&bi);
332 3 : }
333 :
334 : /*
335 : * Destroy a view.
336 : */
337 : void
338 0 : VIEWdestroy(BAT *b)
339 : {
340 0 : assert(isVIEW(b));
341 0 : bat tp = 0, tvp = 0;
342 :
343 : /* remove any leftover private hash structures */
344 0 : HASHdestroy(b);
345 0 : IMPSdestroy(b);
346 0 : OIDXdestroy(b);
347 0 : STRMPdestroy(b);
348 0 : RTREEdestroy(b);
349 :
350 0 : MT_lock_set(&b->theaplock);
351 0 : PROPdestroy_nolock(b);
352 : /* heaps that are left after VIEWunlink are ours, so need to be
353 : * destroyed (and files deleted) */
354 0 : if (b->theap) {
355 0 : tp = b->theap->parentid;
356 0 : HEAPdecref(b->theap, tp == b->batCacheid);
357 0 : b->theap = NULL;
358 : }
359 0 : if (b->tvheap) {
360 : /* should never happen: if this heap exists, then it was
361 : * our own (not a view), and then it doesn't make sense
362 : * that the offset heap was a view (at least one of them
363 : * had to be) */
364 0 : tvp = b->tvheap->parentid;
365 0 : HEAPdecref(b->tvheap, tvp == b->batCacheid);
366 0 : b->tvheap = NULL;
367 : }
368 0 : MT_lock_unset(&b->theaplock);
369 0 : if (tp != 0 && tp != b->batCacheid)
370 0 : BBPrelease(tp);
371 0 : if (tvp != 0 && tvp != b->batCacheid)
372 0 : BBPrelease(tvp);
373 0 : BATfree(b);
374 0 : }
|