Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * (c) M. L. Kersten, P. Boncz, S. Manegold, N. Nes, K.S. Mullender
15 : * Common BAT Operations
16 : * We factor out all possible overhead by inlining code. This
17 : * includes the macros BUNhead and BUNtail, which do a test to see
18 : * whether the atom resides in the buns or in a variable storage
19 : * heap.
20 : */
21 : #include "monetdb_config.h"
22 : #include "gdk.h"
23 : #include "gdk_private.h"
24 :
25 : gdk_return
26 57413560 : unshare_varsized_heap(BAT *b)
27 : {
28 57413560 : if (ATOMvarsized(b->ttype) &&
29 23173504 : b->tvheap->parentid != b->batCacheid) {
30 848 : Heap *h = GDKmalloc(sizeof(Heap));
31 848 : if (h == NULL)
32 : return GDK_FAIL;
33 848 : MT_thread_setalgorithm("unshare vheap");
34 1696 : *h = (Heap) {
35 848 : .parentid = b->batCacheid,
36 848 : .farmid = BBPselectfarm(b->batRole, TYPE_str, varheap),
37 : .refs = ATOMIC_VAR_INIT(1),
38 : };
39 848 : strconcat_len(h->filename, sizeof(h->filename),
40 848 : BBP_physical(b->batCacheid), ".theap", NULL);
41 848 : if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
42 0 : HEAPfree(h, true);
43 0 : GDKfree(h);
44 0 : return GDK_FAIL;
45 : }
46 848 : MT_lock_set(&b->theaplock);
47 848 : Heap *oh = b->tvheap;
48 848 : b->tvheap = h;
49 848 : MT_lock_unset(&b->theaplock);
50 848 : BBPrelease(oh->parentid);
51 848 : HEAPdecref(oh, false);
52 : }
53 : return GDK_SUCCEED;
54 : }
55 :
56 : /* We try to be clever when appending one string bat to another.
57 : * First of all, we try to actually share the string heap so that we
58 : * don't need an extra copy, and if that can't be done, we see whether
59 : * it makes sense to just quickly copy the whole string heap instead
60 : * of inserting individual strings. See the comments in the code for
61 : * more information. */
62 : static gdk_return
63 97840 : insert_string_bat(BAT *b, BATiter *ni, struct canditer *ci, bool force, bool mayshare, QryCtx *qry_ctx)
64 : {
65 97840 : size_t toff = ~(size_t) 0; /* tail offset */
66 97840 : BUN p, r; /* loop variables */
67 97840 : const void *tp = NULL; /* tail value pointer */
68 97840 : var_t v;
69 97840 : size_t off; /* offset within n's string heap */
70 97840 : BUN cnt = ci->ncand;
71 97840 : BUN oldcnt = BATcount(b);
72 :
73 97840 : assert(b->ttype == TYPE_str);
74 97840 : assert(b->tbaseoff == 0);
75 97840 : assert(b->theap->parentid == b->batCacheid);
76 : /* only transient bats can use some other bat's string heap */
77 97840 : assert(b->batRole == TRANSIENT || b->tvheap->parentid == b->batCacheid);
78 97840 : if (cnt == 0)
79 : return GDK_SUCCEED;
80 :
81 97838 : if (b->tvheap == ni->vh) {
82 : /* vheaps are already shared, continue doing so: we just
83 : * need to append the offsets */
84 27799 : toff = 0;
85 27799 : MT_thread_setalgorithm("shared vheap");
86 70039 : } else if (mayshare && b->batRole == TRANSIENT && oldcnt == 0) {
87 : /* we can share the vheaps, so we then only need to
88 : * append the offsets */
89 56538 : MT_lock_set(&b->theaplock);
90 56537 : bat bid = b->tvheap->parentid;
91 56537 : HEAPdecref(b->tvheap, bid == b->batCacheid);
92 56543 : HEAPincref(ni->vh);
93 56544 : b->tvheap = ni->vh;
94 56544 : b->tascii = ni->ascii;
95 56544 : MT_lock_unset(&b->theaplock);
96 56544 : BBPretain(ni->vh->parentid);
97 56544 : if (bid != b->batCacheid)
98 0 : BBPrelease(bid);
99 56544 : toff = 0;
100 56544 : MT_thread_setalgorithm("share vheap");
101 : } else {
102 : /* no heap sharing, so also make sure the heap isn't
103 : * shared currently (we're not allowed to write in
104 : * another bat's heap) */
105 14349 : if (b->tvheap->parentid != b->batCacheid &&
106 848 : unshare_varsized_heap(b) != GDK_SUCCEED) {
107 : return GDK_FAIL;
108 : }
109 13501 : if (oldcnt == 0 || (!GDK_ELIMDOUBLES(b->tvheap) &&
110 72 : !GDK_ELIMDOUBLES(ni->vh))) {
111 : /* we'll consider copying the string heap completely
112 : *
113 : * we first estimate how much space the string heap
114 : * should occupy, given the number of rows we need to
115 : * insert, then, if that is way smaller than the actual
116 : * space occupied, we will skip the copy and just insert
117 : * one by one */
118 : size_t len = 0;
119 5829807 : for (int i = 0; i < 1024; i++) {
120 5824112 : p = (BUN) (((double) rand() / RAND_MAX) * (cnt - 1));
121 5824591 : p = canditer_idx(ci, p) - ni->b->hseqbase;
122 5823885 : len += strlen(BUNtvar(*ni, p)) + 1;
123 : }
124 5695 : len = (len + 512) / 1024; /* rounded average length */
125 5695 : r = (GDK_ELIMLIMIT - GDK_STRHASHSIZE) / (len + 12);
126 : /* r is estimate of number of strings in
127 : * double-eliminated area */
128 5695 : if (r < ci->ncand)
129 908 : len = GDK_ELIMLIMIT + (ci->ncand - r) * len;
130 : else
131 4787 : len = GDK_STRHASHSIZE + ci->ncand * (len + 12);
132 : /* len is total estimated expected size of vheap */
133 :
134 5695 : if (len > ni->vhfree / 2) {
135 : /* we copy the string heap, perhaps appending */
136 5684 : if (oldcnt == 0) {
137 5656 : toff = 0;
138 5656 : MT_thread_setalgorithm("copy vheap");
139 : } else {
140 28 : toff = (b->tvheap->free + GDK_VARALIGN - 1) & ~(GDK_VARALIGN - 1);
141 28 : MT_thread_setalgorithm("append vheap");
142 : }
143 :
144 5684 : MT_lock_set(&b->theaplock);
145 5684 : if (HEAPgrow(&b->tvheap, toff + ni->vhfree, force) != GDK_SUCCEED) {
146 0 : MT_lock_unset(&b->theaplock);
147 0 : return GDK_FAIL;
148 : }
149 5683 : memcpy(b->tvheap->base + toff, ni->vh->base, ni->vhfree);
150 5683 : b->tvheap->free = toff + ni->vhfree;
151 5683 : b->tvheap->dirty = true;
152 5683 : b->tascii &= ni->ascii;
153 5683 : MT_lock_unset(&b->theaplock);
154 : }
155 : }
156 : }
157 : /* if toff has the initial value of ~0, we insert strings
158 : * individually, otherwise we only copy (insert) offsets */
159 90027 : if (toff == ~(size_t) 0)
160 : v = GDK_VAROFFSET;
161 : else
162 90027 : v = b->tvheap->free - 1;
163 :
164 : /* make sure there is (vertical) space in the offset heap, we
165 : * may also widen thanks to v, set above */
166 97847 : if (GDKupgradevarheap(b, v, oldcnt + cnt < b->batCapacity ? b->batCapacity : oldcnt + cnt, b->batCount) != GDK_SUCCEED) {
167 : return GDK_FAIL;
168 : }
169 :
170 97845 : if (toff == 0 && ni->width == b->twidth && ci->tpe == cand_dense) {
171 : /* we don't need to do any translation of offset
172 : * values, so we can use fast memcpy */
173 89784 : MT_thread_setalgorithm("memcpy offsets");
174 89780 : memcpy(Tloc(b, BATcount(b)), (const char *) ni->base + ((ci->seq - ni->b->hseqbase) << ni->shift), cnt << ni->shift);
175 8061 : } else if (toff != ~(size_t) 0) {
176 : /* we don't need to insert any actual strings since we
177 : * have already made sure that they are all in b's
178 : * string heap at known locations (namely the offset
179 : * in n added to toff), so insert offsets from n after
180 : * adding toff into b */
181 : /* note the use of the "restrict" qualifier here: all
182 : * four pointers below point to the same value, but
183 : * only one of them will actually be used, hence we
184 : * still obey the rule for restrict-qualified
185 : * pointers */
186 240 : const uint8_t *restrict tbp = (const uint8_t *) ni->base;
187 240 : const uint16_t *restrict tsp = (const uint16_t *) ni->base;
188 240 : const uint32_t *restrict tip = (const uint32_t *) ni->base;
189 : #if SIZEOF_VAR_T == 8
190 240 : const uint64_t *restrict tlp = (const uint64_t *) ni->base;
191 : #endif
192 :
193 240 : MT_thread_setalgorithm("copy offset values");
194 240 : r = b->batCount;
195 3090894 : TIMEOUT_LOOP(cnt, qry_ctx) {
196 3090242 : p = canditer_next(ci) - ni->b->hseqbase;
197 3090242 : switch (ni->width) {
198 224 : case 1:
199 224 : v = (var_t) tbp[p] + GDK_VAROFFSET;
200 224 : break;
201 4645 : case 2:
202 4645 : v = (var_t) tsp[p] + GDK_VAROFFSET;
203 4645 : break;
204 3085373 : case 4:
205 3085373 : v = (var_t) tip[p];
206 3085373 : break;
207 : #if SIZEOF_VAR_T == 8
208 0 : case 8:
209 0 : v = (var_t) tlp[p];
210 0 : break;
211 : #endif
212 : default:
213 0 : MT_UNREACHABLE();
214 : }
215 3090242 : v = (var_t) ((size_t) v + toff);
216 3090242 : assert(v >= GDK_VAROFFSET);
217 3090242 : assert((size_t) v < b->tvheap->free);
218 3090242 : switch (b->twidth) {
219 4639 : case 1:
220 4639 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 8));
221 4639 : ((uint8_t *) b->theap->base)[r++] = (uint8_t) (v - GDK_VAROFFSET);
222 4639 : break;
223 879 : case 2:
224 879 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 16));
225 879 : ((uint16_t *) b->theap->base)[r++] = (uint16_t) (v - GDK_VAROFFSET);
226 879 : break;
227 3084724 : case 4:
228 : #if SIZEOF_VAR_T == 8
229 3084724 : assert(v < ((var_t) 1 << 32));
230 : #endif
231 3084724 : ((uint32_t *) b->theap->base)[r++] = (uint32_t) v;
232 3084724 : break;
233 : #if SIZEOF_VAR_T == 8
234 0 : case 8:
235 0 : ((uint64_t *) b->theap->base)[r++] = (uint64_t) v;
236 0 : break;
237 : #endif
238 : default:
239 3090242 : MT_UNREACHABLE();
240 : }
241 : }
242 7821 : } else if (b->tvheap->free < ni->vhfree / 2 ||
243 : GDK_ELIMDOUBLES(b->tvheap)) {
244 : /* if b's string heap is much smaller than n's string
245 : * heap, don't bother checking whether n's string
246 : * values occur in b's string heap; also, if b is
247 : * (still) fully double eliminated, we must continue
248 : * to use the double elimination mechanism */
249 7778 : r = b->batCount;
250 7778 : oid hseq = ni->b->hseqbase;
251 7778 : MT_thread_setalgorithm("insert string values");
252 11431716 : TIMEOUT_LOOP(cnt, qry_ctx) {
253 11415745 : p = canditer_next(ci) - hseq;
254 11209069 : tp = BUNtvar(*ni, p);
255 11119651 : if (tfastins_nocheckVAR(b, r, tp) != GDK_SUCCEED) {
256 : return GDK_FAIL;
257 : }
258 11415747 : r++;
259 : }
260 : } else {
261 : /* Insert values from n individually into b; however,
262 : * we check whether there is a string in b's string
263 : * heap at the same offset as the string is in n's
264 : * string heap (in case b's string heap is a copy of
265 : * n's). If this is the case, we just copy the
266 : * offset, otherwise we insert normally. */
267 43 : r = b->batCount;
268 43 : MT_thread_setalgorithm("insert string values with check");
269 6055 : TIMEOUT_LOOP(cnt, qry_ctx) {
270 5969 : p = canditer_next(ci) - ni->b->hseqbase;
271 5969 : off = BUNtvaroff(*ni, p); /* the offset */
272 5969 : tp = ni->vh->base + off; /* the string */
273 5969 : if (off < b->tvheap->free &&
274 5969 : strcmp(b->tvheap->base + off, tp) == 0) {
275 : /* we found the string at the same
276 : * offset in b's string heap as it was
277 : * in n's string heap, so we don't
278 : * have to insert a new string into b:
279 : * we can just copy the offset */
280 2314 : v = (var_t) off;
281 2314 : switch (b->twidth) {
282 0 : case 1:
283 0 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 8));
284 0 : ((uint8_t *) b->theap->base)[r] = (uint8_t) (v - GDK_VAROFFSET);
285 0 : break;
286 4 : case 2:
287 4 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 16));
288 4 : ((uint16_t *) b->theap->base)[r] = (uint16_t) (v - GDK_VAROFFSET);
289 4 : break;
290 2310 : case 4:
291 : #if SIZEOF_VAR_T == 8
292 2310 : assert(v < ((var_t) 1 << 32));
293 : #endif
294 2310 : ((uint32_t *) b->theap->base)[r] = (uint32_t) v;
295 2310 : break;
296 : #if SIZEOF_VAR_T == 8
297 0 : case 8:
298 0 : ((uint64_t *) b->theap->base)[r] = (uint64_t) v;
299 0 : break;
300 : #endif
301 : default:
302 0 : MT_UNREACHABLE();
303 : }
304 : } else {
305 3655 : if (tfastins_nocheckVAR(b, r, tp) != GDK_SUCCEED) {
306 : return GDK_FAIL;
307 : }
308 : }
309 5969 : r++;
310 : }
311 : }
312 97841 : TIMEOUT_CHECK(qry_ctx, TIMEOUT_HANDLER(GDK_FAIL, qry_ctx));
313 97847 : MT_rwlock_wrlock(&b->thashlock);
314 97850 : MT_lock_set(&b->theaplock);
315 97834 : BATsetcount(b, oldcnt + ci->ncand);
316 97841 : assert(b->batCapacity >= b->batCount);
317 97841 : MT_lock_unset(&b->theaplock);
318 : /* maintain hash */
319 100832 : for (r = oldcnt, cnt = BATcount(b); b->thash && r < cnt; r++) {
320 2989 : HASHappend_locked(b, r, b->tvheap->base + VarHeapVal(Tloc(b, 0), r, b->twidth));
321 : }
322 97843 : BUN nunique = b->thash ? b->thash->nunique : 0;
323 97843 : MT_rwlock_wrunlock(&b->thashlock);
324 97840 : if (nunique != 0) {
325 9 : MT_lock_set(&b->theaplock);
326 9 : b->tunique_est = (double) nunique;
327 9 : MT_lock_unset(&b->theaplock);
328 : }
329 : return GDK_SUCCEED;
330 : }
331 :
332 : static gdk_return
333 448 : append_varsized_bat(BAT *b, BATiter *ni, struct canditer *ci, bool mayshare)
334 : {
335 448 : BUN cnt = ci->ncand, r;
336 448 : oid hseq = ni->b->hseqbase;
337 :
338 : /* only transient bats can use some other bat's vheap */
339 448 : assert(b->batRole == TRANSIENT || b->tvheap->parentid == b->batCacheid);
340 : /* make sure the bats use var_t */
341 448 : assert(b->twidth == ni->width);
342 448 : assert(b->twidth == SIZEOF_VAR_T);
343 448 : if (cnt == 0)
344 : return GDK_SUCCEED;
345 448 : if (cnt > BATcapacity(b) - BATcount(b)) {
346 : /* if needed space exceeds a normal growth extend just
347 : * with what's needed */
348 18 : BUN ncap = BATcount(b) + cnt;
349 18 : BUN grows = BATgrows(b);
350 :
351 18 : if (ncap > grows)
352 : grows = ncap;
353 18 : if (BATextend(b, grows) != GDK_SUCCEED)
354 : return GDK_FAIL;
355 : }
356 448 : if (mayshare &&
357 448 : BATcount(b) == 0 &&
358 233 : b->batRole == TRANSIENT &&
359 148 : ni->restricted == BAT_READ &&
360 148 : b->tvheap != ni->vh) {
361 : /* if b is still empty, in the transient farm, and n
362 : * is read-only, we replace b's vheap with a reference
363 : * to n's */
364 148 : MT_lock_set(&b->theaplock);
365 148 : bat bid = b->tvheap->parentid;
366 148 : HEAPdecref(b->tvheap, true);
367 148 : HEAPincref(ni->vh);
368 148 : b->tvheap = ni->vh;
369 148 : MT_lock_unset(&b->theaplock);
370 148 : BBPretain(ni->vh->parentid);
371 148 : if (bid != b->batCacheid)
372 0 : BBPrelease(bid);
373 : }
374 448 : if (b->tvheap == ni->vh) {
375 : /* if b and n use the same vheap, we only need to copy
376 : * the offsets from n to b */
377 286 : if (ci->tpe == cand_dense) {
378 : /* fast memcpy since we copy a consecutive
379 : * chunk of memory */
380 286 : memcpy(Tloc(b, BATcount(b)),
381 286 : (const var_t *) ni->base + (ci->seq - hseq),
382 286 : cnt << b->tshift);
383 : } else {
384 0 : var_t *restrict dst = (var_t *) Tloc(b, BATcount(b));
385 0 : const var_t *restrict src = (const var_t *) ni->base;
386 0 : while (cnt > 0) {
387 0 : cnt--;
388 0 : *dst++ = src[canditer_next(ci) - hseq];
389 : }
390 : }
391 286 : MT_rwlock_wrlock(&b->thashlock);
392 286 : MT_lock_set(&b->theaplock);
393 286 : BATsetcount(b, BATcount(b) + ci->ncand);
394 286 : MT_lock_unset(&b->theaplock);
395 : /* maintain hash table */
396 286 : for (BUN i = BATcount(b) - ci->ncand;
397 286 : b->thash && i < BATcount(b);
398 0 : i++) {
399 0 : HASHappend_locked(b, i, b->tvheap->base + *(var_t *) Tloc(b, i));
400 : }
401 286 : BUN nunique = b->thash ? b->thash->nunique : 0;
402 286 : MT_rwlock_wrunlock(&b->thashlock);
403 286 : if (nunique != 0) {
404 0 : MT_lock_set(&b->theaplock);
405 0 : b->tunique_est = (double) nunique;
406 0 : MT_lock_unset(&b->theaplock);
407 : }
408 286 : return GDK_SUCCEED;
409 : }
410 : /* b and n do not share their vheap, so we need to copy data */
411 162 : if (b->tvheap->parentid != b->batCacheid) {
412 : /* if b shares its vheap with some other bat, unshare it */
413 20 : Heap *h = GDKmalloc(sizeof(Heap));
414 20 : if (h == NULL) {
415 : return GDK_FAIL;
416 : }
417 40 : *h = (Heap) {
418 20 : .parentid = b->batCacheid,
419 20 : .farmid = BBPselectfarm(b->batRole, b->ttype, varheap),
420 : .refs = ATOMIC_VAR_INIT(1),
421 : };
422 20 : strconcat_len(h->filename, sizeof(h->filename),
423 20 : BBP_physical(b->batCacheid), ".theap", NULL);
424 20 : if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
425 0 : HEAPfree(h, true);
426 0 : GDKfree(h);
427 0 : return GDK_FAIL;
428 : }
429 20 : MT_lock_set(&b->theaplock);
430 20 : Heap *oh = b->tvheap;
431 20 : b->tvheap = h;
432 20 : MT_lock_unset(&b->theaplock);
433 20 : if (oh->parentid != b->batCacheid)
434 20 : BBPrelease(oh->parentid);
435 20 : HEAPdecref(oh, false);
436 : }
437 162 : if (BATcount(b) == 0 &&
438 85 : ci->tpe == cand_dense && ci->ncand == ni->count) {
439 : /* just copy the heaps */
440 85 : MT_lock_set(&b->theaplock);
441 85 : if (HEAPgrow(&b->tvheap, ni->vhfree, false) != GDK_SUCCEED) {
442 0 : MT_lock_unset(&b->theaplock);
443 0 : return GDK_FAIL;
444 : }
445 85 : memcpy(b->theap->base, ni->base, ni->hfree);
446 85 : memcpy(b->tvheap->base, ni->vh->base, ni->vhfree);
447 85 : b->theap->free = ni->hfree;
448 85 : b->theap->dirty = true;
449 85 : b->tvheap->free = ni->vhfree;
450 85 : b->tvheap->dirty = true;
451 85 : BATsetcount(b, ni->count);
452 85 : b->tnil = ni->nil;
453 85 : b->tnonil = ni->nonil;
454 85 : b->tsorted = ni->sorted;
455 85 : b->tnosorted = ni->nosorted;
456 85 : b->trevsorted = ni->revsorted;
457 85 : b->tnorevsorted = ni->norevsorted;
458 85 : b->tkey = ni->key;
459 85 : b->tnokey[0] = ni->nokey[0];
460 85 : b->tnokey[1] = ni->nokey[1];
461 85 : b->tminpos = ni->minpos;
462 85 : b->tmaxpos = ni->maxpos;
463 85 : b->tunique_est = ni->unique_est;
464 85 : MT_lock_unset(&b->theaplock);
465 85 : return GDK_SUCCEED;
466 : }
467 : /* copy data from n to b */
468 : r = BATcount(b);
469 380587 : for (BUN i = 0; i < cnt; i++) {
470 380510 : BUN p = canditer_next(ci) - hseq;
471 380510 : const void *t = BUNtvar(*ni, p);
472 380510 : if (tfastins_nocheckVAR(b, r, t) != GDK_SUCCEED) {
473 : return GDK_FAIL;
474 : }
475 380510 : r++;
476 : }
477 77 : MT_rwlock_wrlock(&b->thashlock);
478 77 : if (b->thash) {
479 0 : r -= cnt;
480 0 : BATiter bi = bat_iterator_nolock(b);
481 0 : for (BUN i = 0; i < cnt; i++) {
482 0 : const void *t = BUNtvar(bi, r);
483 0 : HASHappend_locked(b, r, t);
484 0 : r++;
485 : }
486 : }
487 77 : BUN nunique = b->thash ? b->thash->nunique : 0;
488 77 : MT_lock_set(&b->theaplock);
489 77 : BATsetcount(b, r);
490 77 : if (nunique != 0)
491 0 : b->tunique_est = (double) nunique;
492 77 : MT_lock_unset(&b->theaplock);
493 77 : MT_rwlock_wrunlock(&b->thashlock);
494 77 : return GDK_SUCCEED;
495 : }
496 :
497 : static gdk_return
498 127 : append_msk_bat(BAT *b, BATiter *ni, struct canditer *ci)
499 : {
500 127 : if (ci->ncand == 0)
501 : return GDK_SUCCEED;
502 127 : if (BATextend(b, BATcount(b) + ci->ncand) != GDK_SUCCEED)
503 : return GDK_FAIL;
504 :
505 127 : MT_lock_set(&b->theaplock);
506 :
507 127 : uint32_t boff = b->batCount % 32;
508 127 : uint32_t *bp = (uint32_t *) b->theap->base + b->batCount / 32;
509 127 : b->batCount += ci->ncand;
510 127 : b->theap->dirty = true;
511 127 : b->theap->free = ((b->batCount + 31) / 32) * 4;
512 127 : if (ci->tpe == cand_dense) {
513 127 : const uint32_t *np;
514 127 : uint32_t noff, mask;
515 127 : BUN cnt;
516 127 : noff = (ci->seq - ni->b->hseqbase) % 32;
517 127 : cnt = ci->ncand;
518 127 : np = (const uint32_t *) ni->base + (ci->seq - ni->b->hseqbase) / 32;
519 127 : if (boff == noff) {
520 : /* words of b and n are aligned, so we don't
521 : * need to shift bits around */
522 47 : if (boff + cnt <= 32) {
523 : /* all new bits within one word */
524 41 : if (cnt == 32) {
525 0 : *bp = *np;
526 : } else {
527 41 : mask = ((1U << cnt) - 1) << boff;
528 41 : *bp &= ~mask;
529 41 : *bp |= *np & mask;
530 : }
531 : } else {
532 : /* multiple words of b are affected */
533 6 : if (boff != 0) {
534 : /* first fill up the rest of the first
535 : * word */
536 0 : mask = ~0U << boff;
537 0 : *bp &= ~mask;
538 0 : *bp++ |= *np++ & mask;
539 0 : cnt -= 32 - boff;
540 : }
541 6 : if (cnt >= 32) {
542 : /* copy an integral number of words fast */
543 6 : BUN nw = cnt / 32;
544 6 : memcpy(bp, np, nw*sizeof(int));
545 6 : bp += nw;
546 6 : np += nw;
547 6 : cnt %= 32;
548 : }
549 6 : if (cnt > 0) {
550 : /* do the left over bits */
551 6 : mask = (1U << cnt) - 1;
552 6 : *bp = *np & mask;
553 : }
554 : }
555 80 : } else if (boff > noff) {
556 80 : if (boff + cnt <= 32) {
557 : /* we only need to copy bits from a
558 : * single word of n to a single word
559 : * of b */
560 : /* boff > 0, so cnt < 32, hence the
561 : * shift is ok */
562 71 : mask = (1U << cnt) - 1;
563 71 : *bp &= ~(mask << boff);
564 71 : *bp |= (*np & (mask << noff)) << (boff - noff);
565 : } else {
566 : /* first fill the rest of the last partial
567 : * word of b, so that's 32-boff bits */
568 9 : mask = (1U << (32 - boff)) - 1;
569 9 : *bp &= ~(mask << boff);
570 9 : *bp++ |= (*np & (mask << noff)) << (boff - noff);
571 9 : cnt -= 32 - boff;
572 :
573 : /* set boff and noff to the amount we need to
574 : * shift bits in consecutive words of n around
575 : * to fit into the next word of b; set mask to
576 : * the mask of the bottom bits of n that fit
577 : * in a word of b (and the complement are the
578 : * top bits that go to another word of b) */
579 9 : boff -= noff;
580 9 : noff = 32 - boff;
581 9 : mask = (1U << noff) - 1;
582 141 : while (cnt >= 32) {
583 132 : *bp = (*np++ & ~mask) >> noff;
584 132 : *bp++ |= (*np & mask) << boff;
585 132 : cnt -= 32;
586 : }
587 9 : if (cnt > noff) {
588 : /* the last bits come from two words
589 : * in n */
590 4 : *bp = (*np++ & ~mask) >> noff;
591 4 : cnt -= noff;
592 4 : mask = (1U << cnt) - 1;
593 4 : *bp++ |= (*np & mask) << boff;
594 5 : } else if (cnt > 0) {
595 : /* the last bits come from a single
596 : * word in n */
597 5 : mask = ((1U << cnt) - 1) << noff;
598 5 : *bp = (*np & mask) >> noff;
599 : }
600 : }
601 : } else {
602 : /* boff < noff */
603 0 : if (noff + cnt <= 32) {
604 : /* only need part of the first word of n */
605 0 : assert(cnt < 32); /* noff > 0, so cnt < 32 */
606 0 : mask = (1U << cnt) - 1;
607 0 : *bp &= ~(mask << boff);
608 0 : *bp |= (*np & (mask << noff)) >> (noff - boff);
609 0 : } else if (boff + cnt <= 32) {
610 : /* only need to fill a single word of
611 : * b, but from two of n */
612 0 : if (cnt < 32)
613 0 : *bp &= ~(((1U << cnt) - 1) << boff);
614 : else
615 0 : *bp = 0;
616 0 : mask = ~((1U << noff) - 1);
617 0 : *bp |= (*np++ & mask) >> (noff - boff);
618 0 : cnt -= 32 - noff;
619 0 : mask = (1U << cnt) - 1;
620 0 : *bp |= (*np & mask) << (32 - noff);
621 : } else {
622 0 : if (boff > 0) {
623 : /* fill the rest of the first word of b */
624 0 : cnt -= 32 - boff;
625 0 : *bp &= (1U << boff) - 1;
626 0 : mask = ~((1U << noff) - 1);
627 0 : noff -= boff;
628 0 : boff = 32 - noff;
629 0 : *bp |= (*np++ & mask) >> noff;
630 0 : *bp |= (*np & ((1U << noff) - 1)) << boff;
631 : } else {
632 0 : boff = 32 - noff;
633 : }
634 0 : mask = (1U << noff) - 1;
635 0 : while (cnt >= 32) {
636 0 : *bp = (*np++ & ~mask) >> noff;
637 0 : *bp++ |= (*np & mask) << boff;
638 0 : cnt -= 32;
639 : }
640 0 : if (cnt > 0) {
641 0 : *bp = (*np++ & ~mask) >> noff;
642 0 : if (cnt > noff)
643 0 : *bp++ |= (*np & mask) << boff;
644 : }
645 : }
646 : }
647 : } else {
648 0 : oid o;
649 0 : uint32_t v = boff > 0 ? *bp & ((1U << boff) - 1) : 0;
650 0 : do {
651 0 : for (uint32_t i = boff; i < 32; i++) {
652 0 : o = canditer_next(ci);
653 0 : if (is_oid_nil(o))
654 : break;
655 0 : o -= ni->b->hseqbase;
656 0 : v |= (uint32_t) Tmskval(ni, o - ni->b->hseqbase) << i;
657 : }
658 0 : *bp++ = v;
659 0 : v = 0;
660 0 : boff = 0;
661 0 : } while (!is_oid_nil(o));
662 : }
663 127 : MT_lock_unset(&b->theaplock);
664 127 : return GDK_SUCCEED;
665 : }
666 :
667 : /* Append the contents of BAT n (subject to the optional candidate
668 : * list s) to BAT b. If b is empty, b will get the seqbase of s if it
669 : * was passed in, and else the seqbase of n. */
670 : static gdk_return
671 1325268 : BATappend2(BAT *b, BAT *n, BAT *s, bool force, bool mayshare)
672 : {
673 1325268 : struct canditer ci;
674 1325268 : BUN r;
675 1325268 : oid hseq = n->hseqbase;
676 1325268 : char buf[64];
677 1325268 : lng t0 = 0;
678 1325268 : const ValRecord *prop = NULL;
679 1325268 : ValRecord minprop, maxprop;
680 1325268 : const void *minbound = NULL, *maxbound = NULL;
681 1325268 : int (*atomcmp) (const void *, const void *) = ATOMcompare(b->ttype);
682 1325268 : bool hlocked = false;
683 :
684 1325268 : if (b == NULL || n == NULL || BATcount(n) == 0) {
685 : return GDK_SUCCEED;
686 : }
687 845966 : assert(b->theap->parentid == b->batCacheid);
688 :
689 845966 : TRC_DEBUG_IF(ALGO) {
690 0 : t0 = GDKusec();
691 0 : snprintf(buf, sizeof(buf), ALGOBATFMT, ALGOBATPAR(b));
692 : }
693 :
694 845966 : ALIGNapp(b, force, GDK_FAIL);
695 :
696 2515165 : if (ATOMstorage(ATOMtype(b->ttype)) != ATOMstorage(ATOMtype(n->ttype))) {
697 0 : GDKerror("Incompatible operands ("ALGOBATFMT" vs. "ALGOBATFMT").\n", ALGOBATPAR(b), ALGOBATPAR(n));
698 0 : return GDK_FAIL;
699 : }
700 :
701 845955 : if (BATttype(b) != BATttype(n) &&
702 : ATOMtype(b->ttype) != ATOMtype(n->ttype)) {
703 0 : TRC_DEBUG(CHECK_, "Interpreting %s as %s.\n",
704 : ATOMname(BATttype(n)), ATOMname(BATttype(b)));
705 : }
706 :
707 845955 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
708 :
709 845921 : BATiter ni = bat_iterator(n);
710 :
711 845931 : canditer_init(&ci, n, s);
712 845965 : if (ci.ncand == 0) {
713 0 : goto doreturn;
714 : }
715 :
716 845965 : if (BATcount(b) + ci.ncand > BUN_MAX) {
717 0 : bat_iterator_end(&ni);
718 0 : GDKerror("combined BATs too large\n");
719 0 : return GDK_FAIL;
720 : }
721 :
722 845965 : if (b->hseqbase + BATcount(b) + ci.ncand >= GDK_oid_max) {
723 0 : bat_iterator_end(&ni);
724 0 : GDKerror("overflow of head value\n");
725 0 : return GDK_FAIL;
726 : }
727 :
728 845965 : IMPSdestroy(b); /* imprints do not support updates yet */
729 846061 : OIDXdestroy(b);
730 846020 : STRMPdestroy(b); /* TODO: use STRMPappendBitString */
731 846045 : RTREEdestroy(b);
732 :
733 845995 : MT_lock_set(&b->theaplock);
734 845334 : const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
735 845306 : if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL &&
736 48 : VALcopy(&minprop, prop) != NULL) {
737 48 : minbound = VALptr(&minprop);
738 48 : if (ci.ncand == BATcount(n) &&
739 62 : ni.minpos != BUN_NONE &&
740 14 : atomcmp(BUNtail(ni, ni.minpos), minbound) < 0) {
741 0 : assert(0);
742 : GDKerror("value out of bounds\n");
743 : MT_lock_unset(&b->theaplock);
744 : goto bailout;
745 : }
746 : }
747 845264 : if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL &&
748 40 : VALcopy(&maxprop, prop) != NULL) {
749 40 : maxbound = VALptr(&maxprop);
750 40 : if (ci.ncand == BATcount(n) &&
751 52 : ni.maxpos != BUN_NONE &&
752 12 : atomcmp(BUNtail(ni, ni.maxpos), maxbound) >= 0) {
753 0 : assert(0);
754 : GDKerror("value out of bounds\n");
755 : MT_lock_unset(&b->theaplock);
756 : goto bailout;
757 : }
758 : }
759 :
760 845541 : if (BATcount(b) == 0 || b->tmaxpos != BUN_NONE) {
761 401396 : if (ni.maxpos != BUN_NONE) {
762 149500 : BATiter bi = bat_iterator_nolock(b);
763 149500 : if (BATcount(b) == 0 || atomcmp(BUNtail(bi, b->tmaxpos), BUNtail(ni, ni.maxpos)) < 0) {
764 145727 : if (s == NULL) {
765 145591 : b->tmaxpos = BATcount(b) + ni.maxpos;
766 : } else {
767 136 : b->tmaxpos = BUN_NONE;
768 : }
769 : }
770 : } else {
771 251896 : b->tmaxpos = BUN_NONE;
772 : }
773 : }
774 845541 : if (BATcount(b) == 0 || b->tminpos != BUN_NONE) {
775 401316 : if (ni.minpos != BUN_NONE) {
776 149447 : BATiter bi = bat_iterator_nolock(b);
777 149447 : if (BATcount(b) == 0 || atomcmp(BUNtail(bi, b->tminpos), BUNtail(ni, ni.minpos)) > 0) {
778 145056 : if (s == NULL) {
779 144923 : b->tminpos = BATcount(b) + ni.minpos;
780 : } else {
781 133 : b->tminpos = BUN_NONE;
782 : }
783 : }
784 : } else {
785 251869 : b->tminpos = BUN_NONE;
786 : }
787 : }
788 845541 : if (ci.ncand > BATcount(b) / gdk_unique_estimate_keep_fraction) {
789 844981 : b->tunique_est = 0;
790 : }
791 845541 : MT_lock_unset(&b->theaplock);
792 : /* load hash so that we can maintain it */
793 845989 : (void) BATcheckhash(b);
794 :
795 846080 : if (b->ttype == TYPE_void) {
796 : /* b does not have storage, keep it that way if we can */
797 10723 : HASHdestroy(b); /* we're not maintaining the hash here */
798 10724 : MT_lock_set(&b->theaplock);
799 10722 : if (BATtdensebi(&ni) && ci.tpe == cand_dense &&
800 9716 : (BATcount(b) == 0 ||
801 6683 : (BATtdense(b) &&
802 6683 : b->tseqbase + BATcount(b) == n->tseqbase + ci.seq - hseq))) {
803 : /* n is also dense and consecutive with b */
804 9656 : if (BATcount(b) == 0) {
805 3033 : if (minbound && n->tseqbase + ci.seq - hseq < *(const oid *)minbound) {
806 0 : assert(0);
807 : GDKerror("value not within bounds\n");
808 : MT_lock_unset(&b->theaplock);
809 : goto bailout;
810 : }
811 3033 : BATtseqbase(b, n->tseqbase + ci.seq - hseq);
812 : }
813 9653 : if (maxbound && b->tseqbase + BATcount(b) + ci.ncand >= *(const oid *)maxbound) {
814 0 : assert(0);
815 : GDKerror("value not within bounds\n");
816 : MT_lock_unset(&b->theaplock);
817 : goto bailout;
818 : }
819 9653 : BATsetcount(b, BATcount(b) + ci.ncand);
820 9653 : MT_lock_unset(&b->theaplock);
821 9655 : goto doreturn;
822 : }
823 1066 : if ((BATcount(b) == 0 || is_oid_nil(b->tseqbase)) &&
824 20 : ni.type == TYPE_void && is_oid_nil(n->tseqbase)) {
825 : /* both b and n are void/nil */
826 0 : if (notnull) {
827 0 : assert(0);
828 : GDKerror("NULL value not within bounds\n");
829 : MT_lock_unset(&b->theaplock);
830 : goto bailout;
831 : }
832 0 : BATtseqbase(b, oid_nil);
833 0 : BATsetcount(b, BATcount(b) + ci.ncand);
834 0 : MT_lock_unset(&b->theaplock);
835 0 : goto doreturn;
836 : }
837 : /* we need to materialize b; allocate enough capacity */
838 1066 : MT_lock_unset(&b->theaplock);
839 1066 : if (BATmaterialize(b, BATcount(b) + ci.ncand) != GDK_SUCCEED) {
840 0 : goto bailout;
841 : }
842 : }
843 :
844 : /* property setting */
845 836423 : MT_lock_set(&b->theaplock);
846 836295 : r = BATcount(b);
847 :
848 836295 : if (BATcount(b) == 0) {
849 391641 : b->tsorted = ni.sorted;
850 391641 : b->trevsorted = ni.revsorted;
851 391641 : b->tseqbase = oid_nil;
852 391641 : b->tnonil = ni.nonil;
853 391641 : b->tnil = ni.nil && ci.ncand == BATcount(n);
854 391641 : if (ci.tpe == cand_dense) {
855 391491 : b->tnosorted = ci.seq - hseq <= ni.nosorted && ni.nosorted < ci.seq + ci.ncand - hseq ? ni.nosorted + hseq - ci.seq : 0;
856 391491 : b->tnorevsorted = ci.seq - hseq <= ni.norevsorted && ni.norevsorted < ci.seq + ci.ncand - hseq ? ni.norevsorted + hseq - ci.seq : 0;
857 391491 : if (BATtdensebi(&ni)) {
858 1938 : b->tseqbase = n->tseqbase + ci.seq - hseq;
859 : }
860 : } else {
861 150 : b->tnosorted = 0;
862 150 : b->tnorevsorted = 0;
863 : }
864 391641 : b->tkey = ni.key;
865 391641 : if (ci.ncand == BATcount(n)) {
866 391004 : b->tnokey[0] = ni.nokey[0];
867 391004 : b->tnokey[1] = ni.nokey[1];
868 : } else {
869 637 : b->tnokey[0] = b->tnokey[1] = 0;
870 : }
871 : } else {
872 444654 : BUN last = r - 1;
873 444654 : BATiter bi = bat_iterator_nolock(b);
874 444736 : int xx = ATOMcmp(b->ttype,
875 : BUNtail(ni, ci.seq - hseq),
876 : BUNtail(bi, last));
877 444532 : if (b->tsorted && (!ni.sorted || xx < 0)) {
878 17539 : b->tsorted = false;
879 17539 : b->tnosorted = 0;
880 17539 : b->tseqbase = oid_nil;
881 : }
882 444532 : if (b->trevsorted &&
883 47690 : (!ni.revsorted || xx > 0)) {
884 13809 : b->trevsorted = false;
885 13809 : b->tnorevsorted = 0;
886 : }
887 444532 : if (b->tkey &&
888 45654 : (!(b->tsorted || b->trevsorted) ||
889 33931 : !ni.key || xx == 0)) {
890 16241 : BATkey(b, false);
891 : }
892 444535 : if (b->ttype != TYPE_void && b->tsorted && BATtdense(b) &&
893 1132 : (!BATtdensebi(&ni) ||
894 892 : ci.tpe != cand_dense ||
895 446 : 1 + *(oid *) BUNtloc(bi, last) != BUNtoid(n, ci.seq - hseq))) {
896 703 : b->tseqbase = oid_nil;
897 : }
898 444535 : b->tnonil &= ni.nonil;
899 880213 : b->tnil |= ni.nil && ci.ncand == ni.count;
900 : }
901 836176 : MT_lock_unset(&b->theaplock);
902 836166 : if (b->ttype == TYPE_str) {
903 97836 : if (insert_string_bat(b, &ni, &ci, force, mayshare, qry_ctx) != GDK_SUCCEED) {
904 0 : goto bailout;
905 : }
906 738330 : } else if (ATOMvarsized(b->ttype)) {
907 448 : if (append_varsized_bat(b, &ni, &ci, mayshare) != GDK_SUCCEED) {
908 0 : goto bailout;
909 : }
910 737882 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
911 : /* no bounds and NOT_NULL property on MSK bats */
912 127 : assert(minbound == NULL && maxbound == NULL && !notnull);
913 127 : if (append_msk_bat(b, &ni, &ci) != GDK_SUCCEED) {
914 0 : goto bailout;
915 : }
916 : } else {
917 737755 : if (ci.ncand > BATcapacity(b) - BATcount(b)) {
918 : /* if needed space exceeds a normal growth
919 : * extend just with what's needed */
920 10221 : BUN ncap = BATcount(b) + ci.ncand;
921 10221 : BUN grows = BATgrows(b);
922 :
923 10228 : if (ncap > grows)
924 : grows = ncap;
925 10228 : if (BATextend(b, grows) != GDK_SUCCEED) {
926 0 : goto bailout;
927 : }
928 : }
929 737771 : MT_rwlock_wrlock(&b->thashlock);
930 737776 : hlocked = true;
931 737776 : if (b->ttype != TYPE_void &&
932 737737 : ni.type != TYPE_void &&
933 735293 : ci.tpe == cand_dense) {
934 : /* use fast memcpy if we can */
935 735134 : memcpy(Tloc(b, BATcount(b)),
936 735134 : (const char *) ni.base + ((ci.seq - hseq) << ni.shift),
937 735134 : ci.ncand << ni.shift);
938 735401 : for (BUN i = 0; b->thash && i < ci.ncand; i++) {
939 192 : HASHappend_locked(b, r, Tloc(b, r));
940 267 : r++;
941 : }
942 : } else {
943 2642 : const void *atomnil = ATOMnilptr(b->ttype);
944 1759029 : TIMEOUT_LOOP(ci.ncand, qry_ctx) {
945 1753725 : BUN p = canditer_next(&ci) - hseq;
946 1753725 : const void *t = BUNtail(ni, p);
947 1753713 : bool isnil = atomcmp(t, atomnil) == 0;
948 1753716 : if (notnull && isnil) {
949 0 : assert(0);
950 : GDKerror("NULL value not within bounds\n");
951 : goto bailout;
952 1753716 : } else if (minbound &&
953 1753716 : !isnil &&
954 0 : atomcmp(t, minbound) < 0) {
955 0 : assert(0);
956 : GDKerror("value not within bounds\n");
957 : goto bailout;
958 1753716 : } else if (maxbound &&
959 0 : !isnil &&
960 0 : atomcmp(t, maxbound) >= 0) {
961 0 : assert(0);
962 : GDKerror("value not within bounds\n");
963 : goto bailout;
964 1753716 : } else if (tfastins_nocheck(b, r, t) != GDK_SUCCEED) {
965 0 : goto bailout;
966 : }
967 1753724 : if (b->thash)
968 0 : HASHappend_locked(b, r, t);
969 1753724 : r++;
970 : }
971 2641 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
972 : }
973 737850 : BUN nunique;
974 737850 : nunique = b->thash ? b->thash->nunique : 0;
975 737850 : MT_lock_set(&b->theaplock);
976 737600 : BATsetcount(b, b->batCount + ci.ncand);
977 737653 : if (nunique != 0)
978 44 : b->tunique_est = (double) nunique;
979 737653 : MT_lock_unset(&b->theaplock);
980 737785 : assert(hlocked);
981 737785 : MT_rwlock_wrunlock(&b->thashlock);
982 737785 : hlocked = false;
983 : }
984 :
985 845984 : doreturn:
986 845984 : bat_iterator_end(&ni);
987 846137 : if (minbound)
988 48 : VALclear(&minprop);
989 846132 : if (maxbound)
990 40 : VALclear(&maxprop);
991 846132 : TRC_DEBUG(ALGO, "b=%s,n=" ALGOBATFMT ",s=" ALGOOPTBATFMT
992 : " -> " ALGOBATFMT " (" LLFMT " usec)\n",
993 : buf, ALGOBATPAR(n), ALGOOPTBATPAR(s), ALGOBATPAR(b),
994 : GDKusec() - t0);
995 :
996 : return GDK_SUCCEED;
997 : bailout:
998 0 : if (hlocked)
999 0 : MT_rwlock_wrunlock(&b->thashlock);
1000 0 : if (minbound)
1001 0 : VALclear(&minprop);
1002 0 : if (maxbound)
1003 0 : VALclear(&maxprop);
1004 0 : bat_iterator_end(&ni);
1005 0 : return GDK_FAIL;
1006 : }
1007 :
1008 : gdk_return
1009 1325393 : BATappend(BAT *b, BAT *n, BAT *s, bool force)
1010 : {
1011 1325393 : return BATappend2(b, n, s, force, true);
1012 : }
1013 :
1014 : gdk_return
1015 4 : BATdel(BAT *b, BAT *d)
1016 : {
1017 4 : void (*atmdel) (Heap *, var_t *) = BATatoms[b->ttype].atomDel;
1018 4 : MT_lock_set(&b->theaplock);
1019 4 : BATiter bi = bat_iterator_nolock(b);
1020 4 : MT_lock_unset(&b->theaplock);
1021 :
1022 4 : assert(ATOMtype(d->ttype) == TYPE_oid);
1023 4 : assert(d->tsorted);
1024 4 : assert(d->tkey);
1025 4 : if (BATcount(d) == 0)
1026 : return GDK_SUCCEED;
1027 4 : IMPSdestroy(b);
1028 4 : OIDXdestroy(b);
1029 4 : HASHdestroy(b);
1030 4 : PROPdestroy(b);
1031 4 : STRMPdestroy(b);
1032 4 : RTREEdestroy(b);
1033 4 : if (BATtdense(d)) {
1034 2 : oid o = d->tseqbase;
1035 2 : BUN c = BATcount(d);
1036 :
1037 2 : if (o + c <= b->hseqbase)
1038 : return GDK_SUCCEED;
1039 2 : if (o < b->hseqbase) {
1040 0 : c -= b->hseqbase - o;
1041 0 : o = b->hseqbase;
1042 : }
1043 2 : if (o - b->hseqbase < b->batInserted) {
1044 0 : GDKerror("cannot delete committed values\n");
1045 0 : return GDK_FAIL;
1046 : }
1047 2 : if (o + c > b->hseqbase + BATcount(b))
1048 0 : c = b->hseqbase + BATcount(b) - o;
1049 2 : if (c == 0)
1050 : return GDK_SUCCEED;
1051 2 : if (atmdel) {
1052 0 : BUN p = o - b->hseqbase;
1053 0 : BUN q = p + c;
1054 0 : while (p < q) {
1055 0 : (*atmdel)(b->tvheap, (var_t *) BUNtloc(bi, p));
1056 0 : p++;
1057 : }
1058 : }
1059 2 : if (BATtdense(b) && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED)
1060 : return GDK_FAIL;
1061 2 : MT_lock_set(&b->theaplock);
1062 2 : if (o + c < b->hseqbase + BATcount(b)) {
1063 0 : o -= b->hseqbase;
1064 0 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1065 0 : BUN n = BATcount(b) - (o + c);
1066 : /* not very efficient, but first see
1067 : * how much this is used */
1068 0 : for (BUN i = 0; i < n; i++)
1069 0 : mskSetVal(b, o + i,
1070 0 : mskGetVal(b, o + c + i));
1071 : } else {
1072 0 : memmove(Tloc(b, o),
1073 0 : Tloc(b, o + c),
1074 0 : b->twidth * (BATcount(b) - (o + c)));
1075 : }
1076 0 : b->theap->dirty = true;
1077 : // o += b->hseqbase; // if this were to be used again
1078 : }
1079 2 : b->batCount -= c;
1080 : } else {
1081 2 : BATiter di = bat_iterator(d);
1082 2 : const oid *o = (const oid *) di.base;
1083 2 : const oid *s;
1084 2 : BUN c = di.count;
1085 2 : BUN nd = 0;
1086 2 : BUN pos;
1087 2 : char *p = NULL;
1088 :
1089 2 : if (o[c - 1] <= b->hseqbase) {
1090 0 : bat_iterator_end(&di);
1091 0 : return GDK_SUCCEED;
1092 : }
1093 2 : while (*o < b->hseqbase) {
1094 0 : o++;
1095 0 : c--;
1096 : }
1097 2 : if (*o - b->hseqbase < b->batInserted) {
1098 0 : bat_iterator_end(&di);
1099 0 : GDKerror("cannot delete committed values\n");
1100 0 : return GDK_FAIL;
1101 : }
1102 2 : if (BATtdense(b) && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED) {
1103 0 : bat_iterator_end(&di);
1104 0 : return GDK_FAIL;
1105 : }
1106 2 : s = o;
1107 2 : pos = *o - b->hseqbase;
1108 2 : if (ATOMstorage(b->ttype) != TYPE_msk)
1109 2 : p = Tloc(b, pos);
1110 6 : while (c > 0 && *o < b->hseqbase + BATcount(b)) {
1111 4 : size_t n;
1112 4 : if (atmdel)
1113 0 : (*atmdel)(b->tvheap, (var_t *) BUNtloc(bi, *o - b->hseqbase));
1114 4 : o++;
1115 4 : c--;
1116 4 : nd++;
1117 4 : if (c == 0 || *o - b->hseqbase >= BATcount(b))
1118 2 : n = b->hseqbase + BATcount(b) - o[-1] - 1;
1119 2 : else if ((oid) (o - s) < *o - *s)
1120 2 : n = o[0] - o[-1] - 1;
1121 : else
1122 : n = 0;
1123 4 : if (n > 0) {
1124 2 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1125 0 : BUN opos = o[-1] + 1 - b->hseqbase;
1126 : /* not very efficient, but
1127 : * first see how much this is
1128 : * used */
1129 0 : for (BUN i = 0; i < n; i++) {
1130 0 : mskSetVal(b, pos + i,
1131 0 : mskGetVal(b, opos + i));
1132 : }
1133 0 : pos += n;
1134 : } else {
1135 2 : n *= b->twidth;
1136 2 : memmove(p,
1137 2 : Tloc(b, o[-1] + 1 - b->hseqbase),
1138 : n);
1139 2 : p += n;
1140 : }
1141 : s = o;
1142 : }
1143 : }
1144 2 : bat_iterator_end(&di);
1145 2 : MT_lock_set(&b->theaplock);
1146 2 : b->theap->dirty = true;
1147 2 : b->batCount -= nd;
1148 : }
1149 4 : if (b->batCount <= 1) {
1150 : /* some trivial properties */
1151 2 : b->tkey = true;
1152 2 : b->tsorted = b->trevsorted = true;
1153 2 : if (b->batCount == 0) {
1154 2 : b->tnil = false;
1155 2 : b->tnonil = true;
1156 : }
1157 : }
1158 : /* not sure about these anymore */
1159 4 : b->tnosorted = b->tnorevsorted = 0;
1160 4 : b->tnokey[0] = b->tnokey[1] = 0;
1161 4 : b->tminpos = BUN_NONE;
1162 4 : b->tmaxpos = BUN_NONE;
1163 4 : b->tunique_est = 0.0;
1164 4 : MT_lock_unset(&b->theaplock);
1165 :
1166 4 : return GDK_SUCCEED;
1167 : }
1168 :
1169 : /*
1170 : * Replace all values in b with values from n whose location is given by
1171 : * the oid in either p or positions.
1172 : * If positions is used, autoincr specifies whether it is the first of a
1173 : * dense range of positions or whether it is a full-blown array of
1174 : * position.
1175 : * If mayappend is set, the position in p/positions may refer to
1176 : * locations beyond the end of b.
1177 : */
1178 : static gdk_return
1179 81938 : BATappend_or_update(BAT *b, BAT *p, const oid *positions, BAT *n,
1180 : bool mayappend, bool autoincr, bool force)
1181 : {
1182 81938 : lng t0 = GDKusec();
1183 81934 : oid pos = oid_nil;
1184 81934 : BUN nunique = 0;
1185 :
1186 81934 : if (b == NULL || b->ttype == TYPE_void || n == NULL) {
1187 : return GDK_SUCCEED;
1188 : }
1189 : /* either p or positions */
1190 81934 : assert((p == NULL) != (positions == NULL));
1191 81934 : if (p != NULL) {
1192 81758 : if (BATcount(p) != BATcount(n)) {
1193 0 : GDKerror("update BATs not the same size\n");
1194 0 : return GDK_FAIL;
1195 : }
1196 81758 : if (ATOMtype(p->ttype) != TYPE_oid) {
1197 0 : GDKerror("positions BAT not type OID\n");
1198 0 : return GDK_FAIL;
1199 : }
1200 81758 : if (BATtdense(p)) {
1201 72959 : pos = p->tseqbase;
1202 72959 : positions = &pos;
1203 72959 : autoincr = true;
1204 72959 : p = NULL;
1205 8799 : } else if (p->ttype != TYPE_void) {
1206 8797 : positions = (const oid *) Tloc(p, 0);
1207 8797 : autoincr = false;
1208 : } else {
1209 : autoincr = false;
1210 : }
1211 176 : } else if (autoincr) {
1212 176 : pos = *positions;
1213 : }
1214 81934 : if (BATcount(n) == 0) {
1215 : return GDK_SUCCEED;
1216 : }
1217 :
1218 25247 : BATiter ni = bat_iterator(n);
1219 :
1220 25247 : OIDXdestroy(b);
1221 25247 : IMPSdestroy(b);
1222 25245 : STRMPdestroy(b);
1223 25247 : RTREEdestroy(b);
1224 : /* load hash so that we can maintain it */
1225 25247 : (void) BATcheckhash(b);
1226 :
1227 25247 : MT_lock_set(&b->theaplock);
1228 25246 : if (!force && (b->batRestricted != BAT_WRITE ||
1229 15 : (ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1)) {
1230 0 : MT_lock_unset(&b->theaplock);
1231 0 : bat_iterator_end(&ni);
1232 0 : GDKerror("access denied to %s, aborting.\n", BATgetId(b));
1233 0 : return GDK_FAIL;
1234 : }
1235 25246 : BATiter bi = bat_iterator_nolock(b);
1236 25246 : if (ni.count > BATcount(b) / gdk_unique_estimate_keep_fraction) {
1237 24717 : b->tunique_est = 0;
1238 : }
1239 :
1240 25246 : b->tsorted = b->trevsorted = false;
1241 25246 : b->tnosorted = b->tnorevsorted = 0;
1242 25246 : b->tseqbase = oid_nil;
1243 25246 : b->tkey = false;
1244 25246 : b->tnokey[0] = b->tnokey[1] = 0;
1245 :
1246 25246 : int (*atomcmp)(const void *, const void *) = ATOMcompare(b->ttype);
1247 25246 : const void *nil = ATOMnilptr(b->ttype);
1248 25246 : oid hseqend = b->hseqbase + BATcount(b);
1249 :
1250 25246 : MT_lock_unset(&b->theaplock);
1251 :
1252 30167 : bool anynil = false;
1253 30167 : bool locked = false;
1254 :
1255 30167 : if (b->tvheap) {
1256 1234338 : for (BUN i = 0; i < ni.count; i++) {
1257 1231689 : oid updid;
1258 1231689 : if (positions) {
1259 1230676 : updid = autoincr ? pos++ : *positions++;
1260 : } else {
1261 1013 : updid = BUNtoid(p, i);
1262 : }
1263 :
1264 1231689 : if (updid < b->hseqbase ||
1265 1231689 : (!mayappend && updid >= hseqend)) {
1266 0 : GDKerror("id out of range\n");
1267 0 : goto bailout;
1268 : }
1269 1231689 : updid -= b->hseqbase;
1270 1231689 : if (!force && updid < b->batInserted) {
1271 0 : GDKerror("updating committed value\n");
1272 0 : goto bailout;
1273 : }
1274 :
1275 1231689 : const void *new = BUNtvar(ni, i);
1276 :
1277 1225088 : if (updid >= BATcount(b)) {
1278 23504 : assert(mayappend);
1279 23504 : if (locked) {
1280 4 : MT_rwlock_wrunlock(&b->thashlock);
1281 4 : locked = false;
1282 : }
1283 23504 : if (b->tminpos != bi.minpos ||
1284 23503 : b->tmaxpos != bi.maxpos) {
1285 1 : MT_lock_set(&b->theaplock);
1286 1 : b->tminpos = bi.minpos;
1287 1 : b->tmaxpos = bi.maxpos;
1288 1 : MT_lock_unset(&b->theaplock);
1289 : }
1290 23504 : if (BATcount(b) < updid &&
1291 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1292 0 : bat_iterator_end(&ni);
1293 0 : return GDK_FAIL;
1294 : }
1295 23504 : if (BUNappend(b, new, force) != GDK_SUCCEED) {
1296 0 : bat_iterator_end(&ni);
1297 0 : return GDK_FAIL;
1298 : }
1299 23504 : bi = bat_iterator_nolock(b);
1300 155404 : continue;
1301 : }
1302 :
1303 : /* it is possible that a previous run was killed
1304 : * after an update (with a mmapped tail file)
1305 : * but before that was committed, then the
1306 : * offset may point outside of the vheap */
1307 1201584 : const void *old = BUNtvaroff(bi, updid) < bi.vhfree ? BUNtvar(bi, updid) : NULL;
1308 :
1309 1201419 : if (old && atomcmp(old, new) == 0) {
1310 : /* replacing with the same value:
1311 : * nothing to do */
1312 131900 : continue;
1313 : }
1314 :
1315 1069136 : bool isnil = atomcmp(new, nil) == 0;
1316 1070834 : anynil |= isnil;
1317 1070834 : MT_lock_set(&b->theaplock);
1318 1071263 : if (old == NULL ||
1319 1071263 : (b->tnil &&
1320 711 : !anynil &&
1321 712 : atomcmp(old, nil) == 0)) {
1322 : /* if old value is nil and no new
1323 : * value is, we're not sure anymore
1324 : * about the nil property, so we must
1325 : * clear it */
1326 709 : b->tnil = false;
1327 : }
1328 1071262 : b->tnonil &= !isnil;
1329 1071262 : b->tnil |= isnil;
1330 1071262 : MT_lock_unset(&b->theaplock);
1331 1071553 : if (bi.maxpos != BUN_NONE) {
1332 3412 : if (!isnil &&
1333 1706 : atomcmp(BUNtvar(bi, bi.maxpos), new) < 0) {
1334 : /* new value is larger than
1335 : * previous largest */
1336 23 : bi.maxpos = updid;
1337 3366 : } else if (old == NULL ||
1338 1696 : (atomcmp(BUNtvar(bi, bi.maxpos), old) == 0 &&
1339 13 : atomcmp(new, old) != 0)) {
1340 : /* old value is equal to
1341 : * largest and new value is
1342 : * smaller, so we don't know
1343 : * anymore which is the
1344 : * largest */
1345 13 : bi.maxpos = BUN_NONE;
1346 : }
1347 : }
1348 1071553 : if (bi.minpos != BUN_NONE) {
1349 3396 : if (!isnil &&
1350 1698 : atomcmp(BUNtvar(bi, bi.minpos), new) > 0) {
1351 : /* new value is smaller than
1352 : * previous smallest */
1353 14 : bi.minpos = updid;
1354 3368 : } else if (old == NULL ||
1355 1706 : (atomcmp(BUNtvar(bi, bi.minpos), old) == 0 &&
1356 22 : atomcmp(new, old) != 0)) {
1357 : /* old value is equal to
1358 : * smallest and new value is
1359 : * larger, so we don't know
1360 : * anymore which is the
1361 : * smallest */
1362 22 : bi.minpos = BUN_NONE;
1363 : }
1364 : }
1365 1071553 : if (!locked) {
1366 2202 : MT_rwlock_wrlock(&b->thashlock);
1367 2202 : locked = true;
1368 : }
1369 1071553 : if (old)
1370 1071553 : HASHdelete_locked(&bi, updid, old);
1371 0 : else if (b->thash) {
1372 0 : doHASHdestroy(b, b->thash);
1373 0 : b->thash = NULL;
1374 : }
1375 :
1376 1071533 : var_t d;
1377 1071533 : switch (b->twidth) {
1378 1058166 : case 1:
1379 1058166 : d = (var_t) ((uint8_t *) b->theap->base)[updid] + GDK_VAROFFSET;
1380 1058166 : break;
1381 10380 : case 2:
1382 10380 : d = (var_t) ((uint16_t *) b->theap->base)[updid] + GDK_VAROFFSET;
1383 10380 : break;
1384 2952 : case 4:
1385 2952 : d = (var_t) ((uint32_t *) b->theap->base)[updid];
1386 2952 : break;
1387 : #if SIZEOF_VAR_T == 8
1388 35 : case 8:
1389 35 : d = (var_t) ((uint64_t *) b->theap->base)[updid];
1390 35 : break;
1391 : #endif
1392 : default:
1393 0 : MT_UNREACHABLE();
1394 : }
1395 1071533 : MT_lock_set(&b->theaplock);
1396 1071338 : gdk_return rc = ATOMreplaceVAR(b, &d, new);
1397 1071308 : MT_lock_unset(&b->theaplock);
1398 1071289 : if (rc != GDK_SUCCEED) {
1399 0 : goto bailout;
1400 : }
1401 1071289 : if (b->twidth < SIZEOF_VAR_T &&
1402 1071185 : (b->twidth <= 2 ? d - GDK_VAROFFSET : d) >= ((size_t) 1 << (8 << b->tshift))) {
1403 : /* doesn't fit in current heap, upgrade it */
1404 20 : if (GDKupgradevarheap(b, d, 0, MAX(updid, b->batCount)) != GDK_SUCCEED) {
1405 0 : goto bailout;
1406 : }
1407 : }
1408 : /* in case ATOMreplaceVAR and/or
1409 : * GDKupgradevarheap replaces a heap, we need to
1410 : * reinitialize the iterator */
1411 : {
1412 : /* save and restore minpos/maxpos */
1413 1071289 : BUN minpos = bi.minpos;
1414 1071289 : BUN maxpos = bi.maxpos;
1415 1071289 : bi = bat_iterator_nolock(b);
1416 1071289 : bi.minpos = minpos;
1417 1071289 : bi.maxpos = maxpos;
1418 : }
1419 1071289 : switch (b->twidth) {
1420 1057904 : case 1:
1421 1057904 : ((uint8_t *) b->theap->base)[updid] = (uint8_t) (d - GDK_VAROFFSET);
1422 1057904 : break;
1423 10396 : case 2:
1424 10396 : ((uint16_t *) b->theap->base)[updid] = (uint16_t) (d - GDK_VAROFFSET);
1425 10396 : break;
1426 2954 : case 4:
1427 2954 : ((uint32_t *) b->theap->base)[updid] = (uint32_t) d;
1428 2954 : break;
1429 : #if SIZEOF_VAR_T == 8
1430 35 : case 8:
1431 35 : ((uint64_t *) b->theap->base)[updid] = (uint64_t) d;
1432 35 : break;
1433 : #endif
1434 : default:
1435 0 : MT_UNREACHABLE();
1436 : }
1437 1071289 : HASHinsert_locked(&bi, updid, new);
1438 :
1439 : }
1440 2649 : if (locked) {
1441 2198 : if (b->thash)
1442 2 : nunique = b->thash->nunique;
1443 2198 : MT_rwlock_wrunlock(&b->thashlock);
1444 2198 : locked = false;
1445 : }
1446 2649 : MT_lock_set(&b->theaplock);
1447 2649 : b->tvheap->dirty = true;
1448 2649 : MT_lock_unset(&b->theaplock);
1449 22597 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
1450 0 : assert(b->thash == NULL);
1451 0 : HASHdestroy(b); /* hash doesn't make sense for msk */
1452 0 : for (BUN i = 0; i < ni.count; i++) {
1453 0 : oid updid;
1454 0 : if (positions) {
1455 0 : updid = autoincr ? pos++ : *positions++;
1456 : } else {
1457 0 : updid = BUNtoid(p, i);
1458 : }
1459 :
1460 0 : if (updid < b->hseqbase ||
1461 0 : (!mayappend && updid >= hseqend)) {
1462 0 : GDKerror("id out of range\n");
1463 0 : bat_iterator_end(&ni);
1464 0 : return GDK_FAIL;
1465 : }
1466 0 : updid -= b->hseqbase;
1467 0 : if (!force && updid < b->batInserted) {
1468 0 : GDKerror("updating committed value\n");
1469 0 : bat_iterator_end(&ni);
1470 0 : return GDK_FAIL;
1471 : }
1472 0 : if (updid >= BATcount(b)) {
1473 0 : assert(mayappend);
1474 0 : if (BATcount(b) < updid &&
1475 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1476 0 : bat_iterator_end(&ni);
1477 0 : return GDK_FAIL;
1478 : }
1479 0 : if (BUNappend(b, BUNtmsk(ni, i), force) != GDK_SUCCEED) {
1480 0 : bat_iterator_end(&ni);
1481 0 : return GDK_FAIL;
1482 : }
1483 0 : continue;
1484 : }
1485 0 : mskSetVal(b, updid, Tmskval(&ni, i));
1486 : }
1487 0 : bi = bat_iterator_nolock(b);
1488 22597 : } else if (autoincr) {
1489 14246 : if (pos < b->hseqbase ||
1490 13510 : (!mayappend && pos + ni.count > hseqend)) {
1491 0 : GDKerror("id out of range\n");
1492 0 : bat_iterator_end(&ni);
1493 0 : return GDK_FAIL;
1494 : }
1495 14246 : pos -= b->hseqbase;
1496 14246 : if (!force && pos < b->batInserted) {
1497 0 : GDKerror("updating committed value\n");
1498 0 : bat_iterator_end(&ni);
1499 0 : return GDK_FAIL;
1500 : }
1501 :
1502 14246 : if (pos >= BATcount(b)) {
1503 442 : assert(mayappend);
1504 442 : bat_iterator_end(&ni);
1505 442 : if (BATcount(b) < pos &&
1506 0 : BUNappendmulti(b, NULL, (BUN) (pos - BATcount(b)), force) != GDK_SUCCEED) {
1507 : return GDK_FAIL;
1508 : }
1509 442 : return BATappend(b, n, NULL, force);
1510 : }
1511 13810 : if (pos + ni.count > BATcount(b) &&
1512 6 : BUNappendmulti(b, NULL, (BUN) (pos + ni.count - BATcount(b)), force) != GDK_SUCCEED) {
1513 0 : bat_iterator_end(&ni);
1514 0 : return GDK_FAIL;
1515 : }
1516 13804 : bi = bat_iterator_nolock(b);
1517 :
1518 : /* we copy all of n, so if there are nils in n we get
1519 : * nils in b (and else we don't know) */
1520 13804 : b->tnil = ni.nil;
1521 : /* we may not copy over all of b, so we only know that
1522 : * there are no nils in b afterward if there weren't
1523 : * any in either b or n to begin with */
1524 13804 : b->tnonil &= ni.nonil;
1525 : /* if there is no hash, we don't start the loop, if
1526 : * there is only a persisted hash, it will get destroyed
1527 : * in the first iteration, after which there is no hash
1528 : * and the loop ends */
1529 13804 : MT_rwlock_wrlock(&b->thashlock);
1530 13801 : locked = true;
1531 13805 : for (BUN i = pos, j = pos + ni.count; i < j && b->thash; i++)
1532 3 : HASHdelete_locked(&bi, i, Tloc(b, i));
1533 13802 : if (ni.type == TYPE_void) {
1534 0 : assert(b->ttype == TYPE_oid);
1535 0 : oid *o = Tloc(b, pos);
1536 0 : if (is_oid_nil(ni.tseq)) {
1537 : /* we may or may not overwrite the old
1538 : * min/max values */
1539 0 : bi.minpos = BUN_NONE;
1540 0 : bi.maxpos = BUN_NONE;
1541 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1542 0 : o[i] = oid_nil;
1543 0 : b->tnil = true;
1544 : } else {
1545 0 : oid v = ni.tseq;
1546 : /* we know min/max of n, so we know
1547 : * the new min/max of b if those of n
1548 : * are smaller/larger than the old */
1549 0 : if (bi.minpos != BUN_NONE) {
1550 0 : if (v <= BUNtoid(b, bi.minpos))
1551 0 : bi.minpos = pos;
1552 0 : else if (pos <= bi.minpos && bi.minpos < pos + ni.count)
1553 0 : bi.minpos = BUN_NONE;
1554 : }
1555 0 : if (complex_cand(n)) {
1556 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1557 0 : o[i] = *(oid *)Tpos(&ni, i);
1558 : /* last value */
1559 0 : v = o[ni.count - 1];
1560 : } else {
1561 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1562 0 : o[i] = v++;
1563 : /* last value added (not one beyond) */
1564 0 : v--;
1565 : }
1566 0 : if (bi.maxpos != BUN_NONE) {
1567 0 : if (v >= BUNtoid(b, bi.maxpos))
1568 0 : bi.maxpos = pos + ni.count - 1;
1569 0 : else if (pos <= bi.maxpos && bi.maxpos < pos + ni.count)
1570 0 : bi.maxpos = BUN_NONE;
1571 : }
1572 : }
1573 : } else {
1574 : /* if the extremes of n are at least as
1575 : * extreme as those of b, we can replace b's
1576 : * min/max, else we don't know what b's new
1577 : * min/max are*/
1578 14051 : if (bi.minpos != BUN_NONE && ni.minpos != BUN_NONE &&
1579 249 : atomcmp(BUNtloc(bi, bi.minpos), BUNtail(ni, ni.minpos)) >= 0) {
1580 139 : bi.minpos = pos + ni.minpos;
1581 : } else {
1582 13663 : bi.minpos = BUN_NONE;
1583 : }
1584 14057 : if (bi.maxpos != BUN_NONE && ni.maxpos != BUN_NONE &&
1585 255 : atomcmp(BUNtloc(bi, bi.maxpos), BUNtail(ni, ni.maxpos)) <= 0) {
1586 174 : bi.maxpos = pos + ni.maxpos;
1587 : } else {
1588 13628 : bi.maxpos = BUN_NONE;
1589 : }
1590 13802 : memcpy(Tloc(b, pos), ni.base,
1591 13802 : ni.count << b->tshift);
1592 : }
1593 : /* either we have a hash that was updated above, or we
1594 : * have no hash; we cannot have the case where there is
1595 : * only a persisted (unloaded) hash since it would have
1596 : * been destroyed above */
1597 13802 : if (b->thash != NULL) {
1598 7 : for (BUN i = pos, j = pos + ni.count; i < j; i++)
1599 3 : HASHinsert_locked(&bi, i, Tloc(b, i));
1600 4 : if (b->thash)
1601 4 : nunique = b->thash->nunique;
1602 : }
1603 13803 : MT_rwlock_wrunlock(&b->thashlock);
1604 13803 : locked = false;
1605 13803 : if (ni.count == BATcount(b)) {
1606 : /* if we replaced all values of b by values
1607 : * from n, we can also copy the min/max
1608 : * properties */
1609 6294 : bi.minpos = ni.minpos;
1610 6294 : bi.maxpos = ni.maxpos;
1611 6294 : if (BATtdensebi(&ni)) {
1612 : /* replaced all of b with a dense sequence */
1613 47 : MT_lock_set(&b->theaplock);
1614 47 : BATtseqbase(b, ni.tseq);
1615 47 : MT_lock_unset(&b->theaplock);
1616 : }
1617 : }
1618 : } else {
1619 158587928 : for (BUN i = 0; i < ni.count; i++) {
1620 158579576 : oid updid;
1621 158579576 : if (positions) {
1622 : /* assert(!autoincr) */
1623 158579576 : updid = *positions++;
1624 : } else {
1625 0 : updid = BUNtoid(p, i);
1626 : }
1627 :
1628 158579576 : if (updid < b->hseqbase ||
1629 158579576 : (!mayappend && updid >= hseqend)) {
1630 0 : GDKerror("id out of range\n");
1631 0 : goto bailout;
1632 : }
1633 158579576 : updid -= b->hseqbase;
1634 158579576 : if (!force && updid < b->batInserted) {
1635 0 : GDKerror("updating committed value\n");
1636 0 : goto bailout;
1637 : }
1638 :
1639 158579576 : const void *new = BUNtloc(ni, i);
1640 :
1641 158579576 : if (updid >= BATcount(b)) {
1642 16059 : assert(mayappend);
1643 16059 : if (locked) {
1644 10 : MT_rwlock_wrunlock(&b->thashlock);
1645 10 : locked = false;
1646 : }
1647 16059 : if (b->tminpos != bi.minpos ||
1648 16057 : b->tmaxpos != bi.maxpos) {
1649 3 : MT_lock_set(&b->theaplock);
1650 3 : b->tminpos = bi.minpos;
1651 3 : b->tmaxpos = bi.maxpos;
1652 3 : MT_lock_unset(&b->theaplock);
1653 : }
1654 16059 : if (BATcount(b) < updid &&
1655 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1656 0 : goto bailout;
1657 : }
1658 16059 : if (BUNappend(b, new, force) != GDK_SUCCEED) {
1659 0 : bat_iterator_end(&ni);
1660 0 : return GDK_FAIL;
1661 : }
1662 16059 : bi = bat_iterator_nolock(b);
1663 16059 : continue;
1664 : }
1665 :
1666 158563517 : const void *old = BUNtloc(bi, updid);
1667 158563517 : bool isnil = atomcmp(new, nil) == 0;
1668 158670956 : anynil |= isnil;
1669 158670956 : if (b->tnil &&
1670 1915 : !anynil &&
1671 1915 : atomcmp(old, nil) == 0) {
1672 : /* if old value is nil and no new
1673 : * value is, we're not sure anymore
1674 : * about the nil property, so we must
1675 : * clear it */
1676 1911 : b->tnil = false;
1677 : }
1678 158670956 : b->tnonil &= !isnil;
1679 158670956 : b->tnil |= isnil;
1680 158670956 : if (bi.maxpos != BUN_NONE) {
1681 8864 : if (!isnil &&
1682 4430 : atomcmp(BUNtloc(bi, bi.maxpos), new) < 0) {
1683 : /* new value is larger than
1684 : * previous largest */
1685 69 : bi.maxpos = updid;
1686 4372 : } else if (atomcmp(BUNtloc(bi, bi.maxpos), old) == 0 &&
1687 7 : atomcmp(new, old) != 0) {
1688 : /* old value is equal to
1689 : * largest and new value is
1690 : * smaller, so we don't know
1691 : * anymore which is the
1692 : * largest */
1693 7 : bi.maxpos = BUN_NONE;
1694 : }
1695 : }
1696 158670956 : if (bi.minpos != BUN_NONE) {
1697 8864 : if (!isnil &&
1698 4430 : atomcmp(BUNtloc(bi, bi.minpos), new) > 0) {
1699 : /* new value is smaller than
1700 : * previous smallest */
1701 6 : bi.minpos = updid;
1702 4434 : } else if (atomcmp(BUNtloc(bi, bi.minpos), old) == 0 &&
1703 6 : atomcmp(new, old) != 0) {
1704 : /* old value is equal to
1705 : * smallest and new value is
1706 : * larger, so we don't know
1707 : * anymore which is the
1708 : * smallest */
1709 6 : bi.minpos = BUN_NONE;
1710 : }
1711 : }
1712 :
1713 158670956 : if (!locked) {
1714 8352 : MT_rwlock_wrlock(&b->thashlock);
1715 8352 : locked = true;
1716 : }
1717 158670956 : HASHdelete_locked(&bi, updid, old);
1718 158998019 : switch (b->twidth) {
1719 30758582 : case 1:
1720 30758582 : ((bte *) b->theap->base)[updid] = * (bte *) new;
1721 30758582 : break;
1722 527206 : case 2:
1723 527206 : ((sht *) b->theap->base)[updid] = * (sht *) new;
1724 527206 : break;
1725 22561461 : case 4:
1726 22561461 : ((int *) b->theap->base)[updid] = * (int *) new;
1727 22561461 : break;
1728 99661535 : case 8:
1729 99661535 : ((lng *) b->theap->base)[updid] = * (lng *) new;
1730 99661535 : break;
1731 5489235 : case 16:
1732 : #ifdef HAVE_HGE
1733 5489235 : ((hge *) b->theap->base)[updid] = * (hge *) new;
1734 : #else
1735 : ((uuid *) b->theap->base)[updid] = * (uuid *) new;
1736 : #endif
1737 5489235 : break;
1738 0 : default:
1739 0 : memcpy(BUNtloc(bi, updid), new, ATOMsize(b->ttype));
1740 0 : break;
1741 : }
1742 158998019 : HASHinsert_locked(&bi, updid, new);
1743 : }
1744 8352 : if (locked) {
1745 8342 : if (b->thash)
1746 0 : nunique = b->thash->nunique;
1747 8342 : MT_rwlock_wrunlock(&b->thashlock);
1748 8342 : locked = false;
1749 : }
1750 : }
1751 24804 : bat_iterator_end(&ni);
1752 24805 : MT_lock_set(&b->theaplock);
1753 24805 : if (nunique != 0)
1754 6 : b->tunique_est = (double) nunique;
1755 24805 : b->tminpos = bi.minpos;
1756 24805 : b->tmaxpos = bi.maxpos;
1757 24805 : b->theap->dirty = true;
1758 24805 : MT_lock_unset(&b->theaplock);
1759 24804 : TRC_DEBUG(ALGO,
1760 : "BATreplace(" ALGOBATFMT "," ALGOOPTBATFMT "," ALGOBATFMT ") " LLFMT " usec\n",
1761 : ALGOBATPAR(b), ALGOOPTBATPAR(p), ALGOBATPAR(n),
1762 : GDKusec() - t0);
1763 : return GDK_SUCCEED;
1764 :
1765 0 : bailout:
1766 0 : bat_iterator_end(&ni);
1767 0 : if (locked) {
1768 0 : Hash *h = b->thash;
1769 0 : b->thash = NULL;
1770 0 : MT_rwlock_wrunlock(&b->thashlock);
1771 0 : doHASHdestroy(b, h);
1772 : }
1773 : return GDK_FAIL;
1774 : }
1775 :
1776 : /* replace values from b at locations specified in p with values in n */
1777 : gdk_return
1778 80952 : BATreplace(BAT *b, BAT *p, BAT *n, bool force)
1779 : {
1780 80952 : return BATappend_or_update(b, p, NULL, n, false, false, force);
1781 : }
1782 :
1783 : /* like BATreplace, but p may specify locations beyond the end of b */
1784 : gdk_return
1785 809 : BATupdate(BAT *b, BAT *p, BAT *n, bool force)
1786 : {
1787 809 : return BATappend_or_update(b, p, NULL, n, true, false, force);
1788 : }
1789 :
1790 : #if 0 /* not used */
1791 : /* like BATreplace, but the positions are given by an array of oid values */
1792 : gdk_return
1793 : BATreplacepos(BAT *b, const oid *positions, BAT *n, bool autoincr, bool force)
1794 : {
1795 : return BATappend_or_update(b, NULL, positions, n, false, autoincr, force);
1796 : }
1797 : #endif
1798 :
1799 : /* like BATreplace, but the positions are given by an array of oid
1800 : * values, and they may specify locations beyond the end of b */
1801 : gdk_return
1802 176 : BATupdatepos(BAT *b, const oid *positions, BAT *n, bool autoincr, bool force)
1803 : {
1804 176 : return BATappend_or_update(b, NULL, positions, n, true, autoincr, force);
1805 : }
1806 :
1807 : /*
1808 : * BAT Selections
1809 : * The BAT selectors are among the most heavily used operators.
1810 : * Their efficient implementation is therefore mandatory.
1811 : *
1812 : * BAT slice
1813 : * This function returns a horizontal slice from a BAT. It optimizes
1814 : * execution by avoiding to copy when the BAT is memory mapped (in
1815 : * this case, an independent submap is created) or else when it is
1816 : * read-only, then a VIEW bat is created as a result.
1817 : *
1818 : * If a new copy has to be created, this function takes care to
1819 : * preserve void-columns (in this case, the seqbase has to be
1820 : * recomputed in the result).
1821 : *
1822 : * The selected range is excluding the high value.
1823 : */
1824 : BAT *
1825 9915847 : BATslice(BAT *b, BUN l, BUN h)
1826 : {
1827 9915847 : BUN low = l;
1828 9915847 : BAT *bn = NULL;
1829 :
1830 9915847 : BATcheck(b, NULL);
1831 9915847 : BATiter bi = bat_iterator(b);
1832 9915993 : if (l > bi.count)
1833 : l = bi.count;
1834 9915993 : if (h > bi.count)
1835 : h = bi.count;
1836 9915993 : if (h < l)
1837 : h = l;
1838 :
1839 9915993 : if (complex_cand(b)) {
1840 : /* slicing a candidate list with exceptions */
1841 78 : struct canditer ci;
1842 78 : canditer_init(&ci, NULL, b);
1843 78 : if (b->hseqbase + l >= ci.hseq) {
1844 78 : l = b->hseqbase + l - ci.hseq;
1845 78 : h = b->hseqbase + h - ci.hseq;
1846 : } else {
1847 0 : l = 0;
1848 0 : if (b->hseqbase + h >= ci.hseq)
1849 0 : h = b->hseqbase + h - ci.hseq;
1850 : else
1851 : h = 0;
1852 : }
1853 78 : bn = canditer_slice(&ci, l, h);
1854 78 : goto doreturn;
1855 : }
1856 : /* If the source BAT is readonly, then we can obtain a VIEW
1857 : * that just reuses the memory of the source. */
1858 9915915 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1859 : /* forget about slices for bit masks: we can't deal
1860 : * with difference in alignment, so we'll just make a
1861 : * copy */
1862 0 : bn = COLnew((oid) (b->hseqbase + low), b->ttype, h - l, TRANSIENT);
1863 : /* we use BATappend with a candidate list to easily
1864 : * copy the part of b that we need */
1865 0 : BAT *s = BATdense(0, (oid) (b->hseqbase + low), h - l);
1866 0 : if (bn == NULL ||
1867 0 : s == NULL ||
1868 0 : BATappend(bn, b, s, false) != GDK_SUCCEED) {
1869 0 : BBPreclaim(bn);
1870 0 : BBPreclaim(s);
1871 0 : bn = NULL;
1872 0 : goto doreturn;
1873 : }
1874 0 : BBPunfix(s->batCacheid);
1875 0 : goto doreturn;
1876 : }
1877 9915915 : restrict_t prestricted;
1878 11007059 : if (bi.restricted == BAT_READ && VIEWtparent(b)) {
1879 1090978 : BAT *pb = BBP_desc(VIEWtparent(b));
1880 1090978 : MT_lock_set(&pb->theaplock);
1881 1090834 : prestricted = pb->batRestricted;
1882 1090834 : MT_lock_unset(&pb->theaplock);
1883 : } else {
1884 : prestricted = BAT_WRITE; /* just initialize with anything */
1885 : }
1886 9916081 : if (bi.restricted == BAT_READ &&
1887 9875120 : (!VIEWtparent(b) || prestricted == BAT_READ)) {
1888 9875119 : bn = VIEWcreate(b->hseqbase + low, b, l, h);
1889 9875119 : if (bn == NULL)
1890 : goto doreturn;
1891 : } else {
1892 : /* create a new BAT and put everything into it */
1893 40962 : BUN p = l;
1894 40962 : BUN q = h;
1895 :
1896 40962 : bn = COLnew((oid) (b->hseqbase + low), BATtdensebi(&bi) || (b->ttype == TYPE_oid && h == l) ? TYPE_void : b->ttype, h - l, TRANSIENT);
1897 40962 : if (bn == NULL)
1898 0 : goto doreturn;
1899 :
1900 40962 : if (bn->ttype == TYPE_void) {
1901 24099 : BATsetcount(bn, h - l);
1902 24098 : BATtseqbase(bn, is_oid_nil(bi.tseq) ? oid_nil : h == l ? 0 : (oid) (bi.tseq + low));
1903 16863 : } else if (bn->tvheap == NULL) {
1904 10470 : assert(BATatoms[bn->ttype].atomPut == NULL);
1905 10470 : memcpy(Tloc(bn, 0), (const char *) bi.base + (p << bi.shift),
1906 10470 : (q - p) << bn->tshift);
1907 10470 : bn->theap->dirty = true;
1908 10470 : BATsetcount(bn, h - l);
1909 : } else {
1910 1694236 : for (; p < q; p++) {
1911 1687844 : if (bunfastapp(bn, BUNtail(bi, p)) != GDK_SUCCEED) {
1912 0 : BBPreclaim(bn);
1913 0 : bn = NULL;
1914 0 : goto doreturn;
1915 : }
1916 : }
1917 : }
1918 40958 : bn->theap->dirty = true;
1919 40958 : bn->tsorted = bi.sorted || bn->batCount <= 1;
1920 40958 : bn->trevsorted = bi.revsorted || bn->batCount <= 1;
1921 40958 : bn->tkey = bi.key || bn->batCount <= 1;
1922 40958 : bn->tnonil = bi.nonil;
1923 40958 : bn->tnil = false; /* we don't know */
1924 40958 : if (bi.nosorted > l && bi.nosorted < h && !bn->tsorted)
1925 1730 : bn->tnosorted = bi.nosorted - l;
1926 : else
1927 39228 : bn->tnosorted = 0;
1928 40958 : if (bi.norevsorted > l && bi.norevsorted < h && !bn->trevsorted)
1929 4005 : bn->tnorevsorted = bi.norevsorted - l;
1930 : else
1931 36953 : bn->tnorevsorted = 0;
1932 40958 : if (bi.nokey[0] >= l && bi.nokey[0] < h &&
1933 34087 : bi.nokey[1] >= l && bi.nokey[1] < h &&
1934 407 : bi.nokey[0] != bi.nokey[1] &&
1935 : !bn->tkey) {
1936 407 : bn->tnokey[0] = bi.nokey[0] - l;
1937 407 : bn->tnokey[1] = bi.nokey[1] - l;
1938 : } else {
1939 40551 : bn->tnokey[0] = bn->tnokey[1] = 0;
1940 : }
1941 : }
1942 9915612 : doreturn:
1943 9915612 : bat_iterator_end(&bi);
1944 9916069 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",lo=" BUNFMT ",hi=" BUNFMT " -> "
1945 : ALGOOPTBATFMT "\n",
1946 : ALGOBATPAR(b), l, h, ALGOOPTBATPAR(bn));
1947 : return bn;
1948 : }
1949 :
1950 : #define BAT_ORDERED(TPE) \
1951 : do { \
1952 : const TPE *restrict vals = Tloc(b, 0); \
1953 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
1954 : if (vals[p - 1] > vals[p]) { \
1955 : b->tnosorted = p; \
1956 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
1957 : goto doreturn; \
1958 : } else if (vals[p - 1] < vals[p]) { \
1959 : if (!b->trevsorted && b->tnorevsorted == 0) { \
1960 : b->tnorevsorted = p; \
1961 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b)); \
1962 : } \
1963 : } else if (!b->tkey && b->tnokey[1] == 0) { \
1964 : b->tnokey[0] = p - 1; \
1965 : b->tnokey[1] = p; \
1966 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b)); \
1967 : } \
1968 : } \
1969 : } while (0)
1970 :
1971 : #define BAT_ORDERED_FP(TPE) \
1972 : do { \
1973 : const TPE *restrict vals = Tloc(b, 0); \
1974 : TPE prev = vals[0]; \
1975 : bool prevnil = is_##TPE##_nil(prev); \
1976 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
1977 : TPE next = vals[p]; \
1978 : int cmp = prevnil ? -!(prevnil = is_##TPE##_nil(next)) : (prevnil = is_##TPE##_nil(next)) ? 1 : (prev > next) - (prev < next); \
1979 : prev = next; \
1980 : if (cmp > 0) { \
1981 : b->tnosorted = bi.nosorted = p; \
1982 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
1983 : goto doreturn; \
1984 : } else if (cmp < 0) { \
1985 : if (!b->trevsorted && b->tnorevsorted == 0) { \
1986 : b->tnorevsorted = bi.norevsorted = p; \
1987 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b)); \
1988 : } \
1989 : } else if (!b->tkey && b->tnokey[1] == 0) { \
1990 : b->tnokey[0] = bi.nokey[0] = p - 1; \
1991 : b->tnokey[1] = bi.nokey[1] = p; \
1992 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b)); \
1993 : } \
1994 : } \
1995 : } while (0)
1996 :
1997 : /* Return whether the BAT is ordered or not. If we don't know, invest
1998 : * in a scan and record the results in the bat descriptor. If during
1999 : * the scan we happen to find evidence that the BAT is not reverse
2000 : * sorted, we record the location. */
2001 : bool
2002 2968169 : BATordered(BAT *b)
2003 : {
2004 2968169 : lng t0 = GDKusec();
2005 2968130 : bool sorted;
2006 :
2007 2968130 : MT_lock_set(&b->theaplock);
2008 2968157 : if (b->ttype == TYPE_void || b->tsorted || BATcount(b) == 0) {
2009 470566 : MT_lock_unset(&b->theaplock);
2010 470561 : return true;
2011 : }
2012 2497591 : if (b->tnosorted > 0 || !ATOMlinear(b->ttype)) {
2013 2094283 : MT_lock_unset(&b->theaplock);
2014 2094277 : return false;
2015 : }
2016 :
2017 : /* There are a few reasons why we need a lock here. It may be
2018 : * that multiple threads call this functions at the same time
2019 : * (happens a lot with mitosis/mergetable), but we only need to
2020 : * scan the bat in one thread: the others can reap the rewards
2021 : * when that one thread is done. Also, we need the heap to
2022 : * remain accessible (could have used bat_iterator for that),
2023 : * and, and this is the killer argument, we may need to make
2024 : * changes to the bat descriptor. */
2025 403308 : BATiter bi = bat_iterator_nolock(b);
2026 403308 : if (!b->tsorted && b->tnosorted == 0) {
2027 744949 : switch (ATOMbasetype(b->ttype)) {
2028 58855 : case TYPE_bte:
2029 127798651 : BAT_ORDERED(bte);
2030 : break;
2031 7020 : case TYPE_sht:
2032 1561563 : BAT_ORDERED(sht);
2033 : break;
2034 293070 : case TYPE_int:
2035 119308469 : BAT_ORDERED(int);
2036 : break;
2037 7297 : case TYPE_lng:
2038 63013825 : BAT_ORDERED(lng);
2039 : break;
2040 : #ifdef HAVE_HGE
2041 376 : case TYPE_hge:
2042 8002682 : BAT_ORDERED(hge);
2043 : break;
2044 : #endif
2045 963 : case TYPE_flt:
2046 8007194 : BAT_ORDERED_FP(flt);
2047 : break;
2048 733 : case TYPE_dbl:
2049 8201239 : BAT_ORDERED_FP(dbl);
2050 : break;
2051 : case TYPE_str:
2052 21191115 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2053 21177324 : int c;
2054 21177324 : const char *p1 = BUNtvar(bi, p - 1);
2055 21177431 : const char *p2 = BUNtvar(bi, p);
2056 21177324 : if (p1 == p2)
2057 : c = 0;
2058 2210571 : else if (p1[0] == '\200') {
2059 1719 : if (p2[0] == '\200')
2060 : c = 0;
2061 : else
2062 : c = -1;
2063 2208852 : } else if (p2[0] == '\200')
2064 : c = 1;
2065 : else
2066 2207757 : c = strcmp(p1, p2);
2067 2207757 : if (c > 0) {
2068 21015 : b->tnosorted = bi.nosorted = p;
2069 21015 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2070 21015 : goto doreturn;
2071 21156309 : } else if (c < 0) {
2072 2189505 : assert(!b->trevsorted);
2073 2189505 : if (b->tnorevsorted == 0) {
2074 9922 : b->tnorevsorted = bi.norevsorted = p;
2075 9922 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b));
2076 : }
2077 18966804 : } else if (b->tnokey[1] == 0) {
2078 2994 : assert(!b->tkey);
2079 2994 : b->tnokey[0] = bi.nokey[0] = p - 1;
2080 2994 : b->tnokey[1] = bi.nokey[1] = p;
2081 21156309 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b));
2082 : }
2083 : }
2084 : break;
2085 188 : default: {
2086 188 : int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
2087 4483 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2088 4436 : int c;
2089 4436 : if ((c = cmpf(BUNtail(bi, p - 1), BUNtail(bi, p))) > 0) {
2090 141 : b->tnosorted = bi.nosorted = p;
2091 141 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2092 141 : goto doreturn;
2093 4295 : } else if (c < 0) {
2094 4243 : if (!b->trevsorted && b->tnorevsorted == 0) {
2095 90 : b->tnorevsorted = bi.norevsorted = p;
2096 90 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b));
2097 : }
2098 52 : } else if (!b->tkey && b->tnokey[1] == 0) {
2099 9 : b->tnokey[0] = bi.nokey[0] = p - 1;
2100 9 : b->tnokey[1] = bi.nokey[1] = p;
2101 4295 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b));
2102 : }
2103 : }
2104 : break;
2105 : }
2106 : }
2107 : /* we only get here if we completed the scan; note that
2108 : * if we didn't record evidence about *reverse*
2109 : * sortedness, we know that the BAT is also reverse
2110 : * sorted; similarly, if we didn't record evidence about
2111 : * keyness, we know the BAT is key */
2112 104823 : b->tsorted = bi.sorted = true;
2113 104823 : TRC_DEBUG(ALGO, "Fixed sorted for " ALGOBATFMT " (" LLFMT " usec)\n", ALGOBATPAR(b), GDKusec() - t0);
2114 104827 : if (!b->trevsorted && b->tnorevsorted == 0) {
2115 41624 : b->trevsorted = bi.revsorted = true;
2116 41624 : TRC_DEBUG(ALGO, "Fixed revsorted for " ALGOBATFMT "\n", ALGOBATPAR(b));
2117 : }
2118 104827 : if (!b->tkey && b->tnokey[1] == 0) {
2119 43862 : b->tkey = bi.key = true;
2120 43862 : TRC_DEBUG(ALGO, "Fixed key for " ALGOBATFMT "\n", ALGOBATPAR(b));
2121 : }
2122 : }
2123 104827 : doreturn:
2124 403312 : sorted = b->tsorted;
2125 403312 : bat pbid = VIEWtparent(b);
2126 403312 : MT_lock_unset(&b->theaplock);
2127 403311 : if (pbid) {
2128 224648 : BAT *pb = BBP_desc(pbid);
2129 224648 : MT_lock_set(&pb->theaplock);
2130 224648 : if (bi.count == BATcount(pb) &&
2131 195404 : bi.h == pb->theap &&
2132 195404 : bi.type == pb->ttype) {
2133 : /* add to knowledge in parent bat */
2134 195400 : pb->tsorted |= bi.sorted;
2135 195400 : if (pb->tnosorted == 0)
2136 195400 : pb->tnosorted = bi.nosorted;
2137 195400 : pb->trevsorted |= bi.revsorted;
2138 195400 : if (pb->tnorevsorted == 0)
2139 27156 : pb->tnorevsorted = bi.norevsorted;
2140 195400 : pb->tkey |= bi.key;
2141 195400 : if (pb->tnokey[1] == 0) {
2142 172525 : pb->tnokey[0] = bi.nokey[0];
2143 172525 : pb->tnokey[1] = bi.nokey[1];
2144 : }
2145 : }
2146 224648 : MT_lock_unset(&pb->theaplock);
2147 : }
2148 : return sorted;
2149 : }
2150 :
2151 : #define BAT_REVORDERED(TPE) \
2152 : do { \
2153 : const TPE *restrict vals = Tloc(b, 0); \
2154 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
2155 : if (vals[p - 1] < vals[p]) { \
2156 : b->tnorevsorted = p; \
2157 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
2158 : goto doreturn; \
2159 : } \
2160 : } \
2161 : } while (0)
2162 :
2163 : #define BAT_REVORDERED_FP(TPE) \
2164 : do { \
2165 : const TPE *restrict vals = Tloc(b, 0); \
2166 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
2167 : TPE prev = vals[p - 1], next = vals[p]; \
2168 : int cmp = is_flt_nil(prev) ? -!is_flt_nil(next) : is_flt_nil(next) ? 1 : (prev > next) - (prev < next); \
2169 : if (cmp < 0) { \
2170 : b->tnorevsorted = bi.norevsorted = p; \
2171 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
2172 : goto doreturn; \
2173 : } \
2174 : } \
2175 : } while (0)
2176 :
2177 : /* Return whether the BAT is reverse ordered or not. If we don't
2178 : * know, invest in a scan and record the results in the bat
2179 : * descriptor. */
2180 : bool
2181 2741280 : BATordered_rev(BAT *b)
2182 : {
2183 2741280 : lng t0 = GDKusec();
2184 2741305 : bool revsorted;
2185 :
2186 2741305 : if (b == NULL || !ATOMlinear(b->ttype))
2187 : return false;
2188 2741274 : MT_lock_set(&b->theaplock);
2189 2741268 : if (BATcount(b) <= 1 || b->trevsorted) {
2190 204389 : MT_lock_unset(&b->theaplock);
2191 204389 : return true;
2192 : }
2193 2536879 : if (b->ttype == TYPE_void) {
2194 16766 : MT_lock_unset(&b->theaplock);
2195 16766 : return is_oid_nil(b->tseqbase);
2196 : }
2197 2520113 : if (BATtdense(b) || b->tnorevsorted > 0) {
2198 2444512 : MT_lock_unset(&b->theaplock);
2199 2444498 : return false;
2200 : }
2201 75601 : BATiter bi = bat_iterator_nolock(b);
2202 75601 : if (!b->trevsorted && b->tnorevsorted == 0) {
2203 117262 : switch (ATOMbasetype(b->ttype)) {
2204 32442 : case TYPE_bte:
2205 10290864 : BAT_REVORDERED(bte);
2206 : break;
2207 3388 : case TYPE_sht:
2208 3134091 : BAT_REVORDERED(sht);
2209 : break;
2210 23367 : case TYPE_int:
2211 1467805 : BAT_REVORDERED(int);
2212 : break;
2213 3919 : case TYPE_lng:
2214 2840469 : BAT_REVORDERED(lng);
2215 : break;
2216 : #ifdef HAVE_HGE
2217 136 : case TYPE_hge:
2218 1051 : BAT_REVORDERED(hge);
2219 : break;
2220 : #endif
2221 512 : case TYPE_flt:
2222 1994 : BAT_REVORDERED_FP(flt);
2223 : break;
2224 282 : case TYPE_dbl:
2225 500614 : BAT_REVORDERED_FP(dbl);
2226 : break;
2227 11555 : default: {
2228 11555 : int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
2229 501509 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2230 498302 : if (cmpf(BUNtail(bi, p - 1), BUNtail(bi, p)) < 0) {
2231 8349 : b->tnorevsorted = p;
2232 8349 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2233 8349 : goto doreturn;
2234 : }
2235 : }
2236 : break;
2237 : }
2238 : }
2239 18762 : b->trevsorted = bi.revsorted = true;
2240 18762 : TRC_DEBUG(ALGO, "Fixed revsorted for " ALGOBATFMT " (" LLFMT " usec)\n", ALGOBATPAR(b), GDKusec() - t0);
2241 : }
2242 18762 : doreturn:
2243 75602 : revsorted = b->trevsorted;
2244 75602 : bat pbid = VIEWtparent(b);
2245 75602 : MT_lock_unset(&b->theaplock);
2246 75604 : if (pbid) {
2247 17340 : BAT *pb = BBP_desc(pbid);
2248 17340 : MT_lock_set(&pb->theaplock);
2249 17341 : if (bi.count == BATcount(pb) &&
2250 4145 : bi.h == pb->theap &&
2251 4145 : bi.type == pb->ttype) {
2252 : /* add to knowledge in parent bat */
2253 4145 : pb->trevsorted |= bi.revsorted;
2254 4145 : if (pb->tnorevsorted == 0)
2255 4145 : pb->tnorevsorted = bi.norevsorted;
2256 : }
2257 17341 : MT_lock_unset(&pb->theaplock);
2258 : }
2259 : return revsorted;
2260 : }
2261 :
2262 : /* figure out which sort function is to be called
2263 : * stable sort can produce an error (not enough memory available),
2264 : * "quick" sort does not produce errors */
2265 : static gdk_return
2266 3945183 : do_sort(void *restrict h, void *restrict t, const void *restrict base,
2267 : size_t n, int hs, int ts, int tpe, bool reverse, bool nilslast,
2268 : bool stable)
2269 : {
2270 3945183 : if (n <= 1) /* trivially sorted */
2271 : return GDK_SUCCEED;
2272 2750706 : if (stable) {
2273 185 : if (reverse)
2274 1 : return GDKssort_rev(h, t, base, n, hs, ts, tpe);
2275 : else
2276 184 : return GDKssort(h, t, base, n, hs, ts, tpe);
2277 : } else {
2278 2750521 : GDKqsort(h, t, base, n, hs, ts, tpe, reverse, nilslast);
2279 : }
2280 2750521 : return GDK_SUCCEED;
2281 : }
2282 :
2283 : /* Sort the bat b according to both o and g. The stable and reverse
2284 : * parameters indicate whether the sort should be stable or descending
2285 : * respectively. The parameter b is required, o and g are optional
2286 : * (i.e., they may be NULL).
2287 : *
2288 : * A sorted copy is returned through the sorted parameter, the new
2289 : * ordering is returned through the order parameter, group information
2290 : * is returned through the groups parameter. All three output
2291 : * parameters may be NULL. If they're all NULL, this function does
2292 : * nothing.
2293 : *
2294 : * If o is specified, it is used to first rearrange b according to the
2295 : * order specified in o, after which b is sorted taking g into
2296 : * account.
2297 : *
2298 : * If g is specified, it indicates groups which should be individually
2299 : * ordered. Each row of consecutive equal values in g indicates a
2300 : * group which is sorted according to stable and reverse. g is used
2301 : * after the order in b was rearranged according to o.
2302 : *
2303 : * The outputs order and groups can be used in subsequent calls to
2304 : * this function. This can be used if multiple BATs need to be sorted
2305 : * together. The BATs should then be sorted in order of significance,
2306 : * and each following call should use the original unordered BAT plus
2307 : * the order and groups bat from the previous call. In this case, the
2308 : * sorted BATs are not of much use, so the sorted output parameter
2309 : * does not need to be specified.
2310 : * Apart from error checking and maintaining reference counts, sorting
2311 : * three columns (col1, col2, col3) could look like this with the
2312 : * sorted results in (col1s, col2s, col3s):
2313 : * BATsort(&col1s, &ord1, &grp1, col1, NULL, NULL, false, false, false);
2314 : * BATsort(&col2s, &ord2, &grp2, col2, ord1, grp1, false, false, false);
2315 : * BATsort(&col3s, NULL, NULL, col3, ord2, grp2, false, false, false);
2316 : * Note that the "reverse" parameter can be different for each call.
2317 : */
2318 : gdk_return
2319 29343 : BATsort(BAT **sorted, BAT **order, BAT **groups,
2320 : BAT *b, BAT *o, BAT *g, bool reverse, bool nilslast, bool stable)
2321 : {
2322 29343 : BAT *bn = NULL, *on = NULL, *gn = NULL, *pb = NULL;
2323 29343 : BATiter pbi;
2324 29343 : oid *restrict grps, *restrict ords, prev;
2325 29343 : BUN p, q, r;
2326 29343 : lng t0 = GDKusec();
2327 29343 : bool mkorderidx, orderidxlock = false;
2328 29343 : Heap *oidxh = NULL;
2329 :
2330 : /* we haven't implemented NILs as largest value for stable
2331 : * sort, so NILs come first for ascending and last for
2332 : * descending */
2333 29343 : assert(!stable || reverse == nilslast);
2334 :
2335 29343 : if (b == NULL) {
2336 0 : GDKerror("b must exist\n");
2337 0 : return GDK_FAIL;
2338 : }
2339 29343 : if (stable && reverse != nilslast) {
2340 0 : GDKerror("stable sort cannot have reverse != nilslast\n");
2341 0 : return GDK_FAIL;
2342 : }
2343 29343 : if (!ATOMlinear(b->ttype)) {
2344 0 : GDKerror("type %s cannot be sorted\n", ATOMname(b->ttype));
2345 0 : return GDK_FAIL;
2346 : }
2347 29343 : MT_lock_set(&b->theaplock);
2348 29343 : if (b->ttype == TYPE_void) {
2349 115 : b->tsorted = true;
2350 168 : if (b->trevsorted != (is_oid_nil(b->tseqbase) || b->batCount <= 1)) {
2351 0 : b->trevsorted = !b->trevsorted;
2352 : }
2353 230 : if (b->tkey != (!is_oid_nil(b->tseqbase) || b->batCount <= 1)) {
2354 0 : b->tkey = !b->tkey;
2355 : }
2356 29228 : } else if (b->batCount <= 1) {
2357 9403 : if (!b->tsorted || !b->trevsorted) {
2358 29 : b->tsorted = b->trevsorted = true;
2359 : }
2360 : }
2361 29343 : MT_lock_unset(&b->theaplock);
2362 29343 : if (o != NULL &&
2363 15945 : (ATOMtype(o->ttype) != TYPE_oid || /* oid tail */
2364 15945 : BATcount(o) != BATcount(b) || /* same size as b */
2365 6283 : (o->ttype == TYPE_void && /* no nil tail */
2366 2422 : BATcount(o) != 0 &&
2367 2422 : is_oid_nil(o->tseqbase)))) {
2368 0 : GDKerror("o must have type oid and same size as b\n");
2369 0 : return GDK_FAIL;
2370 : }
2371 29343 : if (g != NULL &&
2372 15945 : (ATOMtype(g->ttype) != TYPE_oid || /* oid tail */
2373 15945 : !g->tsorted || /* sorted */
2374 15945 : BATcount(g) != BATcount(b) || /* same size as b */
2375 6544 : (g->ttype == TYPE_void && /* no nil tail */
2376 2683 : BATcount(g) != 0 &&
2377 2683 : is_oid_nil(g->tseqbase)))) {
2378 0 : GDKerror("g must have type oid, sorted on the tail, "
2379 : "and same size as b\n");
2380 0 : return GDK_FAIL;
2381 : }
2382 29343 : if (sorted == NULL && order == NULL) {
2383 : /* no place to put result, so we're done quickly */
2384 0 : GDKerror("no place to put the result.\n");
2385 0 : return GDK_FAIL;
2386 : }
2387 29343 : if (g == NULL && !stable) {
2388 : /* pre-ordering doesn't make sense if we're not
2389 : * subsorting and the sort is not stable */
2390 13116 : o = NULL;
2391 : }
2392 29343 : if (b->tnonil) {
2393 : /* if there are no nils, placement of nils doesn't
2394 : * matter, so set nilslast such that ordered bits can
2395 : * be used */
2396 20648 : nilslast = reverse;
2397 : }
2398 29343 : pbi = bat_iterator(NULL);
2399 30298 : if (BATcount(b) <= 1 ||
2400 19780 : (reverse == nilslast &&
2401 : (reverse ? b->trevsorted : b->tsorted) &&
2402 5002 : o == NULL && g == NULL &&
2403 2307 : (groups == NULL || BATtkey(b) ||
2404 : (reverse ? b->tsorted : b->trevsorted)))) {
2405 : /* trivially (sub)sorted, and either we don't need to
2406 : * return group information, or we can trivially
2407 : * deduce the groups */
2408 11318 : if (sorted) {
2409 9932 : bn = COLcopy(b, b->ttype, false, TRANSIENT);
2410 9932 : if (bn == NULL)
2411 0 : goto error;
2412 9932 : *sorted = bn;
2413 : }
2414 11318 : if (order) {
2415 10418 : on = BATdense(b->hseqbase, b->hseqbase, BATcount(b));
2416 10418 : if (on == NULL)
2417 0 : goto error;
2418 10418 : *order = on;
2419 : }
2420 11318 : if (groups) {
2421 5870 : if (BATtkey(b)) {
2422 : /* singleton groups */
2423 5457 : gn = BATdense(0, 0, BATcount(b));
2424 5457 : if (gn == NULL)
2425 0 : goto error;
2426 : } else {
2427 : /* single group */
2428 413 : const oid *o = 0;
2429 413 : assert(BATcount(b) == 1 ||
2430 : (b->tsorted && b->trevsorted));
2431 413 : gn = BATconstant(0, TYPE_oid, &o, BATcount(b), TRANSIENT);
2432 413 : if (gn == NULL)
2433 0 : goto error;
2434 : }
2435 5870 : *groups = gn;
2436 : }
2437 11318 : bat_iterator_end(&pbi);
2438 11318 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o="
2439 : ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
2440 : ",reverse=%d,nilslast=%d,stable=%d) = ("
2441 : ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2442 : ALGOOPTBATFMT " -- trivial (" LLFMT
2443 : " usec)\n",
2444 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2445 : ALGOOPTBATPAR(g), reverse, nilslast, stable,
2446 : ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
2447 : ALGOOPTBATPAR(on), GDKusec() - t0);
2448 11318 : return GDK_SUCCEED;
2449 : }
2450 18025 : if (VIEWtparent(b)) {
2451 3549 : pb = BATdescriptor(VIEWtparent(b));
2452 3549 : if (pb != NULL &&
2453 3549 : (b->tbaseoff != pb->tbaseoff ||
2454 2819 : BATcount(b) != BATcount(pb) ||
2455 2574 : b->hseqbase != pb->hseqbase ||
2456 2565 : BATatoms[b->ttype].atomCmp != BATatoms[pb->ttype].atomCmp)) {
2457 984 : BBPunfix(pb->batCacheid);
2458 984 : pb = NULL;
2459 : }
2460 : } else {
2461 : pb = b;
2462 : }
2463 18025 : bat_iterator_end(&pbi);
2464 18025 : pbi = bat_iterator(pb);
2465 : /* when we will create an order index if it doesn't already exist */
2466 18025 : mkorderidx = (g == NULL && !reverse && !nilslast && pb != NULL && (order || !pbi.transient));
2467 18025 : if (g == NULL && !reverse && !nilslast && pb != NULL) {
2468 5895 : (void) BATcheckorderidx(pb);
2469 5895 : MT_lock_set(&pb->batIdxLock);
2470 5895 : if (pb->torderidx) {
2471 59 : if (!stable || ((oid *) pb->torderidx->base)[2]) {
2472 : /* we can use the order index */
2473 59 : oidxh = pb->torderidx;
2474 59 : HEAPincref(oidxh);
2475 : }
2476 : mkorderidx = false;
2477 5836 : } else if (b != pb) {
2478 : /* don't build orderidx on parent bat */
2479 : mkorderidx = false;
2480 4928 : } else if (mkorderidx) {
2481 : /* keep lock when going to create */
2482 4307 : orderidxlock = true;
2483 : }
2484 5895 : if (!orderidxlock)
2485 1588 : MT_lock_unset(&pb->batIdxLock);
2486 : }
2487 18025 : if (g == NULL && o == NULL && !reverse && !nilslast && oidxh != NULL) {
2488 : /* there is an order index that we can use */
2489 59 : on = COLnew(pb->hseqbase, TYPE_oid, pbi.count, TRANSIENT);
2490 59 : if (on == NULL)
2491 0 : goto error;
2492 59 : memcpy(Tloc(on, 0), (oid *) oidxh->base + ORDERIDXOFF, pbi.count * sizeof(oid));
2493 59 : BATsetcount(on, BATcount(b));
2494 59 : HEAPdecref(oidxh, false);
2495 59 : oidxh = NULL;
2496 59 : on->tkey = true;
2497 59 : on->tnil = false;
2498 59 : on->tnonil = true;
2499 59 : on->tsorted = on->trevsorted = false;
2500 59 : on->tseqbase = oid_nil;
2501 59 : if (sorted || groups) {
2502 59 : bn = BATproject(on, b);
2503 59 : if (bn == NULL)
2504 0 : goto error;
2505 59 : bn->tsorted = true;
2506 59 : if (groups) {
2507 4 : if (BATgroup_internal(groups, NULL, NULL, bn, NULL, g, NULL, NULL, true) != GDK_SUCCEED)
2508 0 : goto error;
2509 4 : if (sorted &&
2510 4 : (*groups)->tkey &&
2511 : g == NULL) {
2512 : /* if new groups bat is key
2513 : * and since there is no input
2514 : * groups bat, we know the
2515 : * result bat is key */
2516 4 : bn->tkey = true;
2517 : }
2518 : }
2519 59 : if (sorted)
2520 59 : *sorted = bn;
2521 : else {
2522 0 : BBPunfix(bn->batCacheid);
2523 0 : bn = NULL;
2524 : }
2525 : }
2526 59 : if (order)
2527 9 : *order = on;
2528 : else {
2529 50 : BBPunfix(on->batCacheid);
2530 50 : on = NULL;
2531 : }
2532 59 : bat_iterator_end(&pbi);
2533 59 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o="
2534 : ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
2535 : ",reverse=%d,nilslast=%d,stable=%d) = ("
2536 : ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2537 : ALGOOPTBATFMT " -- orderidx (" LLFMT
2538 : " usec)\n",
2539 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2540 : ALGOOPTBATPAR(g), reverse, nilslast, stable,
2541 : ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
2542 : ALGOOPTBATPAR(on), GDKusec() - t0);
2543 59 : if (pb != NULL && pb != b)
2544 53 : BBPunfix(pb->batCacheid);
2545 59 : return GDK_SUCCEED;
2546 11586 : } else if (oidxh) {
2547 0 : HEAPdecref(oidxh, false);
2548 0 : oidxh = NULL;
2549 : }
2550 17966 : if (o) {
2551 11017 : bn = BATproject(o, b);
2552 11017 : if (bn == NULL)
2553 0 : goto error;
2554 11017 : if (bn->ttype == TYPE_void || isVIEW(bn)) {
2555 4840 : BAT *b2 = COLcopy(bn, ATOMtype(bn->ttype), true, TRANSIENT);
2556 2420 : BBPunfix(bn->batCacheid);
2557 2420 : bn = b2;
2558 : }
2559 11017 : if (pb) {
2560 10598 : bat_iterator_end(&pbi);
2561 10598 : if (pb != b)
2562 1266 : BBPunfix(pb->batCacheid);
2563 10598 : pbi = bat_iterator(NULL);
2564 10598 : pb = NULL;
2565 : }
2566 : } else {
2567 6949 : bn = COLcopy(b, b->ttype, true, TRANSIENT);
2568 : }
2569 17966 : if (bn == NULL)
2570 0 : goto error;
2571 17966 : if (order) {
2572 : /* prepare order bat */
2573 15141 : if (o) {
2574 : /* make copy of input so that we can refine it;
2575 : * copy can be read-only if we take the shortcut
2576 : * below in the case g is "key" */
2577 9375 : on = COLcopy(o, TYPE_oid,
2578 9375 : g == NULL ||
2579 9375 : !(g->tkey || g->ttype == TYPE_void),
2580 : TRANSIENT);
2581 9375 : if (on == NULL)
2582 0 : goto error;
2583 9375 : BAThseqbase(on, b->hseqbase);
2584 9375 : on->tminpos = BUN_NONE;
2585 9375 : on->tmaxpos = BUN_NONE;
2586 : } else {
2587 : /* create new order */
2588 5766 : on = COLnew(b->hseqbase, TYPE_oid, BATcount(bn), TRANSIENT);
2589 5766 : if (on == NULL)
2590 0 : goto error;
2591 5766 : ords = (oid *) Tloc(on, 0);
2592 31469193 : for (p = 0, q = BATcount(bn); p < q; p++)
2593 31463427 : ords[p] = p + b->hseqbase;
2594 5766 : BATsetcount(on, BATcount(bn));
2595 5766 : on->tkey = true;
2596 5766 : on->tnil = false;
2597 5766 : on->tnonil = true;
2598 : }
2599 : /* COLcopy above can create TYPE_void */
2600 15141 : if (on->ttype != TYPE_void) {
2601 14393 : on->tsorted = on->trevsorted = false; /* it won't be sorted */
2602 14393 : on->tseqbase = oid_nil; /* and hence not dense */
2603 14393 : on->tnosorted = on->tnorevsorted = 0;
2604 : }
2605 15141 : *order = on;
2606 15141 : ords = (oid *) Tloc(on, 0);
2607 : } else {
2608 : ords = NULL;
2609 : }
2610 17966 : if (g) {
2611 11017 : if (g->tkey || g->ttype == TYPE_void) {
2612 : /* if g is "key", all groups are size 1, so no
2613 : * subsorting needed */
2614 4973 : if (sorted) {
2615 4597 : *sorted = bn;
2616 : } else {
2617 376 : BBPunfix(bn->batCacheid);
2618 376 : bn = NULL;
2619 : }
2620 4973 : if (order) {
2621 3802 : *order = on;
2622 3802 : if (o) {
2623 : /* we can inherit sortedness
2624 : * after all */
2625 3802 : on->tsorted = o->tsorted;
2626 3802 : on->trevsorted = o->trevsorted;
2627 3802 : if (o->tnosorted)
2628 56 : on->tnosorted = o->tnosorted;
2629 3802 : if (o->tnorevsorted)
2630 74 : on->tnorevsorted = o->tnorevsorted;
2631 : } else {
2632 : /* we didn't rearrange, so
2633 : * still sorted */
2634 0 : on->tsorted = true;
2635 0 : on->trevsorted = false;
2636 : }
2637 3802 : if (BATcount(on) <= 1) {
2638 0 : on->tsorted = true;
2639 0 : on->trevsorted = true;
2640 : }
2641 : }
2642 4973 : if (groups) {
2643 2881 : gn = COLcopy(g, g->ttype, false, TRANSIENT);
2644 2881 : if (gn == NULL)
2645 0 : goto error;
2646 2881 : *groups = gn;
2647 : }
2648 4973 : bat_iterator_end(&pbi);
2649 4973 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
2650 : ",o=" ALGOOPTBATFMT ",g=" ALGOBATFMT
2651 : ",reverse=%d,nilslast=%d,stable=%d"
2652 : ") = (" ALGOOPTBATFMT ","
2653 : ALGOOPTBATFMT "," ALGOOPTBATFMT
2654 : " -- key group (" LLFMT " usec)\n",
2655 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2656 : ALGOBATPAR(g), reverse, nilslast,
2657 : stable, ALGOOPTBATPAR(bn),
2658 : ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on),
2659 : GDKusec() - t0);
2660 4973 : if (pb != NULL && pb != b)
2661 0 : BBPunfix(pb->batCacheid);
2662 4973 : return GDK_SUCCEED;
2663 : }
2664 6044 : assert(g->ttype == TYPE_oid);
2665 6044 : grps = (oid *) Tloc(g, 0);
2666 6044 : prev = grps[0];
2667 6044 : if (BATmaterialize(bn, BUN_NONE) != GDK_SUCCEED)
2668 0 : goto error;
2669 45640139 : for (r = 0, p = 1, q = BATcount(g); p < q; p++) {
2670 45634095 : if (grps[p] != prev) {
2671 : /* sub sort [r,p) */
2672 7565528 : if (do_sort(Tloc(bn, r),
2673 3632748 : ords ? ords + r : NULL,
2674 3932780 : bn->tvheap ? bn->tvheap->base : NULL,
2675 3932780 : p - r, bn->twidth, ords ? sizeof(oid) : 0,
2676 3932780 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)
2677 0 : goto error;
2678 3932780 : r = p;
2679 3932780 : prev = grps[p];
2680 : }
2681 : }
2682 : /* sub sort [r,q) */
2683 11617 : if (do_sort(Tloc(bn, r),
2684 5573 : ords ? ords + r : NULL,
2685 6044 : bn->tvheap ? bn->tvheap->base : NULL,
2686 6044 : p - r, bn->twidth, ords ? sizeof(oid) : 0,
2687 6044 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)
2688 0 : goto error;
2689 : /* if single group (r==0) the result is (rev)sorted,
2690 : * otherwise (maybe) not */
2691 6044 : bn->tsorted = r == 0 && !reverse && !nilslast;
2692 12059 : bn->trevsorted = r == 0 && reverse && nilslast;
2693 : } else {
2694 6949 : Heap *m = NULL;
2695 : /* only invest in creating an order index if the BAT
2696 : * is persistent */
2697 6949 : if (mkorderidx) {
2698 4307 : assert(orderidxlock);
2699 4307 : if ((m = createOIDXheap(pb, stable)) != NULL &&
2700 : ords == NULL) {
2701 0 : ords = (oid *) m->base + ORDERIDXOFF;
2702 0 : if (o && o->ttype != TYPE_void)
2703 0 : memcpy(ords, Tloc(o, 0), BATcount(o) * sizeof(oid));
2704 0 : else if (o)
2705 0 : for (p = 0, q = BATcount(o); p < q; p++)
2706 0 : ords[p] = p + o->tseqbase;
2707 : else
2708 0 : for (p = 0, q = BATcount(b); p < q; p++)
2709 0 : ords[p] = p + b->hseqbase;
2710 : }
2711 : }
2712 6949 : if ((reverse != nilslast ||
2713 13232 : (reverse ? !bn->trevsorted : !bn->tsorted)) &&
2714 12750 : (BATmaterialize(bn, BUN_NONE) != GDK_SUCCEED ||
2715 6375 : do_sort(Tloc(bn, 0),
2716 : ords,
2717 6375 : bn->tvheap ? bn->tvheap->base : NULL,
2718 6375 : BATcount(bn), bn->twidth, ords ? sizeof(oid) : 0,
2719 6375 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)) {
2720 0 : if (m != NULL) {
2721 0 : HEAPfree(m, true);
2722 0 : GDKfree(m);
2723 : }
2724 0 : goto error;
2725 : }
2726 6949 : bn->tsorted = !reverse && !nilslast;
2727 6949 : bn->trevsorted = reverse && nilslast;
2728 6949 : if (m != NULL) {
2729 4307 : assert(orderidxlock);
2730 4307 : if (pb->torderidx == NULL) {
2731 4307 : if (ords != (oid *) m->base + ORDERIDXOFF) {
2732 4307 : memcpy((oid *) m->base + ORDERIDXOFF,
2733 : ords,
2734 4307 : pbi.count * sizeof(oid));
2735 : }
2736 4307 : pb->torderidx = m;
2737 4307 : persistOIDX(pb);
2738 : } else {
2739 0 : HEAPfree(m, true);
2740 0 : GDKfree(m);
2741 : }
2742 : }
2743 : }
2744 12993 : if (orderidxlock) {
2745 4307 : MT_lock_unset(&pb->batIdxLock);
2746 4307 : orderidxlock = false;
2747 : }
2748 12993 : bn->theap->dirty = true;
2749 12993 : bn->tnosorted = 0;
2750 12993 : bn->tnorevsorted = 0;
2751 12993 : bn->tnokey[0] = bn->tnokey[1] = 0;
2752 12993 : bn->tminpos = BUN_NONE;
2753 12993 : bn->tmaxpos = BUN_NONE;
2754 12993 : if (groups) {
2755 7864 : if (BATgroup_internal(groups, NULL, NULL, bn, NULL, g, NULL, NULL, true) != GDK_SUCCEED)
2756 0 : goto error;
2757 7864 : if ((*groups)->tkey &&
2758 898 : (g == NULL || (g->tsorted && g->trevsorted))) {
2759 : /* if new groups bat is key and the input
2760 : * group bat has a single value (both sorted
2761 : * and revsorted), we know the result bat is
2762 : * key */
2763 1122 : bn->tkey = true;
2764 : }
2765 : }
2766 :
2767 12993 : bat_iterator_end(&pbi);
2768 12993 : if (sorted)
2769 9896 : *sorted = bn;
2770 : else {
2771 3097 : BBPunfix(bn->batCacheid);
2772 3097 : bn = NULL;
2773 : }
2774 :
2775 12993 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o=" ALGOOPTBATFMT
2776 : ",g=" ALGOOPTBATFMT ",reverse=%d,nilslast=%d,"
2777 : "stable=%d) = (" ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2778 : ALGOOPTBATFMT " -- %ssort (" LLFMT " usec)\n",
2779 : ALGOBATPAR(b), ALGOOPTBATPAR(o), ALGOOPTBATPAR(g),
2780 : reverse, nilslast, stable, ALGOOPTBATPAR(bn),
2781 : ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on),
2782 : g ? "grouped " : "", GDKusec() - t0);
2783 12993 : if (pb && pb != b)
2784 1246 : BBPunfix(pb->batCacheid);
2785 : return GDK_SUCCEED;
2786 :
2787 0 : error:
2788 0 : bat_iterator_end(&pbi);
2789 0 : if (orderidxlock)
2790 0 : MT_lock_unset(&pb->batIdxLock);
2791 0 : if (oidxh)
2792 0 : HEAPdecref(oidxh, false);
2793 0 : BBPreclaim(bn);
2794 0 : if (pb && pb != b)
2795 0 : BBPunfix(pb->batCacheid);
2796 0 : BBPreclaim(on);
2797 0 : if (sorted)
2798 0 : *sorted = NULL;
2799 0 : if (order)
2800 0 : *order = NULL;
2801 0 : if (groups)
2802 0 : *groups = NULL;
2803 : return GDK_FAIL;
2804 : }
2805 :
2806 : /* return a new BAT of length n with seqbase hseq, and the constant v
2807 : * in the tail */
2808 : BAT *
2809 901529 : BATconstant(oid hseq, int tailtype, const void *v, BUN n, role_t role)
2810 : {
2811 901529 : BAT *bn;
2812 901529 : void *restrict p;
2813 901529 : BUN i;
2814 901529 : lng t0 = 0;
2815 :
2816 901529 : TRC_DEBUG_IF(ALGO) t0 = GDKusec();
2817 901529 : if (v == NULL)
2818 : return NULL;
2819 901529 : bn = COLnew(hseq, tailtype, n, role);
2820 901294 : if (bn != NULL && n > 0) {
2821 73862 : p = Tloc(bn, 0);
2822 73862 : switch (ATOMstorage(tailtype)) {
2823 23 : case TYPE_void:
2824 23 : v = &oid_nil;
2825 23 : BATtseqbase(bn, oid_nil);
2826 23 : break;
2827 0 : case TYPE_msk:
2828 0 : if (*(msk*)v) {
2829 0 : memset(p, 0xFF, 4 * ((n + 31) / 32));
2830 0 : if (n & 31) {
2831 0 : uint32_t *m = p;
2832 0 : m[n / 32] &= (1U << (n % 32)) - 1;
2833 : }
2834 : } else
2835 0 : memset(p, 0x00, 4 * ((n + 31) / 32));
2836 : break;
2837 9952 : case TYPE_bte:
2838 9952 : memset(p, *(bte*)v, n);
2839 9952 : break;
2840 : case TYPE_sht:
2841 7158552 : for (i = 0; i < n; i++)
2842 7142860 : ((sht *) p)[i] = *(sht *) v;
2843 : break;
2844 : case TYPE_int:
2845 : case TYPE_flt:
2846 : assert(sizeof(int) == sizeof(flt));
2847 219232934 : for (i = 0; i < n; i++)
2848 219227484 : ((int *) p)[i] = *(int *) v;
2849 : break;
2850 : case TYPE_lng:
2851 : case TYPE_dbl:
2852 : assert(sizeof(lng) == sizeof(dbl));
2853 237429550 : for (i = 0; i < n; i++)
2854 237402313 : ((lng *) p)[i] = *(lng *) v;
2855 : break;
2856 : #ifdef HAVE_HGE
2857 : case TYPE_hge:
2858 29077239 : for (i = 0; i < n; i++)
2859 29076033 : ((hge *) p)[i] = *(hge *) v;
2860 : break;
2861 : #endif
2862 : case TYPE_uuid:
2863 200047 : for (i = 0; i < n; i++)
2864 200038 : ((uuid *) p)[i] = *(uuid *) v;
2865 : break;
2866 14151 : case TYPE_str:
2867 : /* insert the first value, then just copy the
2868 : * offset lots of times */
2869 14151 : if (tfastins_nocheck(bn, 0, v) != GDK_SUCCEED) {
2870 0 : BBPreclaim(bn);
2871 0 : return NULL;
2872 : }
2873 14150 : char val[sizeof(var_t)];
2874 14150 : memcpy(val, Tloc(bn, 0), bn->twidth);
2875 14150 : if (bn->twidth == 1 && n > 1) {
2876 : /* single byte value: we have a
2877 : * function for that */
2878 7391 : memset(Tloc(bn, 1), val[0], n - 1);
2879 : } else {
2880 6759 : char *p = Tloc(bn, 0);
2881 6779 : for (i = 1; i < n; i++) {
2882 20 : p += bn->twidth;
2883 20 : memcpy(p, val, bn->twidth);
2884 : }
2885 : }
2886 : break;
2887 : default:
2888 333705 : for (i = 0; i < n; i++)
2889 333563 : if (tfastins_nocheck(bn, i, v) != GDK_SUCCEED) {
2890 0 : BBPreclaim(bn);
2891 0 : return NULL;
2892 : }
2893 : break;
2894 : }
2895 73861 : bn->theap->dirty = true;
2896 73861 : bn->tnil = n >= 1 && ATOMnilptr(tailtype) && (*ATOMcompare(tailtype))(v, ATOMnilptr(tailtype)) == 0;
2897 73856 : BATsetcount(bn, n);
2898 73858 : bn->tsorted = bn->trevsorted = ATOMlinear(tailtype);
2899 73858 : bn->tnonil = !bn->tnil;
2900 73858 : bn->tkey = BATcount(bn) <= 1;
2901 : }
2902 901290 : TRC_DEBUG(ALGO, "-> " ALGOOPTBATFMT " " LLFMT "usec\n",
2903 : ALGOOPTBATPAR(bn), GDKusec() - t0);
2904 : return bn;
2905 : }
2906 :
2907 : /*
2908 : * BAT Aggregates
2909 : *
2910 : * We retain the size() and card() aggregate results in the column
2911 : * descriptor. We would like to have such functionality in an
2912 : * extensible way for many aggregates, for DD (1) we do not want to
2913 : * change the binary BAT format on disk and (2) aggr and size are the
2914 : * most relevant aggregates.
2915 : *
2916 : * It is all hacked into the aggr[3] records; three adjacent integers
2917 : * that were left over in the column record. We refer to these as if
2918 : * it where an int aggr[3] array. The below routines set and retrieve
2919 : * the aggregate values from the tail of the BAT, as many
2920 : * aggregate-manipulating BAT functions work on tail.
2921 : *
2922 : * The rules are as follows: aggr[0] contains the alignment ID of the
2923 : * column (if set i.e. nonzero). Hence, if this value is nonzero and
2924 : * equal to b->talign, the precomputed aggregate values in
2925 : * aggr[GDK_AGGR_SIZE] and aggr[GDK_AGGR_CARD] hold. However, only one
2926 : * of them may be set at the time. This is encoded by the value
2927 : * int_nil, which cannot occur in these two aggregates.
2928 : *
2929 : * This was now extended to record the property whether we know there
2930 : * is a nil value present by mis-using the highest bits of both
2931 : * GDK_AGGR_SIZE and GDK_AGGR_CARD.
2932 : */
2933 :
2934 : void
2935 38059027 : PROPdestroy_nolock(BAT *b)
2936 : {
2937 38059027 : PROPrec *p = b->tprops;
2938 38059027 : PROPrec *n;
2939 :
2940 38059027 : b->tprops = NULL;
2941 38063228 : while (p) {
2942 4100 : n = p->next;
2943 4100 : assert(p->id != (enum prop_t) 20);
2944 4100 : VALclear(&p->v);
2945 4100 : GDKfree(p);
2946 4100 : p = n;
2947 : }
2948 38059128 : }
2949 :
2950 : void
2951 427 : PROPdestroy(BAT *b)
2952 : {
2953 427 : MT_lock_set(&b->theaplock);
2954 427 : PROPdestroy_nolock(b);
2955 427 : MT_lock_unset(&b->theaplock);
2956 427 : }
2957 :
2958 : ValPtr
2959 203000490 : BATgetprop_nolock(BAT *b, enum prop_t idx)
2960 : {
2961 203000490 : PROPrec *p;
2962 :
2963 203000490 : p = b->tprops;
2964 203006158 : while (p && p->id != idx)
2965 5668 : p = p->next;
2966 203000490 : return p ? &p->v : NULL;
2967 : }
2968 :
2969 : void
2970 408717 : BATrmprop_nolock(BAT *b, enum prop_t idx)
2971 : {
2972 408717 : PROPrec *prop = b->tprops, *prev = NULL;
2973 :
2974 408952 : while (prop) {
2975 408825 : if (prop->id == idx) {
2976 408590 : if (prev)
2977 106 : prev->next = prop->next;
2978 : else
2979 408484 : b->tprops = prop->next;
2980 408590 : VALclear(&prop->v);
2981 408590 : GDKfree(prop);
2982 408590 : return;
2983 : }
2984 235 : prev = prop;
2985 235 : prop = prop->next;
2986 : }
2987 : }
2988 :
2989 : ValPtr
2990 412729 : BATsetprop_nolock(BAT *b, enum prop_t idx, int type, const void *v)
2991 : {
2992 412729 : PROPrec *p;
2993 :
2994 412729 : p = b->tprops;
2995 419501 : while (p && p->id != idx)
2996 6772 : p = p->next;
2997 412729 : if (p == NULL) {
2998 412722 : if ((p = GDKmalloc(sizeof(PROPrec))) == NULL) {
2999 : /* properties are hints, so if we can't create
3000 : * one we ignore the error */
3001 0 : GDKclrerr();
3002 0 : return NULL;
3003 : }
3004 412722 : p->id = idx;
3005 412722 : p->next = b->tprops;
3006 412722 : p->v.vtype = 0;
3007 412722 : b->tprops = p;
3008 : } else {
3009 7 : VALclear(&p->v);
3010 : }
3011 412729 : if (VALinit(&p->v, type, v) == NULL) {
3012 : /* failed to initialize, so remove property */
3013 0 : BATrmprop_nolock(b, idx);
3014 0 : GDKclrerr();
3015 0 : p = NULL;
3016 : }
3017 0 : return p ? &p->v : NULL;
3018 : }
3019 :
3020 : ValPtr
3021 2874112 : BATgetprop(BAT *b, enum prop_t idx)
3022 : {
3023 2874112 : ValPtr p;
3024 :
3025 2874112 : MT_lock_set(&b->theaplock);
3026 2874003 : p = BATgetprop_nolock(b, idx);
3027 2874047 : MT_lock_unset(&b->theaplock);
3028 2873994 : return p;
3029 : }
3030 :
3031 : ValPtr
3032 3910 : BATsetprop(BAT *b, enum prop_t idx, int type, const void *v)
3033 : {
3034 3910 : ValPtr p;
3035 3910 : MT_lock_set(&b->theaplock);
3036 3910 : p = BATsetprop_nolock(b, idx, type, v);
3037 3910 : MT_lock_unset(&b->theaplock);
3038 3910 : return p;
3039 : }
3040 :
3041 : void
3042 2 : BATrmprop(BAT *b, enum prop_t idx)
3043 : {
3044 2 : MT_lock_set(&b->theaplock);
3045 2 : BATrmprop_nolock(b, idx);
3046 2 : MT_lock_unset(&b->theaplock);
3047 2 : }
3048 :
3049 : /*
3050 : * The BATcount_no_nil function counts all BUN in a BAT that have a
3051 : * non-nil tail value.
3052 : * This function does not fail (the callers currently don't check for failure).
3053 : */
3054 : BUN
3055 2203 : BATcount_no_nil(BAT *b, BAT *s)
3056 : {
3057 2203 : BUN cnt = 0;
3058 2203 : const void *restrict p, *restrict nil;
3059 2203 : const char *restrict base;
3060 2203 : int t;
3061 2203 : int (*cmp)(const void *, const void *);
3062 2203 : struct canditer ci;
3063 2203 : oid hseq;
3064 :
3065 2203 : BATcheck(b, 0);
3066 :
3067 2203 : hseq = b->hseqbase;
3068 2203 : canditer_init(&ci, b, s);
3069 2203 : BATiter bi = bat_iterator(b);
3070 2203 : if (bi.nonil) {
3071 1999 : bat_iterator_end(&bi);
3072 1999 : return ci.ncand;
3073 : }
3074 204 : p = bi.base;
3075 204 : t = ATOMbasetype(bi.type);
3076 204 : switch (t) {
3077 0 : case TYPE_void:
3078 0 : cnt = ci.ncand * BATtdensebi(&bi);
3079 0 : break;
3080 0 : case TYPE_msk:
3081 0 : cnt = ci.ncand;
3082 0 : break;
3083 15 : case TYPE_bte:
3084 31 : CAND_LOOP(&ci)
3085 16 : cnt += !is_bte_nil(((const bte *) p)[canditer_next(&ci) - hseq]);
3086 : break;
3087 0 : case TYPE_sht:
3088 0 : CAND_LOOP(&ci)
3089 0 : cnt += !is_sht_nil(((const sht *) p)[canditer_next(&ci) - hseq]);
3090 : break;
3091 79 : case TYPE_int:
3092 5681510 : CAND_LOOP(&ci)
3093 5681431 : cnt += !is_int_nil(((const int *) p)[canditer_next(&ci) - hseq]);
3094 : break;
3095 85 : case TYPE_lng:
3096 156873 : CAND_LOOP(&ci)
3097 156788 : cnt += !is_lng_nil(((const lng *) p)[canditer_next(&ci) - hseq]);
3098 : break;
3099 : #ifdef HAVE_HGE
3100 0 : case TYPE_hge:
3101 0 : CAND_LOOP(&ci)
3102 0 : cnt += !is_hge_nil(((const hge *) p)[canditer_next(&ci) - hseq]);
3103 : break;
3104 : #endif
3105 0 : case TYPE_flt:
3106 0 : CAND_LOOP(&ci)
3107 0 : cnt += !is_flt_nil(((const flt *) p)[canditer_next(&ci) - hseq]);
3108 : break;
3109 0 : case TYPE_dbl:
3110 0 : CAND_LOOP(&ci)
3111 0 : cnt += !is_dbl_nil(((const dbl *) p)[canditer_next(&ci) - hseq]);
3112 : break;
3113 0 : case TYPE_uuid:
3114 0 : CAND_LOOP(&ci)
3115 0 : cnt += !is_uuid_nil(((const uuid *) p)[canditer_next(&ci) - hseq]);
3116 : break;
3117 25 : case TYPE_str:
3118 25 : base = bi.vh->base;
3119 25 : switch (bi.width) {
3120 23 : case 1:
3121 4805 : CAND_LOOP(&ci)
3122 4782 : cnt += base[(var_t) ((const uint8_t *) p)[canditer_next(&ci) - hseq] + GDK_VAROFFSET] != '\200';
3123 : break;
3124 1 : case 2:
3125 144 : CAND_LOOP(&ci)
3126 143 : cnt += base[(var_t) ((const uint16_t *) p)[canditer_next(&ci) - hseq] + GDK_VAROFFSET] != '\200';
3127 : break;
3128 1 : case 4:
3129 168 : CAND_LOOP(&ci)
3130 167 : cnt += base[(var_t) ((const uint32_t *) p)[canditer_next(&ci) - hseq]] != '\200';
3131 : break;
3132 : #if SIZEOF_VAR_T == 8
3133 0 : case 8:
3134 0 : CAND_LOOP(&ci)
3135 0 : cnt += base[(var_t) ((const uint64_t *) p)[canditer_next(&ci) - hseq]] != '\200';
3136 : break;
3137 : #endif
3138 : default:
3139 0 : MT_UNREACHABLE();
3140 : }
3141 : break;
3142 0 : default:
3143 0 : nil = ATOMnilptr(t);
3144 0 : cmp = ATOMcompare(t);
3145 0 : if (nil == NULL) {
3146 0 : cnt = ci.ncand;
3147 0 : } else if (b->tvheap) {
3148 0 : base = b->tvheap->base;
3149 0 : CAND_LOOP(&ci)
3150 0 : cnt += (*cmp)(nil, base + ((const var_t *) p)[canditer_next(&ci) - hseq]) != 0;
3151 : } else {
3152 0 : CAND_LOOP(&ci)
3153 0 : cnt += (*cmp)(BUNtloc(bi, canditer_next(&ci) - hseq), nil) != 0;
3154 : }
3155 : break;
3156 : }
3157 204 : if (cnt == bi.count) {
3158 24 : MT_lock_set(&b->theaplock);
3159 24 : if (cnt == BATcount(b) && bi.h == b->theap) {
3160 : /* we learned something */
3161 24 : b->tnonil = true;
3162 24 : assert(!b->tnil);
3163 24 : b->tnil = false;
3164 : }
3165 24 : bat pbid = VIEWtparent(b);
3166 24 : MT_lock_unset(&b->theaplock);
3167 24 : if (pbid) {
3168 15 : BAT *pb = BATdescriptor(pbid);
3169 15 : if (pb) {
3170 15 : MT_lock_set(&pb->theaplock);
3171 15 : if (cnt == BATcount(pb) &&
3172 0 : bi.h == pb->theap &&
3173 0 : !pb->tnonil) {
3174 0 : pb->tnonil = true;
3175 0 : assert(!pb->tnil);
3176 0 : pb->tnil = false;
3177 : }
3178 15 : MT_lock_unset(&pb->theaplock);
3179 15 : BBPunfix(pb->batCacheid);
3180 : }
3181 : }
3182 : }
3183 204 : bat_iterator_end(&bi);
3184 204 : return cnt;
3185 : }
|