Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * (c) M. L. Kersten, P. Boncz, S. Manegold, N. Nes, K.S. Mullender
15 : * Common BAT Operations
16 : * We factor out all possible overhead by inlining code. This
17 : * includes the macros BUNhead and BUNtail, which do a test to see
18 : * whether the atom resides in the buns or in a variable storage
19 : * heap.
20 : */
21 : #include "monetdb_config.h"
22 : #include "gdk.h"
23 : #include "gdk_private.h"
24 :
25 : gdk_return
26 54282365 : unshare_varsized_heap(BAT *b)
27 : {
28 54282365 : if (ATOMvarsized(b->ttype) &&
29 22099300 : b->tvheap->parentid != b->batCacheid) {
30 1021 : Heap *h = GDKmalloc(sizeof(Heap));
31 1025 : if (h == NULL)
32 : return GDK_FAIL;
33 1025 : MT_thread_setalgorithm("unshare vheap");
34 2044 : *h = (Heap) {
35 1019 : .parentid = b->batCacheid,
36 1025 : .farmid = BBPselectfarm(b->batRole, TYPE_str, varheap),
37 : .refs = ATOMIC_VAR_INIT(1),
38 : };
39 1019 : strconcat_len(h->filename, sizeof(h->filename),
40 1019 : BBP_physical(b->batCacheid), ".theap", NULL);
41 1025 : if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
42 0 : HEAPfree(h, true);
43 0 : GDKfree(h);
44 0 : return GDK_FAIL;
45 : }
46 1020 : MT_lock_set(&b->theaplock);
47 1023 : Heap *oh = b->tvheap;
48 1023 : b->tvheap = h;
49 1023 : MT_lock_unset(&b->theaplock);
50 1024 : BBPrelease(oh->parentid);
51 1023 : HEAPdecref(oh, false);
52 : }
53 : return GDK_SUCCEED;
54 : }
55 :
56 : /* We try to be clever when appending one string bat to another.
57 : * First of all, we try to actually share the string heap so that we
58 : * don't need an extra copy, and if that can't be done, we see whether
59 : * it makes sense to just quickly copy the whole string heap instead
60 : * of inserting individual strings. See the comments in the code for
61 : * more information. */
62 : static gdk_return
63 85631 : insert_string_bat(BAT *b, BATiter *ni, struct canditer *ci, bool force, bool mayshare, QryCtx *qry_ctx)
64 : {
65 85631 : size_t toff = ~(size_t) 0; /* tail offset */
66 85631 : BUN p, r; /* loop variables */
67 85631 : const void *tp = NULL; /* tail value pointer */
68 85631 : var_t v;
69 85631 : size_t off; /* offset within n's string heap */
70 85631 : BUN cnt = ci->ncand;
71 85631 : BUN oldcnt = BATcount(b);
72 :
73 85631 : assert(b->ttype == TYPE_str);
74 85631 : assert(b->tbaseoff == 0);
75 85631 : assert(b->theap->parentid == b->batCacheid);
76 : /* only transient bats can use some other bat's string heap */
77 85631 : assert(b->batRole == TRANSIENT || b->tvheap->parentid == b->batCacheid);
78 85631 : if (cnt == 0)
79 : return GDK_SUCCEED;
80 :
81 85640 : if (b->tvheap == ni->vh) {
82 : /* vheaps are already shared, continue doing so: we just
83 : * need to append the offsets */
84 20401 : toff = 0;
85 20401 : MT_thread_setalgorithm("shared vheap");
86 65239 : } else if (mayshare && b->batRole == TRANSIENT && oldcnt == 0) {
87 : /* we can share the vheaps, so we then only need to
88 : * append the offsets */
89 51068 : MT_lock_set(&b->theaplock);
90 51204 : bat bid = b->tvheap->parentid;
91 51204 : HEAPdecref(b->tvheap, bid == b->batCacheid);
92 51277 : HEAPincref(ni->vh);
93 51284 : b->tvheap = ni->vh;
94 51284 : b->tascii = ni->ascii;
95 51284 : MT_lock_unset(&b->theaplock);
96 51304 : BBPretain(ni->vh->parentid);
97 51297 : if (bid != b->batCacheid)
98 0 : BBPrelease(bid);
99 51297 : toff = 0;
100 51297 : MT_thread_setalgorithm("share vheap");
101 : } else {
102 : /* no heap sharing, so also make sure the heap isn't
103 : * shared currently (we're not allowed to write in
104 : * another bat's heap) */
105 15196 : if (b->tvheap->parentid != b->batCacheid &&
106 1022 : unshare_varsized_heap(b) != GDK_SUCCEED) {
107 : return GDK_FAIL;
108 : }
109 14174 : if (oldcnt == 0 || (!GDK_ELIMDOUBLES(b->tvheap) &&
110 91 : !GDK_ELIMDOUBLES(ni->vh))) {
111 : /* we'll consider copying the string heap completely
112 : *
113 : * we first estimate how much space the string heap
114 : * should occupy, given the number of rows we need to
115 : * insert, then, if that is way smaller than the actual
116 : * space occupied, we will skip the copy and just insert
117 : * one by one */
118 : size_t len = 0;
119 5805320 : for (int i = 0; i < 1024; i++) {
120 5799591 : p = (BUN) (((double) rand() / RAND_MAX) * (cnt - 1));
121 5799675 : p = canditer_idx(ci, p) - ni->b->hseqbase;
122 5799675 : len += strlen(BUNtvar(*ni, p)) + 1;
123 : }
124 5729 : len = (len + 512) / 1024; /* rounded average length */
125 5729 : r = (GDK_ELIMLIMIT - GDK_STRHASHSIZE) / (len + 12);
126 : /* r is estimate of number of strings in
127 : * double-eliminated area */
128 5729 : BUN ecnt = ci->ncand;
129 5729 : if (ni->b->tunique_est > 0 && ecnt > ni->b->tunique_est)
130 55 : ecnt = (BUN)ni->b->tunique_est;
131 5729 : if (r < ecnt)
132 920 : len = GDK_ELIMLIMIT + (ecnt - r) * len;
133 : else
134 4809 : len = GDK_STRHASHSIZE + ecnt * (len + 12);
135 : /* len is total estimated expected size of vheap */
136 :
137 5729 : if (len > ni->vhfree / 2) {
138 : /* we copy the string heap, perhaps appending */
139 5719 : if (oldcnt == 0) {
140 5684 : toff = 0;
141 5684 : MT_thread_setalgorithm("copy vheap");
142 : } else {
143 35 : toff = (b->tvheap->free + GDK_VARALIGN - 1) & ~(GDK_VARALIGN - 1);
144 35 : MT_thread_setalgorithm("append vheap");
145 : }
146 :
147 5720 : MT_lock_set(&b->theaplock);
148 5719 : if (HEAPgrow(&b->tvheap, toff + ni->vhfree, force) != GDK_SUCCEED) {
149 0 : MT_lock_unset(&b->theaplock);
150 0 : return GDK_FAIL;
151 : }
152 5715 : memcpy(b->tvheap->base + toff, ni->vh->base, ni->vhfree);
153 5715 : b->tvheap->free = toff + ni->vhfree;
154 5715 : b->tvheap->dirty = true;
155 5715 : b->tascii &= ni->ascii;
156 5715 : MT_lock_unset(&b->theaplock);
157 : }
158 : }
159 : }
160 : /* if toff has the initial value of ~0, we insert strings
161 : * individually, otherwise we only copy (insert) offsets */
162 77415 : if (toff == ~(size_t) 0)
163 : v = GDK_VAROFFSET;
164 : else
165 77319 : v = b->tvheap->free - 1;
166 :
167 : /* make sure there is (vertical) space in the offset heap, we
168 : * may also widen thanks to v, set above */
169 85858 : if (GDKupgradevarheap(b, v, oldcnt + cnt < b->batCapacity ? b->batCapacity : oldcnt + cnt, b->batCount) != GDK_SUCCEED) {
170 : return GDK_FAIL;
171 : }
172 :
173 85799 : if (toff == 0 && ni->width == b->twidth && ci->tpe == cand_dense) {
174 : /* we don't need to do any translation of offset
175 : * values, so we can use fast memcpy */
176 77080 : MT_thread_setalgorithm("memcpy offsets");
177 77236 : memcpy(Tloc(b, BATcount(b)), (const char *) ni->base + ((ci->seq - ni->b->hseqbase) << ni->shift), cnt << ni->shift);
178 8719 : } else if (toff != ~(size_t) 0) {
179 : /* we don't need to insert any actual strings since we
180 : * have already made sure that they are all in b's
181 : * string heap at known locations (namely the offset
182 : * in n added to toff), so insert offsets from n after
183 : * adding toff into b */
184 : /* note the use of the "restrict" qualifier here: all
185 : * four pointers below point to the same value, but
186 : * only one of them will actually be used, hence we
187 : * still obey the rule for restrict-qualified
188 : * pointers */
189 182 : const uint8_t *restrict tbp = (const uint8_t *) ni->base;
190 182 : const uint16_t *restrict tsp = (const uint16_t *) ni->base;
191 182 : const uint32_t *restrict tip = (const uint32_t *) ni->base;
192 : #if SIZEOF_VAR_T == 8
193 182 : const uint64_t *restrict tlp = (const uint64_t *) ni->base;
194 : #endif
195 :
196 182 : MT_thread_setalgorithm("copy offset values");
197 182 : r = b->batCount;
198 3526810 : TIMEOUT_LOOP(cnt, qry_ctx) {
199 3526250 : p = canditer_next(ci) - ni->b->hseqbase;
200 3526250 : switch (ni->width) {
201 106 : case 1:
202 106 : v = (var_t) tbp[p] + GDK_VAROFFSET;
203 106 : break;
204 4638 : case 2:
205 4638 : v = (var_t) tsp[p] + GDK_VAROFFSET;
206 4638 : break;
207 3521506 : case 4:
208 3521506 : v = (var_t) tip[p];
209 3521506 : break;
210 : #if SIZEOF_VAR_T == 8
211 0 : case 8:
212 0 : v = (var_t) tlp[p];
213 0 : break;
214 : #endif
215 : default:
216 0 : MT_UNREACHABLE();
217 : }
218 3526250 : v = (var_t) ((size_t) v + toff);
219 3526250 : assert(v >= GDK_VAROFFSET);
220 3526250 : assert((size_t) v < b->tvheap->free);
221 3526250 : switch (b->twidth) {
222 4640 : case 1:
223 4640 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 8));
224 4640 : ((uint8_t *) b->theap->base)[r++] = (uint8_t) (v - GDK_VAROFFSET);
225 4640 : break;
226 761 : case 2:
227 761 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 16));
228 761 : ((uint16_t *) b->theap->base)[r++] = (uint16_t) (v - GDK_VAROFFSET);
229 761 : break;
230 3520849 : case 4:
231 : #if SIZEOF_VAR_T == 8
232 3520849 : assert(v < ((var_t) 1 << 32));
233 : #endif
234 3520849 : ((uint32_t *) b->theap->base)[r++] = (uint32_t) v;
235 3520849 : break;
236 : #if SIZEOF_VAR_T == 8
237 0 : case 8:
238 0 : ((uint64_t *) b->theap->base)[r++] = (uint64_t) v;
239 0 : break;
240 : #endif
241 : default:
242 3526250 : MT_UNREACHABLE();
243 : }
244 : }
245 8537 : } else if (b->tvheap->free < ni->vhfree / 2 ||
246 : GDK_ELIMDOUBLES(b->tvheap)) {
247 : /* if b's string heap is much smaller than n's string
248 : * heap, don't bother checking whether n's string
249 : * values occur in b's string heap; also, if b is
250 : * (still) fully double eliminated, we must continue
251 : * to use the double elimination mechanism */
252 8482 : r = b->batCount;
253 8482 : oid hseq = ni->b->hseqbase;
254 8482 : MT_thread_setalgorithm("insert string values");
255 11460656 : TIMEOUT_LOOP(cnt, qry_ctx) {
256 11443160 : p = canditer_next(ci) - hseq;
257 11295391 : tp = BUNtvar(*ni, p);
258 11295391 : if (tfastins_nocheckVAR(b, r, tp) != GDK_SUCCEED) {
259 : return GDK_FAIL;
260 : }
261 11443221 : r++;
262 : }
263 : } else {
264 : /* Insert values from n individually into b; however,
265 : * we check whether there is a string in b's string
266 : * heap at the same offset as the string is in n's
267 : * string heap (in case b's string heap is a copy of
268 : * n's). If this is the case, we just copy the
269 : * offset, otherwise we insert normally. */
270 55 : r = b->batCount;
271 55 : MT_thread_setalgorithm("insert string values with check");
272 6808 : TIMEOUT_LOOP(cnt, qry_ctx) {
273 6698 : p = canditer_next(ci) - ni->b->hseqbase;
274 6698 : off = BUNtvaroff(*ni, p); /* the offset */
275 6698 : tp = ni->vh->base + off; /* the string */
276 6698 : if (off < b->tvheap->free &&
277 6698 : strcmp(b->tvheap->base + off, tp) == 0) {
278 : /* we found the string at the same
279 : * offset in b's string heap as it was
280 : * in n's string heap, so we don't
281 : * have to insert a new string into b:
282 : * we can just copy the offset */
283 2831 : v = (var_t) off;
284 2831 : switch (b->twidth) {
285 0 : case 1:
286 0 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 8));
287 0 : ((uint8_t *) b->theap->base)[r] = (uint8_t) (v - GDK_VAROFFSET);
288 0 : break;
289 4 : case 2:
290 4 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 16));
291 4 : ((uint16_t *) b->theap->base)[r] = (uint16_t) (v - GDK_VAROFFSET);
292 4 : break;
293 2827 : case 4:
294 : #if SIZEOF_VAR_T == 8
295 2827 : assert(v < ((var_t) 1 << 32));
296 : #endif
297 2827 : ((uint32_t *) b->theap->base)[r] = (uint32_t) v;
298 2827 : break;
299 : #if SIZEOF_VAR_T == 8
300 0 : case 8:
301 0 : ((uint64_t *) b->theap->base)[r] = (uint64_t) v;
302 0 : break;
303 : #endif
304 : default:
305 0 : MT_UNREACHABLE();
306 : }
307 : } else {
308 3867 : if (tfastins_nocheckVAR(b, r, tp) != GDK_SUCCEED) {
309 : return GDK_FAIL;
310 : }
311 : }
312 6698 : r++;
313 : }
314 : }
315 86018 : TIMEOUT_CHECK(qry_ctx, TIMEOUT_HANDLER(GDK_FAIL, qry_ctx));
316 85630 : MT_rwlock_wrlock(&b->thashlock);
317 86030 : MT_lock_set(&b->theaplock);
318 85889 : BATsetcount(b, oldcnt + ci->ncand);
319 85906 : assert(b->batCapacity >= b->batCount);
320 85906 : MT_lock_unset(&b->theaplock);
321 : /* maintain hash */
322 89013 : for (r = oldcnt, cnt = BATcount(b); b->thash && r < cnt; r++) {
323 3029 : HASHappend_locked(b, r, b->tvheap->base + VarHeapVal(Tloc(b, 0), r, b->twidth));
324 : }
325 85984 : BUN nunique = b->thash ? b->thash->nunique : 0;
326 85984 : MT_rwlock_wrunlock(&b->thashlock);
327 86007 : if (nunique != 0) {
328 9 : MT_lock_set(&b->theaplock);
329 9 : b->tunique_est = (double) nunique;
330 9 : MT_lock_unset(&b->theaplock);
331 : }
332 : return GDK_SUCCEED;
333 : }
334 :
335 : static gdk_return
336 514 : append_varsized_bat(BAT *b, BATiter *ni, struct canditer *ci, bool mayshare)
337 : {
338 514 : BUN cnt = ci->ncand, r;
339 514 : oid hseq = ni->b->hseqbase;
340 :
341 : /* only transient bats can use some other bat's vheap */
342 514 : assert(b->batRole == TRANSIENT || b->tvheap->parentid == b->batCacheid);
343 : /* make sure the bats use var_t */
344 514 : assert(b->twidth == ni->width);
345 514 : assert(b->twidth == SIZEOF_VAR_T);
346 514 : if (cnt == 0)
347 : return GDK_SUCCEED;
348 514 : if (cnt > BATcapacity(b) - BATcount(b)) {
349 : /* if needed space exceeds a normal growth extend just
350 : * with what's needed */
351 18 : BUN ncap = BATcount(b) + cnt;
352 18 : BUN grows = BATgrows(b);
353 :
354 18 : if (ncap > grows)
355 : grows = ncap;
356 18 : if (BATextend(b, grows) != GDK_SUCCEED)
357 : return GDK_FAIL;
358 : }
359 514 : if (mayshare &&
360 515 : BATcount(b) == 0 &&
361 233 : b->batRole == TRANSIENT &&
362 148 : ni->restricted == BAT_READ &&
363 148 : b->tvheap != ni->vh) {
364 : /* if b is still empty, in the transient farm, and n
365 : * is read-only, we replace b's vheap with a reference
366 : * to n's */
367 148 : MT_lock_set(&b->theaplock);
368 148 : bat bid = b->tvheap->parentid;
369 148 : HEAPdecref(b->tvheap, true);
370 148 : HEAPincref(ni->vh);
371 148 : b->tvheap = ni->vh;
372 148 : MT_lock_unset(&b->theaplock);
373 148 : BBPretain(ni->vh->parentid);
374 148 : if (bid != b->batCacheid)
375 0 : BBPrelease(bid);
376 : }
377 514 : if (b->tvheap == ni->vh) {
378 : /* if b and n use the same vheap, we only need to copy
379 : * the offsets from n to b */
380 341 : if (ci->tpe == cand_dense) {
381 : /* fast memcpy since we copy a consecutive
382 : * chunk of memory */
383 341 : memcpy(Tloc(b, BATcount(b)),
384 341 : (const var_t *) ni->base + (ci->seq - hseq),
385 341 : cnt << b->tshift);
386 : } else {
387 0 : var_t *restrict dst = (var_t *) Tloc(b, BATcount(b));
388 0 : const var_t *restrict src = (const var_t *) ni->base;
389 0 : while (cnt > 0) {
390 0 : cnt--;
391 0 : *dst++ = src[canditer_next(ci) - hseq];
392 : }
393 : }
394 341 : MT_rwlock_wrlock(&b->thashlock);
395 341 : MT_lock_set(&b->theaplock);
396 341 : BATsetcount(b, BATcount(b) + ci->ncand);
397 341 : MT_lock_unset(&b->theaplock);
398 : /* maintain hash table */
399 341 : for (BUN i = BATcount(b) - ci->ncand;
400 341 : b->thash && i < BATcount(b);
401 0 : i++) {
402 0 : HASHappend_locked(b, i, b->tvheap->base + *(var_t *) Tloc(b, i));
403 : }
404 341 : BUN nunique = b->thash ? b->thash->nunique : 0;
405 341 : MT_rwlock_wrunlock(&b->thashlock);
406 341 : if (nunique != 0) {
407 0 : MT_lock_set(&b->theaplock);
408 0 : b->tunique_est = (double) nunique;
409 0 : MT_lock_unset(&b->theaplock);
410 : }
411 341 : return GDK_SUCCEED;
412 : }
413 : /* b and n do not share their vheap, so we need to copy data */
414 173 : if (b->tvheap->parentid != b->batCacheid) {
415 : /* if b shares its vheap with some other bat, unshare it */
416 18 : Heap *h = GDKmalloc(sizeof(Heap));
417 18 : if (h == NULL) {
418 : return GDK_FAIL;
419 : }
420 36 : *h = (Heap) {
421 18 : .parentid = b->batCacheid,
422 18 : .farmid = BBPselectfarm(b->batRole, b->ttype, varheap),
423 : .refs = ATOMIC_VAR_INIT(1),
424 : };
425 18 : strconcat_len(h->filename, sizeof(h->filename),
426 18 : BBP_physical(b->batCacheid), ".theap", NULL);
427 18 : if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
428 0 : HEAPfree(h, true);
429 0 : GDKfree(h);
430 0 : return GDK_FAIL;
431 : }
432 18 : MT_lock_set(&b->theaplock);
433 18 : Heap *oh = b->tvheap;
434 18 : b->tvheap = h;
435 18 : MT_lock_unset(&b->theaplock);
436 18 : if (oh->parentid != b->batCacheid)
437 18 : BBPrelease(oh->parentid);
438 18 : HEAPdecref(oh, false);
439 : }
440 173 : if (BATcount(b) == 0 &&
441 85 : ci->tpe == cand_dense && ci->ncand == ni->count) {
442 : /* just copy the heaps */
443 84 : MT_lock_set(&b->theaplock);
444 84 : if (HEAPgrow(&b->tvheap, ni->vhfree, false) != GDK_SUCCEED) {
445 0 : MT_lock_unset(&b->theaplock);
446 0 : return GDK_FAIL;
447 : }
448 84 : memcpy(b->theap->base, ni->base, ni->hfree);
449 84 : memcpy(b->tvheap->base, ni->vh->base, ni->vhfree);
450 84 : b->theap->free = ni->hfree;
451 84 : b->theap->dirty = true;
452 84 : b->tvheap->free = ni->vhfree;
453 84 : b->tvheap->dirty = true;
454 84 : BATsetcount(b, ni->count);
455 84 : b->tnil = ni->nil;
456 84 : b->tnonil = ni->nonil;
457 84 : b->tsorted = ni->sorted;
458 84 : b->tnosorted = ni->nosorted;
459 84 : b->trevsorted = ni->revsorted;
460 84 : b->tnorevsorted = ni->norevsorted;
461 84 : b->tkey = ni->key;
462 84 : b->tnokey[0] = ni->nokey[0];
463 84 : b->tnokey[1] = ni->nokey[1];
464 84 : b->tminpos = ni->minpos;
465 84 : b->tmaxpos = ni->maxpos;
466 84 : b->tunique_est = ni->unique_est;
467 84 : MT_lock_unset(&b->theaplock);
468 84 : return GDK_SUCCEED;
469 : }
470 : /* copy data from n to b */
471 : r = BATcount(b);
472 380593 : for (BUN i = 0; i < cnt; i++) {
473 380503 : BUN p = canditer_next(ci) - hseq;
474 380503 : const void *t = BUNtvar(*ni, p);
475 380503 : if (tfastins_nocheckVAR(b, r, t) != GDK_SUCCEED) {
476 : return GDK_FAIL;
477 : }
478 380504 : r++;
479 : }
480 90 : MT_rwlock_wrlock(&b->thashlock);
481 90 : if (b->thash) {
482 0 : r -= cnt;
483 0 : BATiter bi = bat_iterator_nolock(b);
484 0 : for (BUN i = 0; i < cnt; i++) {
485 0 : const void *t = BUNtvar(bi, r);
486 0 : HASHappend_locked(b, r, t);
487 0 : r++;
488 : }
489 : }
490 90 : BUN nunique = b->thash ? b->thash->nunique : 0;
491 90 : MT_lock_set(&b->theaplock);
492 89 : BATsetcount(b, r);
493 90 : if (nunique != 0)
494 0 : b->tunique_est = (double) nunique;
495 90 : MT_lock_unset(&b->theaplock);
496 90 : MT_rwlock_wrunlock(&b->thashlock);
497 90 : return GDK_SUCCEED;
498 : }
499 :
500 : static gdk_return
501 138 : append_msk_bat(BAT *b, BATiter *ni, struct canditer *ci)
502 : {
503 138 : if (ci->ncand == 0)
504 : return GDK_SUCCEED;
505 138 : if (BATextend(b, BATcount(b) + ci->ncand) != GDK_SUCCEED)
506 : return GDK_FAIL;
507 :
508 138 : MT_lock_set(&b->theaplock);
509 :
510 138 : uint32_t boff = b->batCount % 32;
511 138 : uint32_t *bp = (uint32_t *) b->theap->base + b->batCount / 32;
512 138 : b->batCount += ci->ncand;
513 138 : b->theap->dirty = true;
514 138 : b->theap->free = ((b->batCount + 31) / 32) * 4;
515 138 : if (ci->tpe == cand_dense) {
516 138 : const uint32_t *np;
517 138 : uint32_t noff, mask;
518 138 : BUN cnt;
519 138 : noff = (ci->seq - ni->b->hseqbase) % 32;
520 138 : cnt = ci->ncand;
521 138 : np = (const uint32_t *) ni->base + (ci->seq - ni->b->hseqbase) / 32;
522 138 : if (boff == noff) {
523 : /* words of b and n are aligned, so we don't
524 : * need to shift bits around */
525 56 : if (boff + cnt <= 32) {
526 : /* all new bits within one word */
527 50 : if (cnt == 32) {
528 0 : *bp = *np;
529 : } else {
530 50 : mask = ((1U << cnt) - 1) << boff;
531 50 : *bp &= ~mask;
532 50 : *bp |= *np & mask;
533 : }
534 : } else {
535 : /* multiple words of b are affected */
536 6 : if (boff != 0) {
537 : /* first fill up the rest of the first
538 : * word */
539 0 : mask = ~0U << boff;
540 0 : *bp &= ~mask;
541 0 : *bp++ |= *np++ & mask;
542 0 : cnt -= 32 - boff;
543 : }
544 6 : if (cnt >= 32) {
545 : /* copy an integral number of words fast */
546 6 : BUN nw = cnt / 32;
547 6 : memcpy(bp, np, nw*sizeof(int));
548 6 : bp += nw;
549 6 : np += nw;
550 6 : cnt %= 32;
551 : }
552 6 : if (cnt > 0) {
553 : /* do the left over bits */
554 6 : mask = (1U << cnt) - 1;
555 6 : *bp = *np & mask;
556 : }
557 : }
558 82 : } else if (boff > noff) {
559 82 : if (boff + cnt <= 32) {
560 : /* we only need to copy bits from a
561 : * single word of n to a single word
562 : * of b */
563 : /* boff > 0, so cnt < 32, hence the
564 : * shift is ok */
565 74 : mask = (1U << cnt) - 1;
566 74 : *bp &= ~(mask << boff);
567 74 : *bp |= (*np & (mask << noff)) << (boff - noff);
568 : } else {
569 : /* first fill the rest of the last partial
570 : * word of b, so that's 32-boff bits */
571 8 : mask = (1U << (32 - boff)) - 1;
572 8 : *bp &= ~(mask << boff);
573 8 : *bp++ |= (*np & (mask << noff)) << (boff - noff);
574 8 : cnt -= 32 - boff;
575 :
576 : /* set boff and noff to the amount we need to
577 : * shift bits in consecutive words of n around
578 : * to fit into the next word of b; set mask to
579 : * the mask of the bottom bits of n that fit
580 : * in a word of b (and the complement are the
581 : * top bits that go to another word of b) */
582 8 : boff -= noff;
583 8 : noff = 32 - boff;
584 8 : mask = (1U << noff) - 1;
585 140 : while (cnt >= 32) {
586 132 : *bp = (*np++ & ~mask) >> noff;
587 132 : *bp++ |= (*np & mask) << boff;
588 132 : cnt -= 32;
589 : }
590 8 : if (cnt > noff) {
591 : /* the last bits come from two words
592 : * in n */
593 4 : *bp = (*np++ & ~mask) >> noff;
594 4 : cnt -= noff;
595 4 : mask = (1U << cnt) - 1;
596 4 : *bp++ |= (*np & mask) << boff;
597 4 : } else if (cnt > 0) {
598 : /* the last bits come from a single
599 : * word in n */
600 4 : mask = ((1U << cnt) - 1) << noff;
601 4 : *bp = (*np & mask) >> noff;
602 : }
603 : }
604 : } else {
605 : /* boff < noff */
606 0 : if (noff + cnt <= 32) {
607 : /* only need part of the first word of n */
608 0 : assert(cnt < 32); /* noff > 0, so cnt < 32 */
609 0 : mask = (1U << cnt) - 1;
610 0 : *bp &= ~(mask << boff);
611 0 : *bp |= (*np & (mask << noff)) >> (noff - boff);
612 0 : } else if (boff + cnt <= 32) {
613 : /* only need to fill a single word of
614 : * b, but from two of n */
615 0 : if (cnt < 32)
616 0 : *bp &= ~(((1U << cnt) - 1) << boff);
617 : else
618 0 : *bp = 0;
619 0 : mask = ~((1U << noff) - 1);
620 0 : *bp |= (*np++ & mask) >> (noff - boff);
621 0 : cnt -= 32 - noff;
622 0 : mask = (1U << cnt) - 1;
623 0 : *bp |= (*np & mask) << (32 - noff);
624 : } else {
625 0 : if (boff > 0) {
626 : /* fill the rest of the first word of b */
627 0 : cnt -= 32 - boff;
628 0 : *bp &= (1U << boff) - 1;
629 0 : mask = ~((1U << noff) - 1);
630 0 : noff -= boff;
631 0 : boff = 32 - noff;
632 0 : *bp |= (*np++ & mask) >> noff;
633 0 : *bp |= (*np & ((1U << noff) - 1)) << boff;
634 : } else {
635 0 : boff = 32 - noff;
636 : }
637 0 : mask = (1U << noff) - 1;
638 0 : while (cnt >= 32) {
639 0 : *bp = (*np++ & ~mask) >> noff;
640 0 : *bp++ |= (*np & mask) << boff;
641 0 : cnt -= 32;
642 : }
643 0 : if (cnt > 0) {
644 0 : *bp = (*np++ & ~mask) >> noff;
645 0 : if (cnt > noff)
646 0 : *bp++ |= (*np & mask) << boff;
647 : }
648 : }
649 : }
650 : } else {
651 0 : oid o;
652 0 : uint32_t v = boff > 0 ? *bp & ((1U << boff) - 1) : 0;
653 0 : do {
654 0 : for (uint32_t i = boff; i < 32; i++) {
655 0 : o = canditer_next(ci);
656 0 : if (is_oid_nil(o))
657 : break;
658 0 : o -= ni->b->hseqbase;
659 0 : v |= (uint32_t) Tmskval(ni, o - ni->b->hseqbase) << i;
660 : }
661 0 : *bp++ = v;
662 0 : v = 0;
663 0 : boff = 0;
664 0 : } while (!is_oid_nil(o));
665 : }
666 138 : MT_lock_unset(&b->theaplock);
667 138 : return GDK_SUCCEED;
668 : }
669 :
670 : /* Append the contents of BAT n (subject to the optional candidate
671 : * list s) to BAT b. If b is empty, b will get the seqbase of s if it
672 : * was passed in, and else the seqbase of n. */
673 : static gdk_return
674 2251111 : BATappend2(BAT *b, BAT *n, BAT *s, bool force, bool mayshare)
675 : {
676 2251111 : struct canditer ci;
677 2251111 : BUN r;
678 2251111 : oid hseq = n->hseqbase;
679 2251111 : char buf[64];
680 2251111 : lng t0 = 0;
681 2251111 : const ValRecord *prop = NULL;
682 2251111 : ValRecord minprop, maxprop;
683 2251111 : const void *minbound = NULL, *maxbound = NULL;
684 2251111 : int (*atomcmp) (const void *, const void *) = ATOMcompare(b->ttype);
685 2251111 : bool hlocked = false;
686 :
687 2251111 : if (b == NULL || n == NULL || BATcount(n) == 0) {
688 : return GDK_SUCCEED;
689 : }
690 1305881 : assert(b->theap->parentid == b->batCacheid);
691 :
692 1305881 : TRC_DEBUG_IF(ALGO) {
693 0 : t0 = GDKusec();
694 0 : snprintf(buf, sizeof(buf), ALGOBATFMT, ALGOBATPAR(b));
695 : }
696 :
697 1305881 : ALIGNapp(b, force, GDK_FAIL);
698 :
699 3667746 : if (ATOMstorage(ATOMtype(b->ttype)) != ATOMstorage(ATOMtype(n->ttype))) {
700 0 : GDKerror("Incompatible operands ("ALGOBATFMT" vs. "ALGOBATFMT").\n", ALGOBATPAR(b), ALGOBATPAR(n));
701 0 : return GDK_FAIL;
702 : }
703 :
704 1308615 : if (BATttype(b) != BATttype(n) &&
705 : ATOMtype(b->ttype) != ATOMtype(n->ttype)) {
706 0 : TRC_DEBUG(CHECK_, "Interpreting %s as %s.\n",
707 : ATOMname(BATttype(n)), ATOMname(BATttype(b)));
708 : }
709 :
710 1308615 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
711 :
712 1308308 : BATiter ni = bat_iterator(n);
713 :
714 1309260 : canditer_init(&ci, n, s);
715 1308094 : if (ci.ncand == 0) {
716 0 : goto doreturn;
717 : }
718 :
719 1308094 : if (BATcount(b) + ci.ncand > BUN_MAX) {
720 0 : bat_iterator_end(&ni);
721 0 : GDKerror("combined BATs too large\n");
722 0 : return GDK_FAIL;
723 : }
724 :
725 1308094 : if (b->hseqbase + BATcount(b) + ci.ncand >= GDK_oid_max) {
726 0 : bat_iterator_end(&ni);
727 0 : GDKerror("overflow of head value\n");
728 0 : return GDK_FAIL;
729 : }
730 :
731 1308094 : OIDXdestroy(b);
732 1309759 : STRMPdestroy(b); /* TODO: use STRMPappendBitString */
733 1309846 : RTREEdestroy(b);
734 :
735 1308707 : MT_lock_set(&b->theaplock);
736 1308786 : const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
737 1308065 : if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL &&
738 48 : VALcopy(&minprop, prop) != NULL) {
739 48 : minbound = VALptr(&minprop);
740 48 : if (ci.ncand == BATcount(n) &&
741 62 : ni.minpos != BUN_NONE &&
742 14 : atomcmp(BUNtail(ni, ni.minpos), minbound) < 0) {
743 0 : assert(0);
744 : GDKerror("value out of bounds\n");
745 : MT_lock_unset(&b->theaplock);
746 : goto bailout;
747 : }
748 : }
749 1307998 : if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL &&
750 40 : VALcopy(&maxprop, prop) != NULL) {
751 40 : maxbound = VALptr(&maxprop);
752 40 : if (ci.ncand == BATcount(n) &&
753 52 : ni.maxpos != BUN_NONE &&
754 12 : atomcmp(BUNtail(ni, ni.maxpos), maxbound) >= 0) {
755 0 : assert(0);
756 : GDKerror("value out of bounds\n");
757 : MT_lock_unset(&b->theaplock);
758 : goto bailout;
759 : }
760 : }
761 :
762 1306825 : if (BATcount(b) == 0 || b->tmaxpos != BUN_NONE) {
763 398598 : if (ni.maxpos != BUN_NONE) {
764 153428 : BATiter bi = bat_iterator_nolock(b);
765 153428 : if (BATcount(b) == 0 || atomcmp(BUNtail(bi, b->tmaxpos), BUNtail(ni, ni.maxpos)) < 0) {
766 147516 : if (s == NULL) {
767 147378 : b->tmaxpos = BATcount(b) + ni.maxpos;
768 : } else {
769 138 : b->tmaxpos = BUN_NONE;
770 : }
771 : }
772 : } else {
773 245170 : b->tmaxpos = BUN_NONE;
774 : }
775 : }
776 1306834 : if (BATcount(b) == 0 || b->tminpos != BUN_NONE) {
777 397903 : if (ni.minpos != BUN_NONE) {
778 152982 : BATiter bi = bat_iterator_nolock(b);
779 152982 : if (BATcount(b) == 0 || atomcmp(BUNtail(bi, b->tminpos), BUNtail(ni, ni.minpos)) > 0) {
780 145965 : if (s == NULL) {
781 145829 : b->tminpos = BATcount(b) + ni.minpos;
782 : } else {
783 136 : b->tminpos = BUN_NONE;
784 : }
785 : }
786 : } else {
787 244921 : b->tminpos = BUN_NONE;
788 : }
789 : }
790 1306834 : if (ci.ncand > BATcount(b) / gdk_unique_estimate_keep_fraction) {
791 1305290 : b->tunique_est = 0;
792 : }
793 1306834 : MT_lock_unset(&b->theaplock);
794 : /* load hash so that we can maintain it */
795 1310323 : (void) BATcheckhash(b);
796 :
797 1309715 : if (b->ttype == TYPE_void) {
798 : /* b does not have storage, keep it that way if we can */
799 129054 : HASHdestroy(b); /* we're not maintaining the hash here */
800 129027 : MT_lock_set(&b->theaplock);
801 129039 : if (BATtdensebi(&ni) && ci.tpe == cand_dense &&
802 125188 : (BATcount(b) == 0 ||
803 108479 : (BATtdense(b) &&
804 108479 : b->tseqbase + BATcount(b) == n->tseqbase + ci.seq - hseq))) {
805 : /* n is also dense and consecutive with b */
806 125128 : if (BATcount(b) == 0) {
807 16708 : if (minbound && n->tseqbase + ci.seq - hseq < *(const oid *)minbound) {
808 0 : assert(0);
809 : GDKerror("value not within bounds\n");
810 : MT_lock_unset(&b->theaplock);
811 : goto bailout;
812 : }
813 16708 : BATtseqbase(b, n->tseqbase + ci.seq - hseq);
814 : }
815 125105 : if (maxbound && b->tseqbase + BATcount(b) + ci.ncand >= *(const oid *)maxbound) {
816 0 : assert(0);
817 : GDKerror("value not within bounds\n");
818 : MT_lock_unset(&b->theaplock);
819 : goto bailout;
820 : }
821 125105 : BATsetcount(b, BATcount(b) + ci.ncand);
822 125096 : MT_lock_unset(&b->theaplock);
823 125129 : goto doreturn;
824 : }
825 3911 : if ((BATcount(b) == 0 || is_oid_nil(b->tseqbase)) &&
826 20 : ni.type == TYPE_void && is_oid_nil(n->tseqbase)) {
827 : /* both b and n are void/nil */
828 0 : if (notnull) {
829 0 : assert(0);
830 : GDKerror("NULL value not within bounds\n");
831 : MT_lock_unset(&b->theaplock);
832 : goto bailout;
833 : }
834 0 : BATtseqbase(b, oid_nil);
835 0 : BATsetcount(b, BATcount(b) + ci.ncand);
836 0 : MT_lock_unset(&b->theaplock);
837 0 : goto doreturn;
838 : }
839 : /* we need to materialize b; allocate enough capacity */
840 3911 : MT_lock_unset(&b->theaplock);
841 3911 : if (BATmaterialize(b, BATcount(b) + ci.ncand) != GDK_SUCCEED) {
842 0 : goto bailout;
843 : }
844 : }
845 :
846 : /* property setting */
847 1184571 : MT_lock_set(&b->theaplock);
848 1183296 : r = BATcount(b);
849 :
850 1183296 : if (BATcount(b) == 0) {
851 373186 : b->tsorted = ni.sorted;
852 373186 : b->trevsorted = ni.revsorted;
853 373186 : b->tseqbase = oid_nil;
854 373186 : b->tnonil = ni.nonil;
855 373186 : b->tnil = ni.nil && ci.ncand == BATcount(n);
856 373186 : if (ci.tpe == cand_dense) {
857 373034 : b->tnosorted = ci.seq - hseq <= ni.nosorted && ni.nosorted < ci.seq + ci.ncand - hseq ? ni.nosorted + hseq - ci.seq : 0;
858 373034 : b->tnorevsorted = ci.seq - hseq <= ni.norevsorted && ni.norevsorted < ci.seq + ci.ncand - hseq ? ni.norevsorted + hseq - ci.seq : 0;
859 373034 : if (BATtdensebi(&ni)) {
860 2121 : b->tseqbase = n->tseqbase + ci.seq - hseq;
861 : }
862 : } else {
863 152 : b->tnosorted = 0;
864 152 : b->tnorevsorted = 0;
865 : }
866 373186 : b->tkey = ni.key;
867 373186 : if (ci.ncand == BATcount(n)) {
868 372140 : b->tnokey[0] = ni.nokey[0];
869 372140 : b->tnokey[1] = ni.nokey[1];
870 : } else {
871 1046 : b->tnokey[0] = b->tnokey[1] = 0;
872 : }
873 : } else {
874 810110 : BUN last = r - 1;
875 810110 : BATiter bi = bat_iterator_nolock(b);
876 810110 : int xx = ATOMcmp(b->ttype,
877 : BUNtail(ni, ci.seq - hseq),
878 : BUNtail(bi, last));
879 809437 : if (b->tsorted && (!ni.sorted || xx < 0)) {
880 22067 : b->tsorted = false;
881 22067 : b->tnosorted = 0;
882 22067 : b->tseqbase = oid_nil;
883 : }
884 809437 : if (b->trevsorted &&
885 68248 : (!ni.revsorted || xx > 0)) {
886 18685 : b->trevsorted = false;
887 18685 : b->tnorevsorted = 0;
888 : }
889 809437 : if (b->tkey &&
890 76356 : (!(b->tsorted || b->trevsorted) ||
891 61565 : !ni.key || xx == 0)) {
892 20763 : BATkey(b, false);
893 : }
894 809378 : if (b->ttype != TYPE_void && b->tsorted && BATtdense(b) &&
895 4080 : (!BATtdensebi(&ni) ||
896 552 : ci.tpe != cand_dense ||
897 552 : 1 + *(oid *) BUNtloc(bi, last) != BUNtoid(n, ci.seq - hseq))) {
898 3547 : b->tseqbase = oid_nil;
899 : }
900 809378 : b->tnonil &= ni.nonil;
901 1606448 : b->tnil |= ni.nil && ci.ncand == ni.count;
902 : }
903 1182564 : MT_lock_unset(&b->theaplock);
904 1184420 : if (b->ttype == TYPE_str) {
905 85848 : if (insert_string_bat(b, &ni, &ci, force, mayshare, qry_ctx) != GDK_SUCCEED) {
906 0 : goto bailout;
907 : }
908 1098572 : } else if (ATOMvarsized(b->ttype)) {
909 515 : if (append_varsized_bat(b, &ni, &ci, mayshare) != GDK_SUCCEED) {
910 0 : goto bailout;
911 : }
912 1098057 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
913 : /* no bounds and NOT_NULL property on MSK bats */
914 138 : assert(minbound == NULL && maxbound == NULL && !notnull);
915 138 : if (append_msk_bat(b, &ni, &ci) != GDK_SUCCEED) {
916 0 : goto bailout;
917 : }
918 : } else {
919 1097919 : if (ci.ncand > BATcapacity(b) - BATcount(b)) {
920 : /* if needed space exceeds a normal growth
921 : * extend just with what's needed */
922 10633 : BUN ncap = BATcount(b) + ci.ncand;
923 10633 : BUN grows = BATgrows(b);
924 :
925 10613 : if (ncap > grows)
926 : grows = ncap;
927 10613 : if (BATextend(b, grows) != GDK_SUCCEED) {
928 0 : goto bailout;
929 : }
930 : }
931 1098027 : MT_rwlock_wrlock(&b->thashlock);
932 1098600 : hlocked = true;
933 1098600 : if (b->ttype != TYPE_void &&
934 1098359 : ni.type != TYPE_void &&
935 1093059 : ci.tpe == cand_dense) {
936 : /* use fast memcpy if we can */
937 1092984 : memcpy(Tloc(b, BATcount(b)),
938 1092984 : (const char *) ni.base + ((ci.seq - hseq) << ni.shift),
939 1092984 : ci.ncand << ni.shift);
940 1092997 : for (BUN i = 0; b->thash && i < ci.ncand; i++) {
941 3608 : HASHappend_locked(b, r, Tloc(b, r));
942 13 : r++;
943 : }
944 : } else {
945 5616 : const void *atomnil = ATOMnilptr(b->ttype);
946 3902176 : TIMEOUT_LOOP(ci.ncand, qry_ctx) {
947 3890923 : BUN p = canditer_next(&ci) - hseq;
948 3887348 : const void *t = BUNtail(ni, p);
949 3890768 : bool isnil = atomcmp(t, atomnil) == 0;
950 3886383 : if (notnull && isnil) {
951 0 : assert(0);
952 : GDKerror("NULL value not within bounds\n");
953 : goto bailout;
954 3886383 : } else if (minbound &&
955 3886383 : !isnil &&
956 0 : atomcmp(t, minbound) < 0) {
957 0 : assert(0);
958 : GDKerror("value not within bounds\n");
959 : goto bailout;
960 3886383 : } else if (maxbound &&
961 0 : !isnil &&
962 0 : atomcmp(t, maxbound) >= 0) {
963 0 : assert(0);
964 : GDKerror("value not within bounds\n");
965 : goto bailout;
966 3886383 : } else if (tfastins_nocheck(b, r, t) != GDK_SUCCEED) {
967 0 : goto bailout;
968 : }
969 3890780 : if (b->thash)
970 0 : HASHappend_locked(b, r, t);
971 3890924 : r++;
972 : }
973 5616 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
974 : }
975 1095005 : BUN nunique;
976 1095005 : nunique = b->thash ? b->thash->nunique : 0;
977 1095005 : MT_lock_set(&b->theaplock);
978 1097075 : BATsetcount(b, b->batCount + ci.ncand);
979 1096955 : if (nunique != 0)
980 5 : b->tunique_est = (double) nunique;
981 1096955 : MT_lock_unset(&b->theaplock);
982 1098670 : assert(hlocked);
983 1098670 : MT_rwlock_wrunlock(&b->thashlock);
984 1098670 : hlocked = false;
985 : }
986 :
987 1310033 : doreturn:
988 1310033 : bat_iterator_end(&ni);
989 1306407 : if (minbound)
990 48 : VALclear(&minprop);
991 1307471 : if (maxbound)
992 40 : VALclear(&maxprop);
993 1307471 : TRC_DEBUG(ALGO, "b=%s,n=" ALGOBATFMT ",s=" ALGOOPTBATFMT
994 : " -> " ALGOBATFMT " (" LLFMT " usec)\n",
995 : buf, ALGOBATPAR(n), ALGOOPTBATPAR(s), ALGOBATPAR(b),
996 : GDKusec() - t0);
997 :
998 : return GDK_SUCCEED;
999 : bailout:
1000 0 : if (hlocked)
1001 0 : MT_rwlock_wrunlock(&b->thashlock);
1002 0 : if (minbound)
1003 0 : VALclear(&minprop);
1004 0 : if (maxbound)
1005 0 : VALclear(&maxprop);
1006 0 : bat_iterator_end(&ni);
1007 0 : return GDK_FAIL;
1008 : }
1009 :
1010 : gdk_return
1011 2258453 : BATappend(BAT *b, BAT *n, BAT *s, bool force)
1012 : {
1013 2258453 : return BATappend2(b, n, s, force, true);
1014 : }
1015 :
1016 : gdk_return
1017 4 : BATdel(BAT *b, BAT *d)
1018 : {
1019 4 : void (*atmdel) (Heap *, var_t *) = BATatoms[b->ttype].atomDel;
1020 4 : MT_lock_set(&b->theaplock);
1021 4 : BATiter bi = bat_iterator_nolock(b);
1022 4 : MT_lock_unset(&b->theaplock);
1023 :
1024 4 : assert(ATOMtype(d->ttype) == TYPE_oid);
1025 4 : assert(d->tsorted);
1026 4 : assert(d->tkey);
1027 4 : if (BATcount(d) == 0)
1028 : return GDK_SUCCEED;
1029 4 : OIDXdestroy(b);
1030 4 : HASHdestroy(b);
1031 4 : PROPdestroy(b);
1032 4 : STRMPdestroy(b);
1033 4 : RTREEdestroy(b);
1034 4 : if (BATtdense(d)) {
1035 2 : oid o = d->tseqbase;
1036 2 : BUN c = BATcount(d);
1037 :
1038 2 : if (o + c <= b->hseqbase)
1039 : return GDK_SUCCEED;
1040 2 : if (o < b->hseqbase) {
1041 0 : c -= b->hseqbase - o;
1042 0 : o = b->hseqbase;
1043 : }
1044 2 : if (o - b->hseqbase < b->batInserted) {
1045 0 : GDKerror("cannot delete committed values\n");
1046 0 : return GDK_FAIL;
1047 : }
1048 2 : if (o + c > b->hseqbase + BATcount(b))
1049 0 : c = b->hseqbase + BATcount(b) - o;
1050 2 : if (c == 0)
1051 : return GDK_SUCCEED;
1052 2 : if (atmdel) {
1053 0 : BUN p = o - b->hseqbase;
1054 0 : BUN q = p + c;
1055 0 : while (p < q) {
1056 0 : (*atmdel)(b->tvheap, (var_t *) BUNtloc(bi, p));
1057 0 : p++;
1058 : }
1059 : }
1060 2 : if (BATtdense(b) && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED)
1061 : return GDK_FAIL;
1062 2 : MT_lock_set(&b->theaplock);
1063 2 : if (o + c < b->hseqbase + BATcount(b)) {
1064 0 : o -= b->hseqbase;
1065 0 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1066 0 : BUN n = BATcount(b) - (o + c);
1067 : /* not very efficient, but first see
1068 : * how much this is used */
1069 0 : for (BUN i = 0; i < n; i++)
1070 0 : mskSetVal(b, o + i,
1071 0 : mskGetVal(b, o + c + i));
1072 : } else {
1073 0 : memmove(Tloc(b, o),
1074 0 : Tloc(b, o + c),
1075 0 : b->twidth * (BATcount(b) - (o + c)));
1076 : }
1077 0 : b->theap->dirty = true;
1078 : // o += b->hseqbase; // if this were to be used again
1079 : }
1080 2 : b->batCount -= c;
1081 : } else {
1082 2 : BATiter di = bat_iterator(d);
1083 2 : const oid *o = (const oid *) di.base;
1084 2 : const oid *s;
1085 2 : BUN c = di.count;
1086 2 : BUN nd = 0;
1087 2 : BUN pos;
1088 2 : char *p = NULL;
1089 :
1090 2 : if (o[c - 1] <= b->hseqbase) {
1091 0 : bat_iterator_end(&di);
1092 0 : return GDK_SUCCEED;
1093 : }
1094 2 : while (*o < b->hseqbase) {
1095 0 : o++;
1096 0 : c--;
1097 : }
1098 2 : if (*o - b->hseqbase < b->batInserted) {
1099 0 : bat_iterator_end(&di);
1100 0 : GDKerror("cannot delete committed values\n");
1101 0 : return GDK_FAIL;
1102 : }
1103 2 : if (BATtdense(b) && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED) {
1104 0 : bat_iterator_end(&di);
1105 0 : return GDK_FAIL;
1106 : }
1107 2 : s = o;
1108 2 : pos = *o - b->hseqbase;
1109 2 : if (ATOMstorage(b->ttype) != TYPE_msk)
1110 2 : p = Tloc(b, pos);
1111 6 : while (c > 0 && *o < b->hseqbase + BATcount(b)) {
1112 4 : size_t n;
1113 4 : if (atmdel)
1114 0 : (*atmdel)(b->tvheap, (var_t *) BUNtloc(bi, *o - b->hseqbase));
1115 4 : o++;
1116 4 : c--;
1117 4 : nd++;
1118 4 : if (c == 0 || *o - b->hseqbase >= BATcount(b))
1119 2 : n = b->hseqbase + BATcount(b) - o[-1] - 1;
1120 2 : else if ((oid) (o - s) < *o - *s)
1121 2 : n = o[0] - o[-1] - 1;
1122 : else
1123 : n = 0;
1124 4 : if (n > 0) {
1125 2 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1126 0 : BUN opos = o[-1] + 1 - b->hseqbase;
1127 : /* not very efficient, but
1128 : * first see how much this is
1129 : * used */
1130 0 : for (BUN i = 0; i < n; i++) {
1131 0 : mskSetVal(b, pos + i,
1132 0 : mskGetVal(b, opos + i));
1133 : }
1134 0 : pos += n;
1135 : } else {
1136 2 : n *= b->twidth;
1137 2 : memmove(p,
1138 2 : Tloc(b, o[-1] + 1 - b->hseqbase),
1139 : n);
1140 2 : p += n;
1141 : }
1142 : s = o;
1143 : }
1144 : }
1145 2 : bat_iterator_end(&di);
1146 2 : MT_lock_set(&b->theaplock);
1147 2 : b->theap->dirty = true;
1148 2 : b->batCount -= nd;
1149 : }
1150 4 : if (b->batCount <= 1) {
1151 : /* some trivial properties */
1152 2 : b->tkey = true;
1153 2 : b->tsorted = b->trevsorted = true;
1154 2 : if (b->batCount == 0) {
1155 2 : b->tnil = false;
1156 2 : b->tnonil = true;
1157 : }
1158 : }
1159 : /* not sure about these anymore */
1160 4 : b->tnosorted = b->tnorevsorted = 0;
1161 4 : b->tnokey[0] = b->tnokey[1] = 0;
1162 4 : b->tminpos = BUN_NONE;
1163 4 : b->tmaxpos = BUN_NONE;
1164 4 : b->tunique_est = 0.0;
1165 4 : MT_lock_unset(&b->theaplock);
1166 :
1167 4 : return GDK_SUCCEED;
1168 : }
1169 :
1170 : /*
1171 : * Replace all values in b with values from n whose location is given by
1172 : * the oid in either p or positions.
1173 : * If positions is used, autoincr specifies whether it is the first of a
1174 : * dense range of positions or whether it is a full-blown array of
1175 : * position.
1176 : * If mayappend is set, the position in p/positions may refer to
1177 : * locations beyond the end of b.
1178 : */
1179 : static gdk_return
1180 215744 : BATappend_or_update(BAT *b, BAT *p, const oid *positions, BAT *n,
1181 : bool mayappend, bool autoincr, bool force)
1182 : {
1183 215744 : lng t0 = GDKusec();
1184 215749 : oid pos = oid_nil;
1185 215749 : BUN nunique = 0;
1186 :
1187 215749 : if (b == NULL || b->ttype == TYPE_void || n == NULL) {
1188 : return GDK_SUCCEED;
1189 : }
1190 : /* either p or positions */
1191 215749 : assert((p == NULL) != (positions == NULL));
1192 215749 : if (p != NULL) {
1193 215573 : if (BATcount(p) != BATcount(n)) {
1194 0 : GDKerror("update BATs not the same size\n");
1195 0 : return GDK_FAIL;
1196 : }
1197 215573 : if (ATOMtype(p->ttype) != TYPE_oid) {
1198 0 : GDKerror("positions BAT not type OID\n");
1199 0 : return GDK_FAIL;
1200 : }
1201 215573 : if (BATtdense(p)) {
1202 206692 : pos = p->tseqbase;
1203 206692 : positions = &pos;
1204 206692 : autoincr = true;
1205 206692 : p = NULL;
1206 8881 : } else if (p->ttype != TYPE_void) {
1207 8879 : positions = (const oid *) Tloc(p, 0);
1208 8879 : autoincr = false;
1209 : } else {
1210 : autoincr = false;
1211 : }
1212 176 : } else if (autoincr) {
1213 176 : pos = *positions;
1214 : }
1215 215749 : if (BATcount(n) == 0) {
1216 : return GDK_SUCCEED;
1217 : }
1218 :
1219 29159 : BATiter ni = bat_iterator(n);
1220 :
1221 29166 : OIDXdestroy(b);
1222 29172 : STRMPdestroy(b);
1223 29171 : RTREEdestroy(b);
1224 : /* load hash so that we can maintain it */
1225 29161 : (void) BATcheckhash(b);
1226 :
1227 29172 : MT_lock_set(&b->theaplock);
1228 29155 : if (!force && (b->batRestricted != BAT_WRITE ||
1229 49 : (ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1)) {
1230 0 : MT_lock_unset(&b->theaplock);
1231 0 : bat_iterator_end(&ni);
1232 0 : GDKerror("access denied to %s, aborting.\n", BATgetId(b));
1233 0 : return GDK_FAIL;
1234 : }
1235 29155 : BATiter bi = bat_iterator_nolock(b);
1236 29155 : if (ni.count > BATcount(b) / gdk_unique_estimate_keep_fraction) {
1237 28518 : b->tunique_est = 0;
1238 : }
1239 :
1240 29155 : b->tsorted = b->trevsorted = false;
1241 29155 : b->tnosorted = b->tnorevsorted = 0;
1242 29155 : b->tseqbase = oid_nil;
1243 29155 : b->tkey = false;
1244 29155 : b->tnokey[0] = b->tnokey[1] = 0;
1245 :
1246 29155 : int (*atomcmp)(const void *, const void *) = ATOMcompare(b->ttype);
1247 29155 : const void *nil = ATOMnilptr(b->ttype);
1248 29155 : oid hseqend = b->hseqbase + BATcount(b);
1249 :
1250 29155 : MT_lock_unset(&b->theaplock);
1251 :
1252 33951 : bool anynil = false;
1253 33951 : bool locked = false;
1254 :
1255 33951 : if (b->tvheap) {
1256 1206981 : for (BUN i = 0; i < ni.count; i++) {
1257 1204115 : oid updid;
1258 1204115 : if (positions) {
1259 1203102 : updid = autoincr ? pos++ : *positions++;
1260 : } else {
1261 1013 : updid = BUNtoid(p, i);
1262 : }
1263 :
1264 1204115 : if (updid < b->hseqbase ||
1265 1204115 : (!mayappend && updid >= hseqend)) {
1266 0 : GDKerror("id out of range\n");
1267 0 : goto bailout;
1268 : }
1269 1204115 : updid -= b->hseqbase;
1270 1204115 : if (!force && updid < b->batInserted) {
1271 0 : GDKerror("updating committed value\n");
1272 0 : goto bailout;
1273 : }
1274 :
1275 1204115 : const void *new = BUNtvar(ni, i);
1276 :
1277 1204115 : if (updid >= BATcount(b)) {
1278 23564 : assert(mayappend);
1279 23564 : if (locked) {
1280 4 : MT_rwlock_wrunlock(&b->thashlock);
1281 4 : locked = false;
1282 : }
1283 23564 : if (b->tminpos != bi.minpos ||
1284 23563 : b->tmaxpos != bi.maxpos) {
1285 1 : MT_lock_set(&b->theaplock);
1286 1 : b->tminpos = bi.minpos;
1287 1 : b->tmaxpos = bi.maxpos;
1288 1 : MT_lock_unset(&b->theaplock);
1289 : }
1290 23564 : if (BATcount(b) < updid &&
1291 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1292 0 : bat_iterator_end(&ni);
1293 0 : return GDK_FAIL;
1294 : }
1295 23564 : if (BUNappend(b, new, force) != GDK_SUCCEED) {
1296 0 : bat_iterator_end(&ni);
1297 0 : return GDK_FAIL;
1298 : }
1299 23564 : bi = bat_iterator_nolock(b);
1300 132530 : continue;
1301 : }
1302 :
1303 : /* it is possible that a previous run was killed
1304 : * after an update (with a mmapped tail file)
1305 : * but before that was committed, then the
1306 : * offset may point outside of the vheap */
1307 1180551 : const void *old = BUNtvaroff(bi, updid) < bi.vhfree ? BUNtvar(bi, updid) : NULL;
1308 :
1309 1170949 : if (old && atomcmp(old, new) == 0) {
1310 : /* replacing with the same value:
1311 : * nothing to do */
1312 108966 : continue;
1313 : }
1314 :
1315 1070906 : bool isnil = atomcmp(new, nil) == 0;
1316 1066371 : anynil |= isnil;
1317 1066371 : MT_lock_set(&b->theaplock);
1318 1066687 : if (old == NULL ||
1319 1066687 : (b->tnil &&
1320 767 : !anynil &&
1321 767 : atomcmp(old, nil) == 0)) {
1322 : /* if old value is nil and no new
1323 : * value is, we're not sure anymore
1324 : * about the nil property, so we must
1325 : * clear it */
1326 765 : b->tnil = false;
1327 : }
1328 1066687 : b->tnonil &= !isnil;
1329 1066687 : b->tnil |= isnil;
1330 1066687 : MT_lock_unset(&b->theaplock);
1331 1066986 : if (bi.maxpos != BUN_NONE) {
1332 4236 : if (!isnil &&
1333 2118 : atomcmp(BUNtvar(bi, bi.maxpos), new) < 0) {
1334 : /* new value is larger than
1335 : * previous largest */
1336 24 : bi.maxpos = updid;
1337 4188 : } else if (old == NULL ||
1338 2107 : (atomcmp(BUNtvar(bi, bi.maxpos), old) == 0 &&
1339 13 : atomcmp(new, old) != 0)) {
1340 : /* old value is equal to
1341 : * largest and new value is
1342 : * smaller, so we don't know
1343 : * anymore which is the
1344 : * largest */
1345 13 : bi.maxpos = BUN_NONE;
1346 : }
1347 : }
1348 1066986 : if (bi.minpos != BUN_NONE) {
1349 4224 : if (!isnil &&
1350 2112 : atomcmp(BUNtvar(bi, bi.minpos), new) > 0) {
1351 : /* new value is smaller than
1352 : * previous smallest */
1353 14 : bi.minpos = updid;
1354 4196 : } else if (old == NULL ||
1355 2120 : (atomcmp(BUNtvar(bi, bi.minpos), old) == 0 &&
1356 22 : atomcmp(new, old) != 0)) {
1357 : /* old value is equal to
1358 : * smallest and new value is
1359 : * larger, so we don't know
1360 : * anymore which is the
1361 : * smallest */
1362 22 : bi.minpos = BUN_NONE;
1363 : }
1364 : }
1365 1066986 : if (!locked) {
1366 2365 : MT_rwlock_wrlock(&b->thashlock);
1367 2365 : locked = true;
1368 : }
1369 1066989 : if (old)
1370 1066989 : HASHdelete_locked(&bi, updid, old);
1371 0 : else if (b->thash) {
1372 0 : doHASHdestroy(b, b->thash);
1373 0 : b->thash = NULL;
1374 : }
1375 :
1376 1066945 : var_t d;
1377 1066945 : switch (b->twidth) {
1378 1058431 : case 1:
1379 1058431 : d = (var_t) ((uint8_t *) b->theap->base)[updid] + GDK_VAROFFSET;
1380 1058431 : break;
1381 6486 : case 2:
1382 6486 : d = (var_t) ((uint16_t *) b->theap->base)[updid] + GDK_VAROFFSET;
1383 6486 : break;
1384 1993 : case 4:
1385 1993 : d = (var_t) ((uint32_t *) b->theap->base)[updid];
1386 1993 : break;
1387 : #if SIZEOF_VAR_T == 8
1388 35 : case 8:
1389 35 : d = (var_t) ((uint64_t *) b->theap->base)[updid];
1390 35 : break;
1391 : #endif
1392 : default:
1393 0 : MT_UNREACHABLE();
1394 : }
1395 1066945 : MT_lock_set(&b->theaplock);
1396 1066854 : gdk_return rc = ATOMreplaceVAR(b, &d, new);
1397 1066774 : MT_lock_unset(&b->theaplock);
1398 1066824 : if (rc != GDK_SUCCEED) {
1399 0 : goto bailout;
1400 : }
1401 1066824 : if (b->twidth < SIZEOF_VAR_T &&
1402 1066904 : (b->twidth <= 2 ? d - GDK_VAROFFSET : d) >= ((size_t) 1 << (8 << b->tshift))) {
1403 : /* doesn't fit in current heap, upgrade it */
1404 16 : if (GDKupgradevarheap(b, d, 0, MAX(updid, b->batCount)) != GDK_SUCCEED) {
1405 0 : goto bailout;
1406 : }
1407 : }
1408 : /* in case ATOMreplaceVAR and/or
1409 : * GDKupgradevarheap replaces a heap, we need to
1410 : * reinitialize the iterator */
1411 : {
1412 : /* save and restore minpos/maxpos */
1413 1066824 : BUN minpos = bi.minpos;
1414 1066824 : BUN maxpos = bi.maxpos;
1415 1066824 : bi = bat_iterator_nolock(b);
1416 1066824 : bi.minpos = minpos;
1417 1066824 : bi.maxpos = maxpos;
1418 : }
1419 1066824 : switch (b->twidth) {
1420 1058294 : case 1:
1421 1058294 : ((uint8_t *) b->theap->base)[updid] = (uint8_t) (d - GDK_VAROFFSET);
1422 1058294 : break;
1423 6502 : case 2:
1424 6502 : ((uint16_t *) b->theap->base)[updid] = (uint16_t) (d - GDK_VAROFFSET);
1425 6502 : break;
1426 1993 : case 4:
1427 1993 : ((uint32_t *) b->theap->base)[updid] = (uint32_t) d;
1428 1993 : break;
1429 : #if SIZEOF_VAR_T == 8
1430 35 : case 8:
1431 35 : ((uint64_t *) b->theap->base)[updid] = (uint64_t) d;
1432 35 : break;
1433 : #endif
1434 : default:
1435 0 : MT_UNREACHABLE();
1436 : }
1437 1066824 : HASHinsert_locked(&bi, updid, new);
1438 :
1439 : }
1440 2866 : if (locked) {
1441 2363 : if (b->thash)
1442 2 : nunique = b->thash->nunique;
1443 2363 : MT_rwlock_wrunlock(&b->thashlock);
1444 2363 : locked = false;
1445 : }
1446 2867 : MT_lock_set(&b->theaplock);
1447 2867 : b->tvheap->dirty = true;
1448 2867 : MT_lock_unset(&b->theaplock);
1449 26300 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
1450 0 : assert(b->thash == NULL);
1451 0 : HASHdestroy(b); /* hash doesn't make sense for msk */
1452 0 : for (BUN i = 0; i < ni.count; i++) {
1453 0 : oid updid;
1454 0 : if (positions) {
1455 0 : updid = autoincr ? pos++ : *positions++;
1456 : } else {
1457 0 : updid = BUNtoid(p, i);
1458 : }
1459 :
1460 0 : if (updid < b->hseqbase ||
1461 0 : (!mayappend && updid >= hseqend)) {
1462 0 : GDKerror("id out of range\n");
1463 0 : bat_iterator_end(&ni);
1464 0 : return GDK_FAIL;
1465 : }
1466 0 : updid -= b->hseqbase;
1467 0 : if (!force && updid < b->batInserted) {
1468 0 : GDKerror("updating committed value\n");
1469 0 : bat_iterator_end(&ni);
1470 0 : return GDK_FAIL;
1471 : }
1472 0 : if (updid >= BATcount(b)) {
1473 0 : assert(mayappend);
1474 0 : if (BATcount(b) < updid &&
1475 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1476 0 : bat_iterator_end(&ni);
1477 0 : return GDK_FAIL;
1478 : }
1479 0 : if (BUNappend(b, BUNtmsk(ni, i), force) != GDK_SUCCEED) {
1480 0 : bat_iterator_end(&ni);
1481 0 : return GDK_FAIL;
1482 : }
1483 0 : continue;
1484 : }
1485 0 : mskSetVal(b, updid, Tmskval(&ni, i));
1486 : }
1487 0 : bi = bat_iterator_nolock(b);
1488 26300 : } else if (autoincr) {
1489 17916 : if (pos < b->hseqbase ||
1490 17025 : (!mayappend && pos + ni.count > hseqend)) {
1491 0 : GDKerror("id out of range\n");
1492 0 : bat_iterator_end(&ni);
1493 0 : return GDK_FAIL;
1494 : }
1495 17916 : pos -= b->hseqbase;
1496 17916 : if (!force && pos < b->batInserted) {
1497 0 : GDKerror("updating committed value\n");
1498 0 : bat_iterator_end(&ni);
1499 0 : return GDK_FAIL;
1500 : }
1501 :
1502 17916 : if (pos >= BATcount(b)) {
1503 467 : assert(mayappend);
1504 467 : bat_iterator_end(&ni);
1505 467 : if (BATcount(b) < pos &&
1506 0 : BUNappendmulti(b, NULL, (BUN) (pos - BATcount(b)), force) != GDK_SUCCEED) {
1507 : return GDK_FAIL;
1508 : }
1509 467 : return BATappend(b, n, NULL, force);
1510 : }
1511 17455 : if (pos + ni.count > BATcount(b) &&
1512 6 : BUNappendmulti(b, NULL, (BUN) (pos + ni.count - BATcount(b)), force) != GDK_SUCCEED) {
1513 0 : bat_iterator_end(&ni);
1514 0 : return GDK_FAIL;
1515 : }
1516 17449 : bi = bat_iterator_nolock(b);
1517 :
1518 : /* we copy all of n, so if there are nils in n we get
1519 : * nils in b (and else we don't know) */
1520 17449 : b->tnil = ni.nil;
1521 : /* we may not copy over all of b, so we only know that
1522 : * there are no nils in b afterward if there weren't
1523 : * any in either b or n to begin with */
1524 17449 : b->tnonil &= ni.nonil;
1525 : /* if there is no hash, we don't start the loop, if
1526 : * there is only a persisted hash, it will get destroyed
1527 : * in the first iteration, after which there is no hash
1528 : * and the loop ends */
1529 17449 : MT_rwlock_wrlock(&b->thashlock);
1530 17449 : locked = true;
1531 17449 : for (BUN i = pos, j = pos + ni.count; i < j && b->thash; i++)
1532 17 : HASHdelete_locked(&bi, i, Tloc(b, i));
1533 17432 : if (ni.type == TYPE_void) {
1534 0 : assert(b->ttype == TYPE_oid);
1535 0 : oid *o = Tloc(b, pos);
1536 0 : if (is_oid_nil(ni.tseq)) {
1537 : /* we may or may not overwrite the old
1538 : * min/max values */
1539 0 : bi.minpos = BUN_NONE;
1540 0 : bi.maxpos = BUN_NONE;
1541 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1542 0 : o[i] = oid_nil;
1543 0 : b->tnil = true;
1544 : } else {
1545 0 : oid v = ni.tseq;
1546 : /* we know min/max of n, so we know
1547 : * the new min/max of b if those of n
1548 : * are smaller/larger than the old */
1549 0 : if (bi.minpos != BUN_NONE) {
1550 0 : if (v <= BUNtoid(b, bi.minpos))
1551 0 : bi.minpos = pos;
1552 0 : else if (pos <= bi.minpos && bi.minpos < pos + ni.count)
1553 0 : bi.minpos = BUN_NONE;
1554 : }
1555 0 : if (complex_cand(n)) {
1556 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1557 0 : o[i] = *(oid *)Tpos(&ni, i);
1558 : /* last value */
1559 0 : v = o[ni.count - 1];
1560 : } else {
1561 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1562 0 : o[i] = v++;
1563 : /* last value added (not one beyond) */
1564 0 : v--;
1565 : }
1566 0 : if (bi.maxpos != BUN_NONE) {
1567 0 : if (v >= BUNtoid(b, bi.maxpos))
1568 0 : bi.maxpos = pos + ni.count - 1;
1569 0 : else if (pos <= bi.maxpos && bi.maxpos < pos + ni.count)
1570 0 : bi.maxpos = BUN_NONE;
1571 : }
1572 : }
1573 : } else {
1574 : /* if the extremes of n are at least as
1575 : * extreme as those of b, we can replace b's
1576 : * min/max, else we don't know what b's new
1577 : * min/max are*/
1578 17730 : if (bi.minpos != BUN_NONE && ni.minpos != BUN_NONE &&
1579 298 : atomcmp(BUNtloc(bi, bi.minpos), BUNtail(ni, ni.minpos)) >= 0) {
1580 183 : bi.minpos = pos + ni.minpos;
1581 : } else {
1582 17249 : bi.minpos = BUN_NONE;
1583 : }
1584 17755 : if (bi.maxpos != BUN_NONE && ni.maxpos != BUN_NONE &&
1585 323 : atomcmp(BUNtloc(bi, bi.maxpos), BUNtail(ni, ni.maxpos)) <= 0) {
1586 236 : bi.maxpos = pos + ni.maxpos;
1587 : } else {
1588 17196 : bi.maxpos = BUN_NONE;
1589 : }
1590 17432 : memcpy(Tloc(b, pos), ni.base,
1591 17432 : ni.count << b->tshift);
1592 : }
1593 : /* either we have a hash that was updated above, or we
1594 : * have no hash; we cannot have the case where there is
1595 : * only a persisted (unloaded) hash since it would have
1596 : * been destroyed above */
1597 17432 : if (b->thash != NULL) {
1598 7 : for (BUN i = pos, j = pos + ni.count; i < j; i++)
1599 7 : HASHinsert_locked(&bi, i, Tloc(b, i));
1600 0 : if (b->thash)
1601 0 : nunique = b->thash->nunique;
1602 : }
1603 17425 : MT_rwlock_wrunlock(&b->thashlock);
1604 17449 : locked = false;
1605 17449 : if (ni.count == BATcount(b)) {
1606 : /* if we replaced all values of b by values
1607 : * from n, we can also copy the min/max
1608 : * properties */
1609 8753 : bi.minpos = ni.minpos;
1610 8753 : bi.maxpos = ni.maxpos;
1611 8753 : if (BATtdensebi(&ni)) {
1612 : /* replaced all of b with a dense sequence */
1613 47 : MT_lock_set(&b->theaplock);
1614 47 : BATtseqbase(b, ni.tseq);
1615 47 : MT_lock_unset(&b->theaplock);
1616 : }
1617 : }
1618 : } else {
1619 170730992 : for (BUN i = 0; i < ni.count; i++) {
1620 170722608 : oid updid;
1621 170722608 : if (positions) {
1622 : /* assert(!autoincr) */
1623 170722608 : updid = *positions++;
1624 : } else {
1625 0 : updid = BUNtoid(p, i);
1626 : }
1627 :
1628 170722608 : if (updid < b->hseqbase ||
1629 170722608 : (!mayappend && updid >= hseqend)) {
1630 0 : GDKerror("id out of range\n");
1631 0 : goto bailout;
1632 : }
1633 170722608 : updid -= b->hseqbase;
1634 170722608 : if (!force && updid < b->batInserted) {
1635 0 : GDKerror("updating committed value\n");
1636 0 : goto bailout;
1637 : }
1638 :
1639 170722608 : const void *new = BUNtloc(ni, i);
1640 :
1641 170722608 : if (updid >= BATcount(b)) {
1642 16058 : assert(mayappend);
1643 16058 : if (locked) {
1644 9 : MT_rwlock_wrunlock(&b->thashlock);
1645 9 : locked = false;
1646 : }
1647 16059 : if (b->tminpos != bi.minpos ||
1648 16057 : b->tmaxpos != bi.maxpos) {
1649 3 : MT_lock_set(&b->theaplock);
1650 3 : b->tminpos = bi.minpos;
1651 3 : b->tmaxpos = bi.maxpos;
1652 3 : MT_lock_unset(&b->theaplock);
1653 : }
1654 16058 : if (BATcount(b) < updid &&
1655 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1656 0 : goto bailout;
1657 : }
1658 16058 : if (BUNappend(b, new, force) != GDK_SUCCEED) {
1659 0 : bat_iterator_end(&ni);
1660 0 : return GDK_FAIL;
1661 : }
1662 16057 : bi = bat_iterator_nolock(b);
1663 16057 : continue;
1664 : }
1665 :
1666 170706550 : const void *old = BUNtloc(bi, updid);
1667 170706550 : bool isnil = atomcmp(new, nil) == 0;
1668 166718700 : anynil |= isnil;
1669 166718700 : if (b->tnil &&
1670 1908 : !anynil &&
1671 1908 : atomcmp(old, nil) == 0) {
1672 : /* if old value is nil and no new
1673 : * value is, we're not sure anymore
1674 : * about the nil property, so we must
1675 : * clear it */
1676 1904 : b->tnil = false;
1677 : }
1678 166718700 : b->tnonil &= !isnil;
1679 166718700 : b->tnil |= isnil;
1680 166718700 : if (bi.maxpos != BUN_NONE) {
1681 10672 : if (!isnil &&
1682 5334 : atomcmp(BUNtloc(bi, bi.maxpos), new) < 0) {
1683 : /* new value is larger than
1684 : * previous largest */
1685 137 : bi.maxpos = updid;
1686 5208 : } else if (atomcmp(BUNtloc(bi, bi.maxpos), old) == 0 &&
1687 7 : atomcmp(new, old) != 0) {
1688 : /* old value is equal to
1689 : * largest and new value is
1690 : * smaller, so we don't know
1691 : * anymore which is the
1692 : * largest */
1693 7 : bi.maxpos = BUN_NONE;
1694 : }
1695 : }
1696 166718700 : if (bi.minpos != BUN_NONE) {
1697 10672 : if (!isnil &&
1698 5334 : atomcmp(BUNtloc(bi, bi.minpos), new) > 0) {
1699 : /* new value is smaller than
1700 : * previous smallest */
1701 6 : bi.minpos = updid;
1702 5338 : } else if (atomcmp(BUNtloc(bi, bi.minpos), old) == 0 &&
1703 6 : atomcmp(new, old) != 0) {
1704 : /* old value is equal to
1705 : * smallest and new value is
1706 : * larger, so we don't know
1707 : * anymore which is the
1708 : * smallest */
1709 6 : bi.minpos = BUN_NONE;
1710 : }
1711 : }
1712 :
1713 166718700 : if (!locked) {
1714 8378 : MT_rwlock_wrlock(&b->thashlock);
1715 8378 : locked = true;
1716 : }
1717 166718707 : HASHdelete_locked(&bi, updid, old);
1718 166762211 : switch (b->twidth) {
1719 26913307 : case 1:
1720 26913307 : ((bte *) b->theap->base)[updid] = * (bte *) new;
1721 26913307 : break;
1722 525041 : case 2:
1723 525041 : ((sht *) b->theap->base)[updid] = * (sht *) new;
1724 525041 : break;
1725 35203638 : case 4:
1726 35203638 : ((int *) b->theap->base)[updid] = * (int *) new;
1727 35203638 : break;
1728 98904335 : case 8:
1729 98904335 : ((lng *) b->theap->base)[updid] = * (lng *) new;
1730 98904335 : break;
1731 5215890 : case 16:
1732 : #ifdef HAVE_HGE
1733 5215890 : ((hge *) b->theap->base)[updid] = * (hge *) new;
1734 : #else
1735 : ((uuid *) b->theap->base)[updid] = * (uuid *) new;
1736 : #endif
1737 5215890 : break;
1738 0 : default:
1739 0 : memcpy(BUNtloc(bi, updid), new, ATOMsize(b->ttype));
1740 0 : break;
1741 : }
1742 166762211 : HASHinsert_locked(&bi, updid, new);
1743 : }
1744 8384 : if (locked) {
1745 8375 : if (b->thash)
1746 0 : nunique = b->thash->nunique;
1747 8375 : MT_rwlock_wrunlock(&b->thashlock);
1748 8375 : locked = false;
1749 : }
1750 : }
1751 28701 : bat_iterator_end(&ni);
1752 28671 : MT_lock_set(&b->theaplock);
1753 28692 : if (nunique != 0)
1754 2 : b->tunique_est = (double) nunique;
1755 28692 : b->tminpos = bi.minpos;
1756 28692 : b->tmaxpos = bi.maxpos;
1757 28692 : b->theap->dirty = true;
1758 28692 : MT_lock_unset(&b->theaplock);
1759 28706 : TRC_DEBUG(ALGO,
1760 : "BATreplace(" ALGOBATFMT "," ALGOOPTBATFMT "," ALGOBATFMT ") " LLFMT " usec\n",
1761 : ALGOBATPAR(b), ALGOOPTBATPAR(p), ALGOBATPAR(n),
1762 : GDKusec() - t0);
1763 : return GDK_SUCCEED;
1764 :
1765 0 : bailout:
1766 0 : bat_iterator_end(&ni);
1767 0 : if (locked) {
1768 0 : Hash *h = b->thash;
1769 0 : b->thash = NULL;
1770 0 : MT_rwlock_wrunlock(&b->thashlock);
1771 0 : doHASHdestroy(b, h);
1772 : }
1773 : return GDK_FAIL;
1774 : }
1775 :
1776 : /* replace values from b at locations specified in p with values in n */
1777 : gdk_return
1778 214885 : BATreplace(BAT *b, BAT *p, BAT *n, bool force)
1779 : {
1780 214885 : return BATappend_or_update(b, p, NULL, n, false, false, force);
1781 : }
1782 :
1783 : /* like BATreplace, but p may specify locations beyond the end of b */
1784 : gdk_return
1785 1033 : BATupdate(BAT *b, BAT *p, BAT *n, bool force)
1786 : {
1787 1033 : return BATappend_or_update(b, p, NULL, n, true, false, force);
1788 : }
1789 :
1790 : #if 0 /* not used */
1791 : /* like BATreplace, but the positions are given by an array of oid values */
1792 : gdk_return
1793 : BATreplacepos(BAT *b, const oid *positions, BAT *n, bool autoincr, bool force)
1794 : {
1795 : return BATappend_or_update(b, NULL, positions, n, false, autoincr, force);
1796 : }
1797 : #endif
1798 :
1799 : /* like BATreplace, but the positions are given by an array of oid
1800 : * values, and they may specify locations beyond the end of b */
1801 : gdk_return
1802 176 : BATupdatepos(BAT *b, const oid *positions, BAT *n, bool autoincr, bool force)
1803 : {
1804 176 : return BATappend_or_update(b, NULL, positions, n, true, autoincr, force);
1805 : }
1806 :
1807 : /*
1808 : * BAT Selections
1809 : * The BAT selectors are among the most heavily used operators.
1810 : * Their efficient implementation is therefore mandatory.
1811 : *
1812 : * BAT slice
1813 : * This function returns a horizontal slice from a BAT. It optimizes
1814 : * execution by avoiding to copy when the BAT is memory mapped (in
1815 : * this case, an independent submap is created) or else when it is
1816 : * read-only, then a VIEW bat is created as a result.
1817 : *
1818 : * If a new copy has to be created, this function takes care to
1819 : * preserve void-columns (in this case, the seqbase has to be
1820 : * recomputed in the result).
1821 : *
1822 : * The selected range is excluding the high value.
1823 : */
1824 : BAT *
1825 12050702 : BATslice(BAT *b, BUN l, BUN h)
1826 : {
1827 12050702 : BUN low = l;
1828 12050702 : BAT *bn = NULL;
1829 :
1830 12050702 : BATcheck(b, NULL);
1831 12050702 : BATiter bi = bat_iterator(b);
1832 12064160 : if (l > bi.count)
1833 : l = bi.count;
1834 12064160 : if (h > bi.count)
1835 : h = bi.count;
1836 12064160 : if (h < l)
1837 : h = l;
1838 :
1839 12064160 : if (complex_cand(b)) {
1840 : /* slicing a candidate list with exceptions */
1841 95 : struct canditer ci;
1842 95 : canditer_init(&ci, NULL, b);
1843 95 : if (b->hseqbase + l >= ci.hseq) {
1844 95 : l = b->hseqbase + l - ci.hseq;
1845 95 : h = b->hseqbase + h - ci.hseq;
1846 : } else {
1847 0 : l = 0;
1848 0 : if (b->hseqbase + h >= ci.hseq)
1849 0 : h = b->hseqbase + h - ci.hseq;
1850 : else
1851 : h = 0;
1852 : }
1853 95 : bn = canditer_slice(&ci, l, h);
1854 95 : goto doreturn;
1855 : }
1856 : /* If the source BAT is readonly, then we can obtain a VIEW
1857 : * that just reuses the memory of the source. */
1858 12064065 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1859 : /* forget about slices for bit masks: we can't deal
1860 : * with difference in alignment, so we'll just make a
1861 : * copy */
1862 0 : bn = COLnew((oid) (b->hseqbase + low), b->ttype, h - l, TRANSIENT);
1863 : /* we use BATappend with a candidate list to easily
1864 : * copy the part of b that we need */
1865 0 : BAT *s = BATdense(0, (oid) (b->hseqbase + low), h - l);
1866 0 : if (bn == NULL ||
1867 0 : s == NULL ||
1868 0 : BATappend(bn, b, s, false) != GDK_SUCCEED) {
1869 0 : BBPreclaim(bn);
1870 0 : BBPreclaim(s);
1871 0 : bn = NULL;
1872 0 : goto doreturn;
1873 : }
1874 0 : BBPunfix(s->batCacheid);
1875 0 : goto doreturn;
1876 : }
1877 12064065 : restrict_t prestricted;
1878 14032085 : if (bi.restricted == BAT_READ && VIEWtparent(b)) {
1879 1963480 : BAT *pb = BBP_desc(VIEWtparent(b));
1880 1963480 : MT_lock_set(&pb->theaplock);
1881 1962532 : prestricted = pb->batRestricted;
1882 1962532 : MT_lock_unset(&pb->theaplock);
1883 : } else {
1884 : prestricted = BAT_WRITE; /* just initialize with anything */
1885 : }
1886 12068605 : if (bi.restricted == BAT_READ &&
1887 12014031 : (!VIEWtparent(b) || prestricted == BAT_READ)) {
1888 12013984 : bn = VIEWcreate(b->hseqbase + low, b, l, h);
1889 12013984 : if (bn == NULL)
1890 : goto doreturn;
1891 : } else {
1892 : /* create a new BAT and put everything into it */
1893 54621 : BUN p = l;
1894 54621 : BUN q = h;
1895 :
1896 54621 : bn = COLnew((oid) (b->hseqbase + low), BATtdensebi(&bi) || (b->ttype == TYPE_oid && h == l) ? TYPE_void : b->ttype, h - l, TRANSIENT);
1897 54690 : if (bn == NULL)
1898 0 : goto doreturn;
1899 :
1900 54690 : if (bn->ttype == TYPE_void) {
1901 37021 : BATsetcount(bn, h - l);
1902 36980 : BATtseqbase(bn, is_oid_nil(bi.tseq) ? oid_nil : h == l ? 0 : (oid) (bi.tseq + low));
1903 17669 : } else if (bn->tvheap == NULL) {
1904 11111 : assert(BATatoms[bn->ttype].atomPut == NULL);
1905 11111 : memcpy(Tloc(bn, 0), (const char *) bi.base + (p << bi.shift),
1906 11111 : (q - p) << bn->tshift);
1907 11111 : bn->theap->dirty = true;
1908 11111 : BATsetcount(bn, h - l);
1909 : } else {
1910 1683738 : for (; p < q; p++) {
1911 1677232 : if (bunfastapp(bn, BUNtail(bi, p)) != GDK_SUCCEED) {
1912 0 : BBPreclaim(bn);
1913 0 : bn = NULL;
1914 0 : goto doreturn;
1915 : }
1916 : }
1917 : }
1918 54511 : bn->theap->dirty = true;
1919 54511 : bn->tsorted = bi.sorted || bn->batCount <= 1;
1920 54511 : bn->trevsorted = bi.revsorted || bn->batCount <= 1;
1921 54511 : bn->tkey = bi.key || bn->batCount <= 1;
1922 54511 : bn->tnonil = bi.nonil;
1923 54511 : bn->tnil = false; /* we don't know */
1924 54511 : if (bi.nosorted > l && bi.nosorted < h && !bn->tsorted)
1925 2312 : bn->tnosorted = bi.nosorted - l;
1926 : else
1927 52199 : bn->tnosorted = 0;
1928 54511 : if (bi.norevsorted > l && bi.norevsorted < h && !bn->trevsorted)
1929 4720 : bn->tnorevsorted = bi.norevsorted - l;
1930 : else
1931 49791 : bn->tnorevsorted = 0;
1932 54511 : if (bi.nokey[0] >= l && bi.nokey[0] < h &&
1933 44371 : bi.nokey[1] >= l && bi.nokey[1] < h &&
1934 429 : bi.nokey[0] != bi.nokey[1] &&
1935 : !bn->tkey) {
1936 430 : bn->tnokey[0] = bi.nokey[0] - l;
1937 430 : bn->tnokey[1] = bi.nokey[1] - l;
1938 : } else {
1939 54081 : bn->tnokey[0] = bn->tnokey[1] = 0;
1940 : }
1941 : }
1942 12069223 : doreturn:
1943 12069223 : bat_iterator_end(&bi);
1944 12068614 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",lo=" BUNFMT ",hi=" BUNFMT " -> "
1945 : ALGOOPTBATFMT "\n",
1946 : ALGOBATPAR(b), l, h, ALGOOPTBATPAR(bn));
1947 : return bn;
1948 : }
1949 :
1950 : #define BAT_ORDERED(TPE) \
1951 : do { \
1952 : const TPE *restrict vals = Tloc(b, 0); \
1953 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
1954 : if (vals[p - 1] > vals[p]) { \
1955 : b->tnosorted = p; \
1956 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
1957 : goto doreturn; \
1958 : } else if (vals[p - 1] < vals[p]) { \
1959 : if (!b->trevsorted && b->tnorevsorted == 0) { \
1960 : b->tnorevsorted = p; \
1961 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b)); \
1962 : } \
1963 : } else if (!b->tkey && b->tnokey[1] == 0) { \
1964 : b->tnokey[0] = p - 1; \
1965 : b->tnokey[1] = p; \
1966 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b)); \
1967 : } \
1968 : } \
1969 : } while (0)
1970 :
1971 : #define BAT_ORDERED_FP(TPE) \
1972 : do { \
1973 : const TPE *restrict vals = Tloc(b, 0); \
1974 : TPE prev = vals[0]; \
1975 : bool prevnil = is_##TPE##_nil(prev); \
1976 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
1977 : TPE next = vals[p]; \
1978 : int cmp = prevnil ? -!(prevnil = is_##TPE##_nil(next)) : (prevnil = is_##TPE##_nil(next)) ? 1 : (prev > next) - (prev < next); \
1979 : prev = next; \
1980 : if (cmp > 0) { \
1981 : b->tnosorted = bi.nosorted = p; \
1982 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
1983 : goto doreturn; \
1984 : } else if (cmp < 0) { \
1985 : if (!b->trevsorted && b->tnorevsorted == 0) { \
1986 : b->tnorevsorted = bi.norevsorted = p; \
1987 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b)); \
1988 : } \
1989 : } else if (!b->tkey && b->tnokey[1] == 0) { \
1990 : b->tnokey[0] = bi.nokey[0] = p - 1; \
1991 : b->tnokey[1] = bi.nokey[1] = p; \
1992 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b)); \
1993 : } \
1994 : } \
1995 : } while (0)
1996 :
1997 : /* Return whether the BAT is ordered or not. If we don't know, invest
1998 : * in a scan and record the results in the bat descriptor. If during
1999 : * the scan we happen to find evidence that the BAT is not reverse
2000 : * sorted, we record the location. */
2001 : bool
2002 3052295 : BATordered(BAT *b)
2003 : {
2004 3052295 : lng t0 = GDKusec();
2005 3053127 : bool sorted;
2006 :
2007 3053127 : MT_lock_set(&b->theaplock);
2008 3053556 : if (b->ttype == TYPE_void || b->tsorted || BATcount(b) == 0) {
2009 442279 : MT_lock_unset(&b->theaplock);
2010 442367 : return true;
2011 : }
2012 2611277 : if (b->tnosorted > 0 || !ATOMlinear(b->ttype)) {
2013 2117910 : MT_lock_unset(&b->theaplock);
2014 2117918 : return false;
2015 : }
2016 :
2017 : /* There are a few reasons why we need a lock here. It may be
2018 : * that multiple threads call this functions at the same time
2019 : * (happens a lot with mitosis/mergetable), but we only need to
2020 : * scan the bat in one thread: the others can reap the rewards
2021 : * when that one thread is done. Also, we need the heap to
2022 : * remain accessible (could have used bat_iterator for that),
2023 : * and, and this is the killer argument, we may need to make
2024 : * changes to the bat descriptor. */
2025 493367 : BATiter bi = bat_iterator_nolock(b);
2026 493367 : if (!b->tsorted && b->tnosorted == 0) {
2027 888171 : switch (ATOMbasetype(b->ttype)) {
2028 95212 : case TYPE_bte:
2029 127085876 : BAT_ORDERED(bte);
2030 : break;
2031 10429 : case TYPE_sht:
2032 1637369 : BAT_ORDERED(sht);
2033 : break;
2034 323363 : case TYPE_int:
2035 119593562 : BAT_ORDERED(int);
2036 : break;
2037 10167 : case TYPE_lng:
2038 62269228 : BAT_ORDERED(lng);
2039 : break;
2040 : #ifdef HAVE_HGE
2041 378 : case TYPE_hge:
2042 8002684 : BAT_ORDERED(hge);
2043 : break;
2044 : #endif
2045 974 : case TYPE_flt:
2046 8007244 : BAT_ORDERED_FP(flt);
2047 : break;
2048 794 : case TYPE_dbl:
2049 8234249 : BAT_ORDERED_FP(dbl);
2050 : break;
2051 : case TYPE_str:
2052 21431497 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2053 21416825 : int c;
2054 21416825 : const char *p1 = BUNtvar(bi, p - 1);
2055 21416825 : const char *p2 = BUNtvar(bi, p);
2056 21416825 : if (p1 == p2)
2057 : c = 0;
2058 2269317 : else if (p1[0] == '\200') {
2059 1765 : if (p2[0] == '\200')
2060 : c = 0;
2061 : else
2062 : c = -1;
2063 2267552 : } else if (p2[0] == '\200')
2064 : c = 1;
2065 : else
2066 2266425 : c = strcmp(p1, p2);
2067 2266425 : if (c > 0) {
2068 37197 : b->tnosorted = bi.nosorted = p;
2069 37197 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2070 37197 : goto doreturn;
2071 21379628 : } else if (c < 0) {
2072 2232086 : assert(!b->trevsorted);
2073 2232086 : if (b->tnorevsorted == 0) {
2074 17338 : b->tnorevsorted = bi.norevsorted = p;
2075 17338 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b));
2076 : }
2077 19147542 : } else if (b->tnokey[1] == 0) {
2078 4942 : assert(!b->tkey);
2079 4942 : b->tnokey[0] = bi.nokey[0] = p - 1;
2080 4942 : b->tnokey[1] = bi.nokey[1] = p;
2081 21379628 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b));
2082 : }
2083 : }
2084 : break;
2085 186 : default: {
2086 186 : int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
2087 2482 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2088 2437 : int c;
2089 2437 : if ((c = cmpf(BUNtail(bi, p - 1), BUNtail(bi, p))) > 0) {
2090 141 : b->tnosorted = bi.nosorted = p;
2091 141 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2092 141 : goto doreturn;
2093 2296 : } else if (c < 0) {
2094 2246 : if (!b->trevsorted && b->tnorevsorted == 0) {
2095 90 : b->tnorevsorted = bi.norevsorted = p;
2096 90 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b));
2097 : }
2098 50 : } else if (!b->tkey && b->tnokey[1] == 0) {
2099 8 : b->tnokey[0] = bi.nokey[0] = p - 1;
2100 8 : b->tnokey[1] = bi.nokey[1] = p;
2101 2296 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b));
2102 : }
2103 : }
2104 : break;
2105 : }
2106 : }
2107 : /* we only get here if we completed the scan; note that
2108 : * if we didn't record evidence about *reverse*
2109 : * sortedness, we know that the BAT is also reverse
2110 : * sorted; similarly, if we didn't record evidence about
2111 : * keyness, we know the BAT is key */
2112 155180 : b->tsorted = bi.sorted = true;
2113 155180 : TRC_DEBUG(ALGO, "Fixed sorted for " ALGOBATFMT " (" LLFMT " usec)\n", ALGOBATPAR(b), GDKusec() - t0);
2114 155734 : if (!b->trevsorted && b->tnorevsorted == 0) {
2115 69184 : b->trevsorted = bi.revsorted = true;
2116 69184 : TRC_DEBUG(ALGO, "Fixed revsorted for " ALGOBATFMT "\n", ALGOBATPAR(b));
2117 : }
2118 155734 : if (!b->tkey && b->tnokey[1] == 0) {
2119 52229 : b->tkey = bi.key = true;
2120 52229 : TRC_DEBUG(ALGO, "Fixed key for " ALGOBATFMT "\n", ALGOBATPAR(b));
2121 : }
2122 : }
2123 155734 : doreturn:
2124 493928 : sorted = b->tsorted;
2125 493928 : bat pbid = VIEWtparent(b);
2126 493928 : MT_lock_unset(&b->theaplock);
2127 494446 : if (pbid) {
2128 278145 : BAT *pb = BBP_desc(pbid);
2129 278145 : MT_lock_set(&pb->theaplock);
2130 278145 : if (bi.count == BATcount(pb) &&
2131 202223 : bi.h == pb->theap &&
2132 202223 : bi.type == pb->ttype) {
2133 : /* add to knowledge in parent bat */
2134 202212 : pb->tsorted |= bi.sorted;
2135 202212 : if (pb->tnosorted == 0)
2136 202212 : pb->tnosorted = bi.nosorted;
2137 202212 : pb->trevsorted |= bi.revsorted;
2138 202212 : if (pb->tnorevsorted == 0)
2139 32984 : pb->tnorevsorted = bi.norevsorted;
2140 202212 : pb->tkey |= bi.key;
2141 202212 : if (pb->tnokey[1] == 0) {
2142 179281 : pb->tnokey[0] = bi.nokey[0];
2143 179281 : pb->tnokey[1] = bi.nokey[1];
2144 : }
2145 : }
2146 278145 : MT_lock_unset(&pb->theaplock);
2147 : }
2148 : return sorted;
2149 : }
2150 :
2151 : #define BAT_REVORDERED(TPE) \
2152 : do { \
2153 : const TPE *restrict vals = Tloc(b, 0); \
2154 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
2155 : if (vals[p - 1] < vals[p]) { \
2156 : b->tnorevsorted = p; \
2157 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
2158 : goto doreturn; \
2159 : } \
2160 : } \
2161 : } while (0)
2162 :
2163 : #define BAT_REVORDERED_FP(TPE) \
2164 : do { \
2165 : const TPE *restrict vals = Tloc(b, 0); \
2166 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
2167 : TPE prev = vals[p - 1], next = vals[p]; \
2168 : int cmp = is_flt_nil(prev) ? -!is_flt_nil(next) : is_flt_nil(next) ? 1 : (prev > next) - (prev < next); \
2169 : if (cmp < 0) { \
2170 : b->tnorevsorted = bi.norevsorted = p; \
2171 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
2172 : goto doreturn; \
2173 : } \
2174 : } \
2175 : } while (0)
2176 :
2177 : /* Return whether the BAT is reverse ordered or not. If we don't
2178 : * know, invest in a scan and record the results in the bat
2179 : * descriptor. */
2180 : bool
2181 2917259 : BATordered_rev(BAT *b)
2182 : {
2183 2917259 : lng t0 = GDKusec();
2184 2918003 : bool revsorted;
2185 :
2186 2918003 : if (b == NULL || !ATOMlinear(b->ttype))
2187 : return false;
2188 2918003 : MT_lock_set(&b->theaplock);
2189 2917886 : if (BATcount(b) <= 1 || b->trevsorted) {
2190 265371 : MT_lock_unset(&b->theaplock);
2191 265376 : return true;
2192 : }
2193 2652515 : if (b->ttype == TYPE_void) {
2194 23157 : MT_lock_unset(&b->theaplock);
2195 23166 : return is_oid_nil(b->tseqbase);
2196 : }
2197 2629358 : if (BATtdense(b) || b->tnorevsorted > 0) {
2198 2527292 : MT_lock_unset(&b->theaplock);
2199 2527299 : return false;
2200 : }
2201 102066 : BATiter bi = bat_iterator_nolock(b);
2202 102066 : if (!b->trevsorted && b->tnorevsorted == 0) {
2203 167011 : switch (ATOMbasetype(b->ttype)) {
2204 35242 : case TYPE_bte:
2205 10972218 : BAT_REVORDERED(bte);
2206 : break;
2207 4434 : case TYPE_sht:
2208 3161557 : BAT_REVORDERED(sht);
2209 : break;
2210 36125 : case TYPE_int:
2211 1302113 : BAT_REVORDERED(int);
2212 : break;
2213 4776 : case TYPE_lng:
2214 2685473 : BAT_REVORDERED(lng);
2215 : break;
2216 : #ifdef HAVE_HGE
2217 123 : case TYPE_hge:
2218 992 : BAT_REVORDERED(hge);
2219 : break;
2220 : #endif
2221 511 : case TYPE_flt:
2222 1997 : BAT_REVORDERED_FP(flt);
2223 : break;
2224 322 : case TYPE_dbl:
2225 485511 : BAT_REVORDERED_FP(dbl);
2226 : break;
2227 20533 : default: {
2228 20533 : int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
2229 580753 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2230 577391 : if (cmpf(BUNtail(bi, p - 1), BUNtail(bi, p)) < 0) {
2231 17170 : b->tnorevsorted = p;
2232 17170 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2233 17170 : goto doreturn;
2234 : }
2235 : }
2236 : break;
2237 : }
2238 : }
2239 26270 : b->trevsorted = bi.revsorted = true;
2240 26270 : TRC_DEBUG(ALGO, "Fixed revsorted for " ALGOBATFMT " (" LLFMT " usec)\n", ALGOBATPAR(b), GDKusec() - t0);
2241 : }
2242 26270 : doreturn:
2243 102065 : revsorted = b->trevsorted;
2244 102065 : bat pbid = VIEWtparent(b);
2245 102065 : MT_lock_unset(&b->theaplock);
2246 102306 : if (pbid) {
2247 35399 : BAT *pb = BBP_desc(pbid);
2248 35399 : MT_lock_set(&pb->theaplock);
2249 35399 : if (bi.count == BATcount(pb) &&
2250 6402 : bi.h == pb->theap &&
2251 6402 : bi.type == pb->ttype) {
2252 : /* add to knowledge in parent bat */
2253 6402 : pb->trevsorted |= bi.revsorted;
2254 6402 : if (pb->tnorevsorted == 0)
2255 6402 : pb->tnorevsorted = bi.norevsorted;
2256 : }
2257 35399 : MT_lock_unset(&pb->theaplock);
2258 : }
2259 : return revsorted;
2260 : }
2261 :
2262 : /* figure out which sort function is to be called
2263 : * stable sort can produce an error (not enough memory available),
2264 : * "quick" sort does not produce errors */
2265 : static gdk_return
2266 3945688 : do_sort(void *restrict h, void *restrict t, const void *restrict base,
2267 : size_t n, int hs, int ts, int tpe, bool reverse, bool nilslast,
2268 : bool stable)
2269 : {
2270 3945688 : if (n <= 1) /* trivially sorted */
2271 : return GDK_SUCCEED;
2272 2761529 : switch (tpe) {
2273 2713220 : case TYPE_bte:
2274 : case TYPE_sht:
2275 : case TYPE_int:
2276 : case TYPE_lng:
2277 : #ifdef HAVE_HGE
2278 : case TYPE_hge:
2279 : #endif
2280 : case TYPE_date:
2281 : case TYPE_daytime:
2282 : case TYPE_timestamp:
2283 2713220 : assert(base == NULL);
2284 2713220 : if (nilslast == reverse && (stable || n > 100))
2285 20367 : return GDKrsort(h, t, n, hs, ts, reverse, false);
2286 : break;
2287 4 : case TYPE_uuid:
2288 4 : assert(base == NULL);
2289 4 : if (nilslast == reverse && (stable || n > 100))
2290 1 : return GDKrsort(h, t, n, hs, ts, reverse, true);
2291 : break;
2292 : default:
2293 : break;
2294 : }
2295 48416 : if (stable) {
2296 27 : if (reverse)
2297 0 : return GDKssort_rev(h, t, base, n, hs, ts, tpe);
2298 : else
2299 27 : return GDKssort(h, t, base, n, hs, ts, tpe);
2300 : } else {
2301 2741134 : GDKqsort(h, t, base, n, hs, ts, tpe, reverse, nilslast);
2302 : }
2303 2741134 : return GDK_SUCCEED;
2304 : }
2305 :
2306 : /* Sort the bat b according to both o and g. The stable and reverse
2307 : * parameters indicate whether the sort should be stable or descending
2308 : * respectively. The parameter b is required, o and g are optional
2309 : * (i.e., they may be NULL).
2310 : *
2311 : * A sorted copy is returned through the sorted parameter, the new
2312 : * ordering is returned through the order parameter, group information
2313 : * is returned through the groups parameter. All three output
2314 : * parameters may be NULL. If they're all NULL, this function does
2315 : * nothing.
2316 : *
2317 : * If o is specified, it is used to first rearrange b according to the
2318 : * order specified in o, after which b is sorted taking g into
2319 : * account.
2320 : *
2321 : * If g is specified, it indicates groups which should be individually
2322 : * ordered. Each row of consecutive equal values in g indicates a
2323 : * group which is sorted according to stable and reverse. g is used
2324 : * after the order in b was rearranged according to o.
2325 : *
2326 : * The outputs order and groups can be used in subsequent calls to
2327 : * this function. This can be used if multiple BATs need to be sorted
2328 : * together. The BATs should then be sorted in order of significance,
2329 : * and each following call should use the original unordered BAT plus
2330 : * the order and groups bat from the previous call. In this case, the
2331 : * sorted BATs are not of much use, so the sorted output parameter
2332 : * does not need to be specified.
2333 : * Apart from error checking and maintaining reference counts, sorting
2334 : * three columns (col1, col2, col3) could look like this with the
2335 : * sorted results in (col1s, col2s, col3s):
2336 : * BATsort(&col1s, &ord1, &grp1, col1, NULL, NULL, false, false, false);
2337 : * BATsort(&col2s, &ord2, &grp2, col2, ord1, grp1, false, false, false);
2338 : * BATsort(&col3s, NULL, NULL, col3, ord2, grp2, false, false, false);
2339 : * Note that the "reverse" parameter can be different for each call.
2340 : */
2341 : gdk_return
2342 31609 : BATsort(BAT **sorted, BAT **order, BAT **groups,
2343 : BAT *b, BAT *o, BAT *g, bool reverse, bool nilslast, bool stable)
2344 : {
2345 31609 : BAT *bn = NULL, *on = NULL, *gn = NULL, *pb = NULL;
2346 31609 : BATiter pbi;
2347 31609 : oid *restrict grps, *restrict ords, prev;
2348 31609 : BUN p, q, r;
2349 31609 : lng t0 = GDKusec();
2350 31620 : bool mkorderidx, orderidxlock = false;
2351 31620 : Heap *oidxh = NULL;
2352 :
2353 : /* we haven't implemented NILs as largest value for stable
2354 : * sort, so NILs come first for ascending and last for
2355 : * descending */
2356 31620 : assert(!stable || reverse == nilslast);
2357 :
2358 31620 : if (b == NULL) {
2359 0 : GDKerror("b must exist\n");
2360 0 : return GDK_FAIL;
2361 : }
2362 31620 : if (stable && reverse != nilslast) {
2363 0 : GDKerror("stable sort cannot have reverse != nilslast\n");
2364 0 : return GDK_FAIL;
2365 : }
2366 31620 : if (!ATOMlinear(b->ttype)) {
2367 0 : GDKerror("type %s cannot be sorted\n", ATOMname(b->ttype));
2368 0 : return GDK_FAIL;
2369 : }
2370 31620 : MT_lock_set(&b->theaplock);
2371 31617 : if (b->ttype == TYPE_void) {
2372 149 : b->tsorted = true;
2373 223 : if (b->trevsorted != (is_oid_nil(b->tseqbase) || b->batCount <= 1)) {
2374 0 : b->trevsorted = !b->trevsorted;
2375 : }
2376 298 : if (b->tkey != (!is_oid_nil(b->tseqbase) || b->batCount <= 1)) {
2377 0 : b->tkey = !b->tkey;
2378 : }
2379 31468 : } else if (b->batCount <= 1) {
2380 9857 : if (!b->tsorted || !b->trevsorted) {
2381 23 : b->tsorted = b->trevsorted = true;
2382 : }
2383 : }
2384 31617 : MT_lock_unset(&b->theaplock);
2385 31624 : if (o != NULL &&
2386 17223 : (ATOMtype(o->ttype) != TYPE_oid || /* oid tail */
2387 17223 : BATcount(o) != BATcount(b) || /* same size as b */
2388 6759 : (o->ttype == TYPE_void && /* no nil tail */
2389 2727 : BATcount(o) != 0 &&
2390 2727 : is_oid_nil(o->tseqbase)))) {
2391 0 : GDKerror("o must have type oid and same size as b\n");
2392 0 : return GDK_FAIL;
2393 : }
2394 31624 : if (g != NULL &&
2395 17223 : (ATOMtype(g->ttype) != TYPE_oid || /* oid tail */
2396 17223 : !g->tsorted || /* sorted */
2397 17223 : BATcount(g) != BATcount(b) || /* same size as b */
2398 6886 : (g->ttype == TYPE_void && /* no nil tail */
2399 2854 : BATcount(g) != 0 &&
2400 2854 : is_oid_nil(g->tseqbase)))) {
2401 0 : GDKerror("g must have type oid, sorted on the tail, "
2402 : "and same size as b\n");
2403 0 : return GDK_FAIL;
2404 : }
2405 31624 : if (sorted == NULL && order == NULL) {
2406 : /* no place to put result, so we're done quickly */
2407 0 : GDKerror("no place to put the result.\n");
2408 0 : return GDK_FAIL;
2409 : }
2410 31624 : if (g == NULL && !stable) {
2411 : /* pre-ordering doesn't make sense if we're not
2412 : * subsorting and the sort is not stable */
2413 14120 : o = NULL;
2414 : }
2415 31624 : if (b->tnonil) {
2416 : /* if there are no nils, placement of nils doesn't
2417 : * matter, so set nilslast such that ordered bits can
2418 : * be used */
2419 23434 : nilslast = reverse;
2420 : }
2421 31624 : pbi = bat_iterator(NULL);
2422 32948 : if (BATcount(b) <= 1 ||
2423 21555 : (reverse == nilslast &&
2424 : (reverse ? b->trevsorted : b->tsorted) &&
2425 5634 : o == NULL && g == NULL &&
2426 3079 : (groups == NULL || BATtkey(b) ||
2427 : (reverse ? b->tsorted : b->trevsorted)))) {
2428 : /* trivially (sub)sorted, and either we don't need to
2429 : * return group information, or we can trivially
2430 : * deduce the groups */
2431 12066 : if (sorted) {
2432 10302 : bn = COLcopy(b, b->ttype, false, TRANSIENT);
2433 10302 : if (bn == NULL)
2434 0 : goto error;
2435 10302 : *sorted = bn;
2436 : }
2437 12066 : if (order) {
2438 11171 : on = BATdense(b->hseqbase, b->hseqbase, BATcount(b));
2439 11172 : if (on == NULL)
2440 0 : goto error;
2441 11172 : *order = on;
2442 : }
2443 12067 : if (groups) {
2444 6366 : if (BATtkey(b)) {
2445 : /* singleton groups */
2446 5723 : gn = BATdense(b->hseqbase, 0, BATcount(b));
2447 5723 : if (gn == NULL)
2448 0 : goto error;
2449 : } else {
2450 : /* single group */
2451 643 : const oid *o = 0;
2452 643 : assert(BATcount(b) == 1 ||
2453 : (b->tsorted && b->trevsorted));
2454 643 : gn = BATconstant(b->hseqbase, TYPE_oid, &o, BATcount(b), TRANSIENT);
2455 643 : if (gn == NULL)
2456 0 : goto error;
2457 : }
2458 6366 : *groups = gn;
2459 : }
2460 12067 : bat_iterator_end(&pbi);
2461 12067 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o="
2462 : ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
2463 : ",reverse=%d,nilslast=%d,stable=%d) = ("
2464 : ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2465 : ALGOOPTBATFMT " -- trivial (" LLFMT
2466 : " usec)\n",
2467 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2468 : ALGOOPTBATPAR(g), reverse, nilslast, stable,
2469 : ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
2470 : ALGOOPTBATPAR(on), GDKusec() - t0);
2471 12067 : return GDK_SUCCEED;
2472 : }
2473 19546 : if (VIEWtparent(b)) {
2474 4181 : pb = BATdescriptor(VIEWtparent(b));
2475 4192 : if (pb != NULL &&
2476 4192 : (b->tbaseoff != pb->tbaseoff ||
2477 3331 : BATcount(b) != BATcount(pb) ||
2478 3061 : b->hseqbase != pb->hseqbase ||
2479 3051 : BATatoms[b->ttype].atomCmp != BATatoms[pb->ttype].atomCmp)) {
2480 1142 : BBPunfix(pb->batCacheid);
2481 1142 : pb = NULL;
2482 : }
2483 : } else {
2484 : pb = b;
2485 : }
2486 19557 : bat_iterator_end(&pbi);
2487 19552 : pbi = bat_iterator(pb);
2488 : /* when we will create an order index if it doesn't already exist */
2489 19554 : mkorderidx = (g == NULL && !reverse && !nilslast && pb != NULL && (order || !pbi.transient));
2490 19554 : if (g == NULL && !reverse && !nilslast && pb != NULL) {
2491 6377 : (void) BATcheckorderidx(pb);
2492 6377 : MT_lock_set(&pb->batIdxLock);
2493 6373 : if (pb->torderidx) {
2494 61 : if (!stable || ((oid *) pb->torderidx->base)[2]) {
2495 : /* we can use the order index */
2496 61 : oidxh = pb->torderidx;
2497 61 : HEAPincref(oidxh);
2498 : }
2499 : mkorderidx = false;
2500 6312 : } else if (b != pb) {
2501 : /* don't build orderidx on parent bat */
2502 : mkorderidx = false;
2503 5242 : } else if (mkorderidx) {
2504 : /* keep lock when going to create */
2505 4617 : orderidxlock = true;
2506 : }
2507 6373 : if (!orderidxlock)
2508 1756 : MT_lock_unset(&pb->batIdxLock);
2509 : }
2510 19553 : if (g == NULL && o == NULL && !reverse && !nilslast && oidxh != NULL) {
2511 : /* there is an order index that we can use */
2512 61 : on = COLnew(pb->hseqbase, TYPE_oid, pbi.count, TRANSIENT);
2513 61 : if (on == NULL)
2514 0 : goto error;
2515 61 : memcpy(Tloc(on, 0), (oid *) oidxh->base + ORDERIDXOFF, pbi.count * sizeof(oid));
2516 61 : BATsetcount(on, BATcount(b));
2517 61 : HEAPdecref(oidxh, false);
2518 61 : oidxh = NULL;
2519 61 : on->tkey = true;
2520 61 : on->tnil = false;
2521 61 : on->tnonil = true;
2522 61 : on->tsorted = on->trevsorted = false;
2523 61 : on->tseqbase = oid_nil;
2524 61 : if (sorted || groups) {
2525 61 : bn = BATproject(on, b);
2526 61 : if (bn == NULL)
2527 0 : goto error;
2528 61 : bn->tsorted = true;
2529 61 : if (groups) {
2530 6 : if (BATgroup_internal(groups, NULL, NULL, bn, NULL, g, NULL, NULL, true) != GDK_SUCCEED)
2531 0 : goto error;
2532 6 : if (sorted &&
2533 6 : (*groups)->tkey &&
2534 : g == NULL) {
2535 : /* if new groups bat is key
2536 : * and since there is no input
2537 : * groups bat, we know the
2538 : * result bat is key */
2539 4 : bn->tkey = true;
2540 : }
2541 : }
2542 61 : if (sorted)
2543 61 : *sorted = bn;
2544 : else {
2545 0 : BBPunfix(bn->batCacheid);
2546 0 : bn = NULL;
2547 : }
2548 : }
2549 61 : if (order)
2550 11 : *order = on;
2551 : else {
2552 50 : BBPunfix(on->batCacheid);
2553 50 : on = NULL;
2554 : }
2555 61 : bat_iterator_end(&pbi);
2556 61 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o="
2557 : ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
2558 : ",reverse=%d,nilslast=%d,stable=%d) = ("
2559 : ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2560 : ALGOOPTBATFMT " -- orderidx (" LLFMT
2561 : " usec)\n",
2562 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2563 : ALGOOPTBATPAR(g), reverse, nilslast, stable,
2564 : ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
2565 : ALGOOPTBATPAR(on), GDKusec() - t0);
2566 61 : if (pb != NULL && pb != b)
2567 53 : BBPunfix(pb->batCacheid);
2568 61 : return GDK_SUCCEED;
2569 12595 : } else if (oidxh) {
2570 0 : HEAPdecref(oidxh, false);
2571 0 : oidxh = NULL;
2572 : }
2573 19487 : if (o) {
2574 12051 : bn = BATproject(o, b);
2575 12051 : if (bn == NULL)
2576 0 : goto error;
2577 12051 : if (bn->ttype == TYPE_void || isVIEW(bn)) {
2578 5482 : BAT *b2 = COLcopy(bn, ATOMtype(bn->ttype), true, TRANSIENT);
2579 2741 : BBPunfix(bn->batCacheid);
2580 2741 : bn = b2;
2581 : }
2582 12050 : if (pb) {
2583 11512 : bat_iterator_end(&pbi);
2584 11514 : if (pb != b)
2585 1619 : BBPunfix(pb->batCacheid);
2586 11515 : pbi = bat_iterator(NULL);
2587 11515 : pb = NULL;
2588 : }
2589 : } else {
2590 7436 : bn = COLcopy(b, b->ttype, true, TRANSIENT);
2591 : }
2592 19470 : if (bn == NULL)
2593 0 : goto error;
2594 19470 : if (order) {
2595 : /* prepare order bat */
2596 16559 : if (o) {
2597 : /* make copy of input so that we can refine it;
2598 : * copy can be read-only if we take the shortcut
2599 : * below in the case g is "key" */
2600 10288 : on = COLcopy(o, TYPE_oid,
2601 10288 : g == NULL ||
2602 10289 : !(g->tkey || g->ttype == TYPE_void),
2603 : TRANSIENT);
2604 10291 : if (on == NULL)
2605 0 : goto error;
2606 10291 : BAThseqbase(on, b->hseqbase);
2607 10289 : on->tminpos = BUN_NONE;
2608 10289 : on->tmaxpos = BUN_NONE;
2609 : } else {
2610 : /* create new order */
2611 6271 : on = COLnew(b->hseqbase, TYPE_oid, BATcount(bn), TRANSIENT);
2612 6281 : if (on == NULL)
2613 0 : goto error;
2614 6281 : ords = (oid *) Tloc(on, 0);
2615 29985070 : for (p = 0, q = BATcount(bn); p < q; p++)
2616 29978789 : ords[p] = p + b->hseqbase;
2617 6281 : BATsetcount(on, BATcount(bn));
2618 6282 : on->tkey = true;
2619 6282 : on->tnil = false;
2620 6282 : on->tnonil = true;
2621 : }
2622 : /* COLcopy above can create TYPE_void */
2623 16571 : if (on->ttype != TYPE_void) {
2624 15805 : on->tsorted = on->trevsorted = false; /* it won't be sorted */
2625 15805 : on->tseqbase = oid_nil; /* and hence not dense */
2626 15805 : on->tnosorted = on->tnorevsorted = 0;
2627 : }
2628 16571 : *order = on;
2629 16571 : ords = (oid *) Tloc(on, 0);
2630 : } else {
2631 : ords = NULL;
2632 : }
2633 19482 : if (g) {
2634 12053 : if (g->tkey || g->ttype == TYPE_void) {
2635 : /* if g is "key", all groups are size 1, so no
2636 : * subsorting needed */
2637 5082 : if (sorted) {
2638 4604 : *sorted = bn;
2639 : } else {
2640 478 : BBPunfix(bn->batCacheid);
2641 478 : bn = NULL;
2642 : }
2643 5083 : if (order) {
2644 3905 : *order = on;
2645 3905 : if (o) {
2646 : /* we can inherit sortedness
2647 : * after all */
2648 3905 : on->tsorted = o->tsorted;
2649 3905 : on->trevsorted = o->trevsorted;
2650 3905 : if (o->tnosorted)
2651 56 : on->tnosorted = o->tnosorted;
2652 3905 : if (o->tnorevsorted)
2653 74 : on->tnorevsorted = o->tnorevsorted;
2654 : } else {
2655 : /* we didn't rearrange, so
2656 : * still sorted */
2657 0 : on->tsorted = true;
2658 0 : on->trevsorted = false;
2659 : }
2660 3905 : if (BATcount(on) <= 1) {
2661 0 : on->tsorted = true;
2662 0 : on->trevsorted = true;
2663 : }
2664 : }
2665 5083 : if (groups) {
2666 2912 : gn = COLcopy(g, g->ttype, false, TRANSIENT);
2667 2910 : if (gn == NULL)
2668 0 : goto error;
2669 2910 : *groups = gn;
2670 : }
2671 5081 : bat_iterator_end(&pbi);
2672 5081 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
2673 : ",o=" ALGOOPTBATFMT ",g=" ALGOBATFMT
2674 : ",reverse=%d,nilslast=%d,stable=%d"
2675 : ") = (" ALGOOPTBATFMT ","
2676 : ALGOOPTBATFMT "," ALGOOPTBATFMT
2677 : " -- key group (" LLFMT " usec)\n",
2678 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2679 : ALGOBATPAR(g), reverse, nilslast,
2680 : stable, ALGOOPTBATPAR(bn),
2681 : ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on),
2682 : GDKusec() - t0);
2683 5081 : if (pb != NULL && pb != b)
2684 0 : BBPunfix(pb->batCacheid);
2685 5081 : return GDK_SUCCEED;
2686 : }
2687 6971 : assert(g->ttype == TYPE_oid);
2688 6971 : grps = (oid *) Tloc(g, 0);
2689 6971 : prev = grps[0];
2690 6971 : if (BATmaterialize(bn, BUN_NONE) != GDK_SUCCEED)
2691 0 : goto error;
2692 44650988 : for (r = 0, p = 1, q = BATcount(g); p < q; p++) {
2693 44644017 : if (grps[p] != prev) {
2694 : /* sub sort [r,p) */
2695 7594588 : if (do_sort(Tloc(bn, r),
2696 3662572 : ords ? ords + r : NULL,
2697 3932016 : bn->tvheap ? bn->tvheap->base : NULL,
2698 3932016 : p - r, bn->twidth, ords ? sizeof(oid) : 0,
2699 3932016 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)
2700 0 : goto error;
2701 3932014 : r = p;
2702 3932014 : prev = grps[p];
2703 : }
2704 : }
2705 : /* sub sort [r,q) */
2706 13358 : if (do_sort(Tloc(bn, r),
2707 6387 : ords ? ords + r : NULL,
2708 6971 : bn->tvheap ? bn->tvheap->base : NULL,
2709 6971 : p - r, bn->twidth, ords ? sizeof(oid) : 0,
2710 6971 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)
2711 0 : goto error;
2712 : /* if single group (r==0) the result is (rev)sorted,
2713 : * otherwise (maybe) not */
2714 6971 : bn->tsorted = r == 0 && !reverse && !nilslast;
2715 13907 : bn->trevsorted = r == 0 && reverse && nilslast;
2716 : } else {
2717 7429 : Heap *m = NULL;
2718 : /* only invest in creating an order index if the BAT
2719 : * is persistent */
2720 7429 : if (mkorderidx) {
2721 4614 : assert(orderidxlock);
2722 4614 : if ((m = createOIDXheap(pb, stable)) != NULL &&
2723 : ords == NULL) {
2724 0 : ords = (oid *) m->base + ORDERIDXOFF;
2725 0 : if (o && o->ttype != TYPE_void)
2726 0 : memcpy(ords, Tloc(o, 0), BATcount(o) * sizeof(oid));
2727 0 : else if (o)
2728 0 : for (p = 0, q = BATcount(o); p < q; p++)
2729 0 : ords[p] = p + o->tseqbase;
2730 : else
2731 0 : for (p = 0, q = BATcount(b); p < q; p++)
2732 0 : ords[p] = p + b->hseqbase;
2733 : }
2734 : }
2735 7432 : if ((reverse != nilslast ||
2736 14045 : (reverse ? !bn->trevsorted : !bn->tsorted)) &&
2737 13427 : (BATmaterialize(bn, BUN_NONE) != GDK_SUCCEED ||
2738 6715 : do_sort(Tloc(bn, 0),
2739 : ords,
2740 6715 : bn->tvheap ? bn->tvheap->base : NULL,
2741 6715 : BATcount(bn), bn->twidth, ords ? sizeof(oid) : 0,
2742 6715 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)) {
2743 0 : if (m != NULL) {
2744 0 : HEAPfree(m, true);
2745 0 : GDKfree(m);
2746 : }
2747 0 : goto error;
2748 : }
2749 7440 : bn->tsorted = !reverse && !nilslast;
2750 7440 : bn->trevsorted = reverse && nilslast;
2751 7440 : if (m != NULL) {
2752 4617 : assert(orderidxlock);
2753 4617 : if (pb->torderidx == NULL) {
2754 4617 : if (ords != (oid *) m->base + ORDERIDXOFF) {
2755 4617 : memcpy((oid *) m->base + ORDERIDXOFF,
2756 : ords,
2757 4617 : pbi.count * sizeof(oid));
2758 : }
2759 4617 : pb->torderidx = m;
2760 4617 : persistOIDX(pb);
2761 : } else {
2762 0 : HEAPfree(m, true);
2763 0 : GDKfree(m);
2764 : }
2765 : }
2766 : }
2767 14409 : if (orderidxlock) {
2768 4615 : MT_lock_unset(&pb->batIdxLock);
2769 4616 : orderidxlock = false;
2770 : }
2771 14411 : bn->theap->dirty = true;
2772 14411 : bn->tnosorted = 0;
2773 14411 : bn->tnorevsorted = 0;
2774 14411 : bn->tnokey[0] = bn->tnokey[1] = 0;
2775 14411 : bn->tminpos = BUN_NONE;
2776 14411 : bn->tmaxpos = BUN_NONE;
2777 14411 : if (groups) {
2778 8760 : if (BATgroup_internal(groups, NULL, NULL, bn, NULL, g, NULL, NULL, true) != GDK_SUCCEED)
2779 0 : goto error;
2780 8758 : if ((*groups)->tkey &&
2781 957 : (g == NULL || (g->tsorted && g->trevsorted))) {
2782 : /* if new groups bat is key and the input
2783 : * group bat has a single value (both sorted
2784 : * and revsorted), we know the result bat is
2785 : * key */
2786 1169 : bn->tkey = true;
2787 : }
2788 : }
2789 :
2790 14409 : bat_iterator_end(&pbi);
2791 14407 : if (sorted)
2792 10546 : *sorted = bn;
2793 : else {
2794 3861 : BBPunfix(bn->batCacheid);
2795 3861 : bn = NULL;
2796 : }
2797 :
2798 14411 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o=" ALGOOPTBATFMT
2799 : ",g=" ALGOOPTBATFMT ",reverse=%d,nilslast=%d,"
2800 : "stable=%d) = (" ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2801 : ALGOOPTBATFMT " -- %ssort (" LLFMT " usec)\n",
2802 : ALGOBATPAR(b), ALGOOPTBATPAR(o), ALGOOPTBATPAR(g),
2803 : reverse, nilslast, stable, ALGOOPTBATPAR(bn),
2804 : ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on),
2805 : g ? "grouped " : "", GDKusec() - t0);
2806 14411 : if (pb && pb != b)
2807 1375 : BBPunfix(pb->batCacheid);
2808 : return GDK_SUCCEED;
2809 :
2810 0 : error:
2811 0 : bat_iterator_end(&pbi);
2812 0 : if (orderidxlock)
2813 0 : MT_lock_unset(&pb->batIdxLock);
2814 0 : if (oidxh)
2815 0 : HEAPdecref(oidxh, false);
2816 0 : BBPreclaim(bn);
2817 0 : if (pb && pb != b)
2818 0 : BBPunfix(pb->batCacheid);
2819 0 : BBPreclaim(on);
2820 0 : if (sorted)
2821 0 : *sorted = NULL;
2822 0 : if (order)
2823 0 : *order = NULL;
2824 0 : if (groups)
2825 0 : *groups = NULL;
2826 : return GDK_FAIL;
2827 : }
2828 :
2829 : /* return a new BAT of length n with seqbase hseq, and the constant v
2830 : * in the tail */
2831 : BAT *
2832 2113848 : BATconstant(oid hseq, int tailtype, const void *v, BUN n, role_t role)
2833 : {
2834 2113848 : BAT *bn;
2835 2113848 : void *restrict p;
2836 2113848 : BUN i;
2837 2113848 : lng t0 = 0;
2838 :
2839 2113848 : TRC_DEBUG_IF(ALGO) t0 = GDKusec();
2840 2113848 : if (v == NULL)
2841 : return NULL;
2842 2113848 : bn = COLnew(hseq, tailtype, n, role);
2843 2118193 : if (bn != NULL && n > 0) {
2844 88836 : p = Tloc(bn, 0);
2845 88836 : switch (ATOMstorage(tailtype)) {
2846 24 : case TYPE_void:
2847 24 : v = &oid_nil;
2848 24 : BATtseqbase(bn, oid_nil);
2849 24 : break;
2850 0 : case TYPE_msk:
2851 0 : if (*(msk*)v) {
2852 0 : memset(p, 0xFF, 4 * ((n + 31) / 32));
2853 0 : if (n & 31) {
2854 0 : uint32_t *m = p;
2855 0 : m[n / 32] &= (1U << (n % 32)) - 1;
2856 : }
2857 : } else
2858 0 : memset(p, 0x00, 4 * ((n + 31) / 32));
2859 : break;
2860 12327 : case TYPE_bte:
2861 12327 : memset(p, *(bte*)v, n);
2862 12327 : break;
2863 : case TYPE_sht:
2864 7161464 : for (i = 0; i < n; i++)
2865 7142814 : ((sht *) p)[i] = *(sht *) v;
2866 : break;
2867 : case TYPE_int:
2868 : case TYPE_flt:
2869 : assert(sizeof(int) == sizeof(flt));
2870 222716668 : for (i = 0; i < n; i++)
2871 222710314 : ((int *) p)[i] = *(int *) v;
2872 : break;
2873 : case TYPE_lng:
2874 : case TYPE_dbl:
2875 : assert(sizeof(lng) == sizeof(dbl));
2876 234886491 : for (i = 0; i < n; i++)
2877 234852419 : ((lng *) p)[i] = *(lng *) v;
2878 : break;
2879 : #ifdef HAVE_HGE
2880 : case TYPE_hge:
2881 27265890 : for (i = 0; i < n; i++)
2882 27264271 : ((hge *) p)[i] = *(hge *) v;
2883 : break;
2884 : #endif
2885 : case TYPE_uuid:
2886 200047 : for (i = 0; i < n; i++)
2887 200038 : ((uuid *) p)[i] = *(uuid *) v;
2888 : break;
2889 15632 : case TYPE_str:
2890 : /* insert the first value, then just copy the
2891 : * offset lots of times */
2892 15632 : if (tfastins_nocheck(bn, 0, v) != GDK_SUCCEED) {
2893 0 : BBPreclaim(bn);
2894 0 : return NULL;
2895 : }
2896 15636 : char val[sizeof(var_t)];
2897 15636 : memcpy(val, Tloc(bn, 0), bn->twidth);
2898 15636 : if (bn->twidth == 1 && n > 1) {
2899 : /* single byte value: we have a
2900 : * function for that */
2901 8069 : memset(Tloc(bn, 1), val[0], n - 1);
2902 : } else {
2903 7567 : char *p = Tloc(bn, 0);
2904 7587 : for (i = 1; i < n; i++) {
2905 20 : p += bn->twidth;
2906 20 : memcpy(p, val, bn->twidth);
2907 : }
2908 : }
2909 : break;
2910 : default:
2911 333712 : for (i = 0; i < n; i++)
2912 333562 : if (tfastins_nocheck(bn, i, v) != GDK_SUCCEED) {
2913 0 : BBPreclaim(bn);
2914 0 : return NULL;
2915 : }
2916 : break;
2917 : }
2918 88841 : bn->theap->dirty = true;
2919 88841 : bn->tnil = n >= 1 && ATOMnilptr(tailtype) && (*ATOMcompare(tailtype))(v, ATOMnilptr(tailtype)) == 0;
2920 88852 : BATsetcount(bn, n);
2921 88954 : bn->tsorted = bn->trevsorted = ATOMlinear(tailtype);
2922 88954 : bn->tnonil = !bn->tnil;
2923 88954 : bn->tkey = BATcount(bn) <= 1;
2924 : }
2925 2118311 : TRC_DEBUG(ALGO, "-> " ALGOOPTBATFMT " " LLFMT "usec\n",
2926 : ALGOOPTBATPAR(bn), GDKusec() - t0);
2927 : return bn;
2928 : }
2929 :
2930 : /*
2931 : * BAT Aggregates
2932 : *
2933 : * We retain the size() and card() aggregate results in the column
2934 : * descriptor. We would like to have such functionality in an
2935 : * extensible way for many aggregates, for DD (1) we do not want to
2936 : * change the binary BAT format on disk and (2) aggr and size are the
2937 : * most relevant aggregates.
2938 : *
2939 : * It is all hacked into the aggr[3] records; three adjacent integers
2940 : * that were left over in the column record. We refer to these as if
2941 : * it where an int aggr[3] array. The below routines set and retrieve
2942 : * the aggregate values from the tail of the BAT, as many
2943 : * aggregate-manipulating BAT functions work on tail.
2944 : *
2945 : * The rules are as follows: aggr[0] contains the alignment ID of the
2946 : * column (if set i.e. nonzero). Hence, if this value is nonzero and
2947 : * equal to b->talign, the precomputed aggregate values in
2948 : * aggr[GDK_AGGR_SIZE] and aggr[GDK_AGGR_CARD] hold. However, only one
2949 : * of them may be set at the time. This is encoded by the value
2950 : * int_nil, which cannot occur in these two aggregates.
2951 : *
2952 : * This was now extended to record the property whether we know there
2953 : * is a nil value present by mis-using the highest bits of both
2954 : * GDK_AGGR_SIZE and GDK_AGGR_CARD.
2955 : */
2956 :
2957 : void
2958 48468830 : PROPdestroy_nolock(BAT *b)
2959 : {
2960 48468830 : PROPrec *p = b->tprops;
2961 48468830 : PROPrec *n;
2962 :
2963 48468830 : b->tprops = NULL;
2964 48472705 : while (p) {
2965 4895 : n = p->next;
2966 4895 : assert(p->id != (enum prop_t) 20);
2967 4895 : VALclear(&p->v);
2968 4895 : GDKfree(p);
2969 4895 : p = n;
2970 : }
2971 48467810 : }
2972 :
2973 : void
2974 433 : PROPdestroy(BAT *b)
2975 : {
2976 433 : MT_lock_set(&b->theaplock);
2977 433 : PROPdestroy_nolock(b);
2978 433 : MT_lock_unset(&b->theaplock);
2979 433 : }
2980 :
2981 : ValPtr
2982 207346782 : BATgetprop_nolock(BAT *b, enum prop_t idx)
2983 : {
2984 207346782 : PROPrec *p;
2985 :
2986 207346782 : p = b->tprops;
2987 207353797 : while (p && p->id != idx)
2988 7015 : p = p->next;
2989 207346782 : return p ? &p->v : NULL;
2990 : }
2991 :
2992 : void
2993 404261 : BATrmprop_nolock(BAT *b, enum prop_t idx)
2994 : {
2995 404261 : PROPrec *prop = b->tprops, *prev = NULL;
2996 :
2997 404496 : while (prop) {
2998 404369 : if (prop->id == idx) {
2999 404134 : if (prev)
3000 106 : prev->next = prop->next;
3001 : else
3002 404028 : b->tprops = prop->next;
3003 404134 : VALclear(&prop->v);
3004 404134 : GDKfree(prop);
3005 404134 : return;
3006 : }
3007 235 : prev = prop;
3008 235 : prop = prop->next;
3009 : }
3010 : }
3011 :
3012 : ValPtr
3013 409068 : BATsetprop_nolock(BAT *b, enum prop_t idx, int type, const void *v)
3014 : {
3015 409068 : PROPrec *p;
3016 :
3017 409068 : p = b->tprops;
3018 416434 : while (p && p->id != idx)
3019 7366 : p = p->next;
3020 409068 : if (p == NULL) {
3021 409061 : if ((p = GDKmalloc(sizeof(PROPrec))) == NULL) {
3022 : /* properties are hints, so if we can't create
3023 : * one we ignore the error */
3024 0 : GDKclrerr();
3025 0 : return NULL;
3026 : }
3027 409061 : p->id = idx;
3028 409061 : p->next = b->tprops;
3029 409061 : p->v.vtype = 0;
3030 409061 : b->tprops = p;
3031 : } else {
3032 7 : VALclear(&p->v);
3033 : }
3034 409068 : if (VALinit(&p->v, type, v) == NULL) {
3035 : /* failed to initialize, so remove property */
3036 0 : BATrmprop_nolock(b, idx);
3037 0 : GDKclrerr();
3038 0 : p = NULL;
3039 : }
3040 0 : return p ? &p->v : NULL;
3041 : }
3042 :
3043 : ValPtr
3044 3416210 : BATgetprop(BAT *b, enum prop_t idx)
3045 : {
3046 3416210 : ValPtr p;
3047 :
3048 3416210 : MT_lock_set(&b->theaplock);
3049 3419848 : p = BATgetprop_nolock(b, idx);
3050 3419693 : MT_lock_unset(&b->theaplock);
3051 3420927 : return p;
3052 : }
3053 :
3054 : ValPtr
3055 4705 : BATsetprop(BAT *b, enum prop_t idx, int type, const void *v)
3056 : {
3057 4705 : ValPtr p;
3058 4705 : MT_lock_set(&b->theaplock);
3059 4705 : p = BATsetprop_nolock(b, idx, type, v);
3060 4705 : MT_lock_unset(&b->theaplock);
3061 4705 : return p;
3062 : }
3063 :
3064 : void
3065 2 : BATrmprop(BAT *b, enum prop_t idx)
3066 : {
3067 2 : MT_lock_set(&b->theaplock);
3068 2 : BATrmprop_nolock(b, idx);
3069 2 : MT_lock_unset(&b->theaplock);
3070 2 : }
3071 :
3072 : /*
3073 : * The BATcount_no_nil function counts all BUN in a BAT that have a
3074 : * non-nil tail value.
3075 : * This function does not fail (the callers currently don't check for failure).
3076 : */
3077 : BUN
3078 2252 : BATcount_no_nil(BAT *b, BAT *s)
3079 : {
3080 2252 : BUN cnt = 0;
3081 2252 : const void *restrict p, *restrict nil;
3082 2252 : const char *restrict base;
3083 2252 : int t;
3084 2252 : int (*cmp)(const void *, const void *);
3085 2252 : struct canditer ci;
3086 2252 : oid hseq;
3087 :
3088 2252 : BATcheck(b, 0);
3089 :
3090 2252 : hseq = b->hseqbase;
3091 2252 : canditer_init(&ci, b, s);
3092 2252 : BATiter bi = bat_iterator(b);
3093 2252 : if (bi.nonil) {
3094 2013 : bat_iterator_end(&bi);
3095 2012 : return ci.ncand;
3096 : }
3097 239 : p = bi.base;
3098 239 : t = ATOMbasetype(bi.type);
3099 239 : switch (t) {
3100 0 : case TYPE_void:
3101 0 : cnt = ci.ncand * BATtdensebi(&bi);
3102 0 : break;
3103 0 : case TYPE_msk:
3104 0 : cnt = ci.ncand;
3105 0 : break;
3106 16 : case TYPE_bte:
3107 32 : CAND_LOOP(&ci)
3108 16 : cnt += !is_bte_nil(((const bte *) p)[canditer_next(&ci) - hseq]);
3109 : break;
3110 0 : case TYPE_sht:
3111 0 : CAND_LOOP(&ci)
3112 0 : cnt += !is_sht_nil(((const sht *) p)[canditer_next(&ci) - hseq]);
3113 : break;
3114 113 : case TYPE_int:
3115 3329725 : CAND_LOOP(&ci)
3116 3329612 : cnt += !is_int_nil(((const int *) p)[canditer_next(&ci) - hseq]);
3117 : break;
3118 85 : case TYPE_lng:
3119 156873 : CAND_LOOP(&ci)
3120 156788 : cnt += !is_lng_nil(((const lng *) p)[canditer_next(&ci) - hseq]);
3121 : break;
3122 : #ifdef HAVE_HGE
3123 0 : case TYPE_hge:
3124 0 : CAND_LOOP(&ci)
3125 0 : cnt += !is_hge_nil(((const hge *) p)[canditer_next(&ci) - hseq]);
3126 : break;
3127 : #endif
3128 0 : case TYPE_flt:
3129 0 : CAND_LOOP(&ci)
3130 0 : cnt += !is_flt_nil(((const flt *) p)[canditer_next(&ci) - hseq]);
3131 : break;
3132 0 : case TYPE_dbl:
3133 0 : CAND_LOOP(&ci)
3134 0 : cnt += !is_dbl_nil(((const dbl *) p)[canditer_next(&ci) - hseq]);
3135 : break;
3136 0 : case TYPE_uuid:
3137 0 : CAND_LOOP(&ci)
3138 0 : cnt += !is_uuid_nil(((const uuid *) p)[canditer_next(&ci) - hseq]);
3139 : break;
3140 25 : case TYPE_str:
3141 25 : base = bi.vh->base;
3142 25 : switch (bi.width) {
3143 23 : case 1:
3144 4809 : CAND_LOOP(&ci)
3145 4786 : cnt += base[(var_t) ((const uint8_t *) p)[canditer_next(&ci) - hseq] + GDK_VAROFFSET] != '\200';
3146 : break;
3147 2 : case 2:
3148 312 : CAND_LOOP(&ci)
3149 310 : cnt += base[(var_t) ((const uint16_t *) p)[canditer_next(&ci) - hseq] + GDK_VAROFFSET] != '\200';
3150 : break;
3151 0 : case 4:
3152 0 : CAND_LOOP(&ci)
3153 0 : cnt += base[(var_t) ((const uint32_t *) p)[canditer_next(&ci) - hseq]] != '\200';
3154 : break;
3155 : #if SIZEOF_VAR_T == 8
3156 0 : case 8:
3157 0 : CAND_LOOP(&ci)
3158 0 : cnt += base[(var_t) ((const uint64_t *) p)[canditer_next(&ci) - hseq]] != '\200';
3159 : break;
3160 : #endif
3161 : default:
3162 0 : MT_UNREACHABLE();
3163 : }
3164 : break;
3165 0 : default:
3166 0 : nil = ATOMnilptr(t);
3167 0 : cmp = ATOMcompare(t);
3168 0 : if (nil == NULL) {
3169 0 : cnt = ci.ncand;
3170 0 : } else if (b->tvheap) {
3171 0 : base = b->tvheap->base;
3172 0 : CAND_LOOP(&ci)
3173 0 : cnt += (*cmp)(nil, base + ((const var_t *) p)[canditer_next(&ci) - hseq]) != 0;
3174 : } else {
3175 0 : CAND_LOOP(&ci)
3176 0 : cnt += (*cmp)(BUNtloc(bi, canditer_next(&ci) - hseq), nil) != 0;
3177 : }
3178 : break;
3179 : }
3180 239 : if (cnt == bi.count) {
3181 35 : MT_lock_set(&b->theaplock);
3182 35 : if (cnt == BATcount(b) && bi.h == b->theap) {
3183 : /* we learned something */
3184 35 : b->tnonil = true;
3185 35 : assert(!b->tnil);
3186 35 : b->tnil = false;
3187 : }
3188 35 : bat pbid = VIEWtparent(b);
3189 35 : MT_lock_unset(&b->theaplock);
3190 35 : if (pbid) {
3191 21 : BAT *pb = BATdescriptor(pbid);
3192 21 : if (pb) {
3193 21 : MT_lock_set(&pb->theaplock);
3194 21 : if (cnt == BATcount(pb) &&
3195 0 : bi.h == pb->theap &&
3196 0 : !pb->tnonil) {
3197 0 : pb->tnonil = true;
3198 0 : assert(!pb->tnil);
3199 0 : pb->tnil = false;
3200 : }
3201 21 : MT_lock_unset(&pb->theaplock);
3202 21 : BBPunfix(pb->batCacheid);
3203 : }
3204 : }
3205 : }
3206 239 : bat_iterator_end(&bi);
3207 239 : return cnt;
3208 : }
|