Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * (c) M. L. Kersten, P. Boncz, S. Manegold, N. Nes, K.S. Mullender
15 : * Common BAT Operations
16 : * We factor out all possible overhead by inlining code. This
17 : * includes the macros BUNhead and BUNtail, which do a test to see
18 : * whether the atom resides in the buns or in a variable storage
19 : * heap.
20 : */
21 : #include "monetdb_config.h"
22 : #include "gdk.h"
23 : #include "gdk_private.h"
24 :
25 : gdk_return
26 51696933 : unshare_varsized_heap(BAT *b)
27 : {
28 51696933 : if (ATOMvarsized(b->ttype) &&
29 20945364 : b->tvheap->parentid != b->batCacheid) {
30 855 : Heap *h = GDKmalloc(sizeof(Heap));
31 860 : if (h == NULL)
32 : return GDK_FAIL;
33 860 : MT_thread_setalgorithm("unshare vheap");
34 1722 : *h = (Heap) {
35 861 : .parentid = b->batCacheid,
36 861 : .farmid = BBPselectfarm(b->batRole, TYPE_str, varheap),
37 : .refs = ATOMIC_VAR_INIT(1),
38 : };
39 861 : strconcat_len(h->filename, sizeof(h->filename),
40 861 : BBP_physical(b->batCacheid), ".theap", NULL);
41 861 : if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
42 0 : HEAPfree(h, true);
43 0 : GDKfree(h);
44 0 : return GDK_FAIL;
45 : }
46 859 : MT_lock_set(&b->theaplock);
47 859 : Heap *oh = b->tvheap;
48 859 : b->tvheap = h;
49 859 : MT_lock_unset(&b->theaplock);
50 860 : BBPrelease(oh->parentid);
51 861 : HEAPdecref(oh, false);
52 : }
53 : return GDK_SUCCEED;
54 : }
55 :
56 : /* We try to be clever when appending one string bat to another.
57 : * First of all, we try to actually share the string heap so that we
58 : * don't need an extra copy, and if that can't be done, we see whether
59 : * it makes sense to just quickly copy the whole string heap instead
60 : * of inserting individual strings. See the comments in the code for
61 : * more information. */
62 : static gdk_return
63 65944 : insert_string_bat(BAT *b, BATiter *ni, struct canditer *ci, bool force, bool mayshare, QryCtx *qry_ctx)
64 : {
65 65944 : size_t toff = ~(size_t) 0; /* tail offset */
66 65944 : BUN p, r; /* loop variables */
67 65944 : const void *tp = NULL; /* tail value pointer */
68 65944 : var_t v;
69 65944 : size_t off; /* offset within n's string heap */
70 65944 : BUN cnt = ci->ncand;
71 65944 : BUN oldcnt = BATcount(b);
72 :
73 65944 : assert(b->ttype == TYPE_str);
74 65944 : assert(b->tbaseoff == 0);
75 65944 : assert(b->theap->parentid == b->batCacheid);
76 : /* only transient bats can use some other bat's string heap */
77 65944 : assert(b->batRole == TRANSIENT || b->tvheap->parentid == b->batCacheid);
78 65944 : if (cnt == 0)
79 : return GDK_SUCCEED;
80 :
81 65960 : if (b->tvheap == ni->vh) {
82 : /* vheaps are already shared, continue doing so: we just
83 : * need to append the offsets */
84 14459 : toff = 0;
85 14459 : MT_thread_setalgorithm("shared vheap");
86 51501 : } else if (mayshare && b->batRole == TRANSIENT && oldcnt == 0) {
87 : /* we can share the vheaps, so we then only need to
88 : * append the offsets */
89 41204 : MT_lock_set(&b->theaplock);
90 41357 : bat bid = b->tvheap->parentid;
91 41357 : HEAPdecref(b->tvheap, bid == b->batCacheid);
92 41404 : HEAPincref(ni->vh);
93 41419 : b->tvheap = ni->vh;
94 41419 : b->tascii = ni->ascii;
95 41419 : MT_lock_unset(&b->theaplock);
96 41441 : BBPretain(ni->vh->parentid);
97 41412 : if (bid != b->batCacheid)
98 0 : BBPrelease(bid);
99 41412 : toff = 0;
100 41412 : MT_thread_setalgorithm("share vheap");
101 : } else {
102 : /* no heap sharing, so also make sure the heap isn't
103 : * shared currently (we're not allowed to write in
104 : * another bat's heap) */
105 11159 : if (b->tvheap->parentid != b->batCacheid &&
106 854 : unshare_varsized_heap(b) != GDK_SUCCEED) {
107 : return GDK_FAIL;
108 : }
109 10305 : if (oldcnt == 0 || (!GDK_ELIMDOUBLES(b->tvheap) &&
110 64 : !GDK_ELIMDOUBLES(ni->vh))) {
111 : /* we'll consider copying the string heap completely
112 : *
113 : * we first estimate how much space the string heap
114 : * should occupy, given the number of rows we need to
115 : * insert, then, if that is way smaller than the actual
116 : * space occupied, we will skip the copy and just insert
117 : * one by one */
118 : size_t len = 0;
119 5744140 : for (int i = 0; i < 1024; i++) {
120 5738469 : p = (BUN) (((double) rand() / RAND_MAX) * (cnt - 1));
121 5738538 : p = canditer_idx(ci, p) - ni->b->hseqbase;
122 5738538 : len += strlen(BUNtvar(*ni, p)) + 1;
123 : }
124 5671 : len = (len + 512) / 1024; /* rounded average length */
125 5671 : r = (GDK_ELIMLIMIT - GDK_STRHASHSIZE) / (len + 12);
126 : /* r is estimate of number of strings in
127 : * double-eliminated area */
128 5671 : BUN ecnt = ci->ncand;
129 5671 : if (ni->b->tunique_est > 0 && ecnt > ni->b->tunique_est)
130 55 : ecnt = (BUN)ni->b->tunique_est;
131 5671 : if (r < ecnt)
132 917 : len = GDK_ELIMLIMIT + (ecnt - r) * len;
133 : else
134 4754 : len = GDK_STRHASHSIZE + ecnt * (len + 12);
135 : /* len is total estimated expected size of vheap */
136 :
137 5671 : if (len > ni->vhfree / 2) {
138 : /* we copy the string heap, perhaps appending */
139 5663 : if (oldcnt == 0) {
140 5632 : toff = 0;
141 5632 : MT_thread_setalgorithm("copy vheap");
142 : } else {
143 31 : toff = (b->tvheap->free + GDK_VARALIGN - 1) & ~(GDK_VARALIGN - 1);
144 31 : MT_thread_setalgorithm("append vheap");
145 : }
146 :
147 5665 : MT_lock_set(&b->theaplock);
148 5663 : if (HEAPgrow(&b->tvheap, toff + ni->vhfree, force) != GDK_SUCCEED) {
149 0 : MT_lock_unset(&b->theaplock);
150 0 : return GDK_FAIL;
151 : }
152 5658 : memcpy(b->tvheap->base + toff, ni->vh->base, ni->vhfree);
153 5658 : b->tvheap->free = toff + ni->vhfree;
154 5658 : b->tvheap->dirty = true;
155 5658 : b->tascii &= ni->ascii;
156 5658 : MT_lock_unset(&b->theaplock);
157 : }
158 : }
159 : }
160 : /* if toff has the initial value of ~0, we insert strings
161 : * individually, otherwise we only copy (insert) offsets */
162 61529 : if (toff == ~(size_t) 0)
163 : v = GDK_VAROFFSET;
164 : else
165 61494 : v = b->tvheap->free - 1;
166 :
167 : /* make sure there is (vertical) space in the offset heap, we
168 : * may also widen thanks to v, set above */
169 66205 : if (GDKupgradevarheap(b, v, oldcnt + cnt < b->batCapacity ? b->batCapacity : oldcnt + cnt, b->batCount) != GDK_SUCCEED) {
170 : return GDK_FAIL;
171 : }
172 :
173 66162 : if (toff == 0 && ni->width == b->twidth && ci->tpe == cand_dense) {
174 : /* we don't need to do any translation of offset
175 : * values, so we can use fast memcpy */
176 61344 : MT_thread_setalgorithm("memcpy offsets");
177 61390 : memcpy(Tloc(b, BATcount(b)), (const char *) ni->base + ((ci->seq - ni->b->hseqbase) << ni->shift), cnt << ni->shift);
178 4818 : } else if (toff != ~(size_t) 0) {
179 : /* we don't need to insert any actual strings since we
180 : * have already made sure that they are all in b's
181 : * string heap at known locations (namely the offset
182 : * in n added to toff), so insert offsets from n after
183 : * adding toff into b */
184 : /* note the use of the "restrict" qualifier here: all
185 : * four pointers below point to the same value, but
186 : * only one of them will actually be used, hence we
187 : * still obey the rule for restrict-qualified
188 : * pointers */
189 130 : const uint8_t *restrict tbp = (const uint8_t *) ni->base;
190 130 : const uint16_t *restrict tsp = (const uint16_t *) ni->base;
191 130 : const uint32_t *restrict tip = (const uint32_t *) ni->base;
192 : #if SIZEOF_VAR_T == 8
193 130 : const uint64_t *restrict tlp = (const uint64_t *) ni->base;
194 : #endif
195 :
196 130 : MT_thread_setalgorithm("copy offset values");
197 130 : r = b->batCount;
198 3520141 : TIMEOUT_LOOP(cnt, qry_ctx) {
199 3519685 : p = canditer_next(ci) - ni->b->hseqbase;
200 3519686 : switch (ni->width) {
201 90 : case 1:
202 90 : v = (var_t) tbp[p] + GDK_VAROFFSET;
203 90 : break;
204 239 : case 2:
205 239 : v = (var_t) tsp[p] + GDK_VAROFFSET;
206 239 : break;
207 3519358 : case 4:
208 3519358 : v = (var_t) tip[p];
209 3519358 : break;
210 : #if SIZEOF_VAR_T == 8
211 0 : case 8:
212 0 : v = (var_t) tlp[p];
213 0 : break;
214 : #endif
215 : default:
216 0 : MT_UNREACHABLE();
217 : }
218 3519686 : v = (var_t) ((size_t) v + toff);
219 3519686 : assert(v >= GDK_VAROFFSET);
220 3519686 : assert((size_t) v < b->tvheap->free);
221 3519686 : switch (b->twidth) {
222 240 : case 1:
223 240 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 8));
224 240 : ((uint8_t *) b->theap->base)[r++] = (uint8_t) (v - GDK_VAROFFSET);
225 240 : break;
226 745 : case 2:
227 745 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 16));
228 745 : ((uint16_t *) b->theap->base)[r++] = (uint16_t) (v - GDK_VAROFFSET);
229 745 : break;
230 3518701 : case 4:
231 : #if SIZEOF_VAR_T == 8
232 3518701 : assert(v < ((var_t) 1 << 32));
233 : #endif
234 3518701 : ((uint32_t *) b->theap->base)[r++] = (uint32_t) v;
235 3518701 : break;
236 : #if SIZEOF_VAR_T == 8
237 0 : case 8:
238 0 : ((uint64_t *) b->theap->base)[r++] = (uint64_t) v;
239 0 : break;
240 : #endif
241 : default:
242 3519686 : MT_UNREACHABLE();
243 : }
244 : }
245 4688 : } else if (b->tvheap->free < ni->vhfree / 2 ||
246 : GDK_ELIMDOUBLES(b->tvheap)) {
247 : /* if b's string heap is much smaller than n's string
248 : * heap, don't bother checking whether n's string
249 : * values occur in b's string heap; also, if b is
250 : * (still) fully double eliminated, we must continue
251 : * to use the double elimination mechanism */
252 4656 : r = b->batCount;
253 4656 : oid hseq = ni->b->hseqbase;
254 4656 : MT_thread_setalgorithm("insert string values");
255 8209358 : TIMEOUT_LOOP(cnt, qry_ctx) {
256 8199522 : p = canditer_next(ci) - hseq;
257 8177976 : tp = BUNtvar(*ni, p);
258 8177976 : if (tfastins_nocheckVAR(b, r, tp) != GDK_SUCCEED) {
259 : return GDK_FAIL;
260 : }
261 8199564 : r++;
262 : }
263 : } else {
264 : /* Insert values from n individually into b; however,
265 : * we check whether there is a string in b's string
266 : * heap at the same offset as the string is in n's
267 : * string heap (in case b's string heap is a copy of
268 : * n's). If this is the case, we just copy the
269 : * offset, otherwise we insert normally. */
270 32 : r = b->batCount;
271 32 : MT_thread_setalgorithm("insert string values with check");
272 4017 : TIMEOUT_LOOP(cnt, qry_ctx) {
273 3953 : p = canditer_next(ci) - ni->b->hseqbase;
274 3953 : off = BUNtvaroff(*ni, p); /* the offset */
275 3953 : tp = ni->vh->base + off; /* the string */
276 3953 : if (off < b->tvheap->free &&
277 3953 : strcmp(b->tvheap->base + off, tp) == 0) {
278 : /* we found the string at the same
279 : * offset in b's string heap as it was
280 : * in n's string heap, so we don't
281 : * have to insert a new string into b:
282 : * we can just copy the offset */
283 684 : v = (var_t) off;
284 684 : switch (b->twidth) {
285 0 : case 1:
286 0 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 8));
287 0 : ((uint8_t *) b->theap->base)[r] = (uint8_t) (v - GDK_VAROFFSET);
288 0 : break;
289 4 : case 2:
290 4 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 16));
291 4 : ((uint16_t *) b->theap->base)[r] = (uint16_t) (v - GDK_VAROFFSET);
292 4 : break;
293 680 : case 4:
294 : #if SIZEOF_VAR_T == 8
295 680 : assert(v < ((var_t) 1 << 32));
296 : #endif
297 680 : ((uint32_t *) b->theap->base)[r] = (uint32_t) v;
298 680 : break;
299 : #if SIZEOF_VAR_T == 8
300 0 : case 8:
301 0 : ((uint64_t *) b->theap->base)[r] = (uint64_t) v;
302 0 : break;
303 : #endif
304 : default:
305 0 : MT_UNREACHABLE();
306 : }
307 : } else {
308 3269 : if (tfastins_nocheckVAR(b, r, tp) != GDK_SUCCEED) {
309 : return GDK_FAIL;
310 : }
311 : }
312 3953 : r++;
313 : }
314 : }
315 66261 : TIMEOUT_CHECK(qry_ctx, TIMEOUT_HANDLER(GDK_FAIL, qry_ctx));
316 66007 : MT_rwlock_wrlock(&b->thashlock);
317 66284 : MT_lock_set(&b->theaplock);
318 66205 : BATsetcount(b, oldcnt + ci->ncand);
319 66238 : assert(b->batCapacity >= b->batCount);
320 66238 : MT_lock_unset(&b->theaplock);
321 : /* maintain hash */
322 69326 : for (r = oldcnt, cnt = BATcount(b); b->thash && r < cnt; r++) {
323 3006 : HASHappend_locked(b, r, b->tvheap->base + VarHeapVal(Tloc(b, 0), r, b->twidth));
324 : }
325 66320 : BUN nunique = b->thash ? b->thash->nunique : 0;
326 66320 : MT_rwlock_wrunlock(&b->thashlock);
327 66306 : if (nunique != 0) {
328 9 : MT_lock_set(&b->theaplock);
329 9 : b->tunique_est = (double) nunique;
330 9 : MT_lock_unset(&b->theaplock);
331 : }
332 : return GDK_SUCCEED;
333 : }
334 :
335 : static gdk_return
336 512 : append_varsized_bat(BAT *b, BATiter *ni, struct canditer *ci, bool mayshare)
337 : {
338 512 : BUN cnt = ci->ncand, r;
339 512 : oid hseq = ni->b->hseqbase;
340 :
341 : /* only transient bats can use some other bat's vheap */
342 512 : assert(b->batRole == TRANSIENT || b->tvheap->parentid == b->batCacheid);
343 : /* make sure the bats use var_t */
344 512 : assert(b->twidth == ni->width);
345 512 : assert(b->twidth == SIZEOF_VAR_T);
346 512 : if (cnt == 0)
347 : return GDK_SUCCEED;
348 513 : if (cnt > BATcapacity(b) - BATcount(b)) {
349 : /* if needed space exceeds a normal growth extend just
350 : * with what's needed */
351 18 : BUN ncap = BATcount(b) + cnt;
352 18 : BUN grows = BATgrows(b);
353 :
354 18 : if (ncap > grows)
355 : grows = ncap;
356 18 : if (BATextend(b, grows) != GDK_SUCCEED)
357 : return GDK_FAIL;
358 : }
359 513 : if (mayshare &&
360 514 : BATcount(b) == 0 &&
361 232 : b->batRole == TRANSIENT &&
362 148 : ni->restricted == BAT_READ &&
363 148 : b->tvheap != ni->vh) {
364 : /* if b is still empty, in the transient farm, and n
365 : * is read-only, we replace b's vheap with a reference
366 : * to n's */
367 148 : MT_lock_set(&b->theaplock);
368 148 : bat bid = b->tvheap->parentid;
369 148 : HEAPdecref(b->tvheap, true);
370 148 : HEAPincref(ni->vh);
371 148 : b->tvheap = ni->vh;
372 148 : MT_lock_unset(&b->theaplock);
373 148 : BBPretain(ni->vh->parentid);
374 148 : if (bid != b->batCacheid)
375 0 : BBPrelease(bid);
376 : }
377 513 : if (b->tvheap == ni->vh) {
378 : /* if b and n use the same vheap, we only need to copy
379 : * the offsets from n to b */
380 341 : if (ci->tpe == cand_dense) {
381 : /* fast memcpy since we copy a consecutive
382 : * chunk of memory */
383 341 : memcpy(Tloc(b, BATcount(b)),
384 341 : (const var_t *) ni->base + (ci->seq - hseq),
385 341 : cnt << b->tshift);
386 : } else {
387 0 : var_t *restrict dst = (var_t *) Tloc(b, BATcount(b));
388 0 : const var_t *restrict src = (const var_t *) ni->base;
389 0 : while (cnt > 0) {
390 0 : cnt--;
391 0 : *dst++ = src[canditer_next(ci) - hseq];
392 : }
393 : }
394 341 : MT_rwlock_wrlock(&b->thashlock);
395 341 : MT_lock_set(&b->theaplock);
396 341 : BATsetcount(b, BATcount(b) + ci->ncand);
397 341 : MT_lock_unset(&b->theaplock);
398 : /* maintain hash table */
399 341 : for (BUN i = BATcount(b) - ci->ncand;
400 341 : b->thash && i < BATcount(b);
401 0 : i++) {
402 0 : HASHappend_locked(b, i, b->tvheap->base + *(var_t *) Tloc(b, i));
403 : }
404 341 : BUN nunique = b->thash ? b->thash->nunique : 0;
405 341 : MT_rwlock_wrunlock(&b->thashlock);
406 341 : if (nunique != 0) {
407 0 : MT_lock_set(&b->theaplock);
408 0 : b->tunique_est = (double) nunique;
409 0 : MT_lock_unset(&b->theaplock);
410 : }
411 341 : return GDK_SUCCEED;
412 : }
413 : /* b and n do not share their vheap, so we need to copy data */
414 172 : if (b->tvheap->parentid != b->batCacheid) {
415 : /* if b shares its vheap with some other bat, unshare it */
416 18 : Heap *h = GDKmalloc(sizeof(Heap));
417 18 : if (h == NULL) {
418 : return GDK_FAIL;
419 : }
420 36 : *h = (Heap) {
421 18 : .parentid = b->batCacheid,
422 18 : .farmid = BBPselectfarm(b->batRole, b->ttype, varheap),
423 : .refs = ATOMIC_VAR_INIT(1),
424 : };
425 18 : strconcat_len(h->filename, sizeof(h->filename),
426 18 : BBP_physical(b->batCacheid), ".theap", NULL);
427 18 : if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
428 0 : HEAPfree(h, true);
429 0 : GDKfree(h);
430 0 : return GDK_FAIL;
431 : }
432 18 : MT_lock_set(&b->theaplock);
433 18 : Heap *oh = b->tvheap;
434 18 : b->tvheap = h;
435 18 : MT_lock_unset(&b->theaplock);
436 18 : if (oh->parentid != b->batCacheid)
437 18 : BBPrelease(oh->parentid);
438 18 : HEAPdecref(oh, false);
439 : }
440 172 : if (BATcount(b) == 0 &&
441 84 : ci->tpe == cand_dense && ci->ncand == ni->count) {
442 : /* just copy the heaps */
443 84 : MT_lock_set(&b->theaplock);
444 85 : if (HEAPgrow(&b->tvheap, ni->vhfree, false) != GDK_SUCCEED) {
445 0 : MT_lock_unset(&b->theaplock);
446 0 : return GDK_FAIL;
447 : }
448 83 : memcpy(b->theap->base, ni->base, ni->hfree);
449 83 : memcpy(b->tvheap->base, ni->vh->base, ni->vhfree);
450 83 : b->theap->free = ni->hfree;
451 83 : b->theap->dirty = true;
452 83 : b->tvheap->free = ni->vhfree;
453 83 : b->tvheap->dirty = true;
454 83 : BATsetcount(b, ni->count);
455 85 : b->tnil = ni->nil;
456 85 : b->tnonil = ni->nonil;
457 85 : b->tsorted = ni->sorted;
458 85 : b->tnosorted = ni->nosorted;
459 85 : b->trevsorted = ni->revsorted;
460 85 : b->tnorevsorted = ni->norevsorted;
461 85 : b->tkey = ni->key;
462 85 : b->tnokey[0] = ni->nokey[0];
463 85 : b->tnokey[1] = ni->nokey[1];
464 85 : b->tminpos = ni->minpos;
465 85 : b->tmaxpos = ni->maxpos;
466 85 : b->tunique_est = ni->unique_est;
467 85 : MT_lock_unset(&b->theaplock);
468 85 : return GDK_SUCCEED;
469 : }
470 : /* copy data from n to b */
471 : r = BATcount(b);
472 380593 : for (BUN i = 0; i < cnt; i++) {
473 380504 : BUN p = canditer_next(ci) - hseq;
474 380502 : const void *t = BUNtvar(*ni, p);
475 380502 : if (tfastins_nocheckVAR(b, r, t) != GDK_SUCCEED) {
476 : return GDK_FAIL;
477 : }
478 380505 : r++;
479 : }
480 89 : MT_rwlock_wrlock(&b->thashlock);
481 90 : if (b->thash) {
482 0 : r -= cnt;
483 0 : BATiter bi = bat_iterator_nolock(b);
484 0 : for (BUN i = 0; i < cnt; i++) {
485 0 : const void *t = BUNtvar(bi, r);
486 0 : HASHappend_locked(b, r, t);
487 0 : r++;
488 : }
489 : }
490 90 : BUN nunique = b->thash ? b->thash->nunique : 0;
491 90 : MT_lock_set(&b->theaplock);
492 90 : BATsetcount(b, r);
493 89 : if (nunique != 0)
494 0 : b->tunique_est = (double) nunique;
495 89 : MT_lock_unset(&b->theaplock);
496 89 : MT_rwlock_wrunlock(&b->thashlock);
497 89 : return GDK_SUCCEED;
498 : }
499 :
500 : static gdk_return
501 127 : append_msk_bat(BAT *b, BATiter *ni, struct canditer *ci)
502 : {
503 127 : if (ci->ncand == 0)
504 : return GDK_SUCCEED;
505 127 : if (BATextend(b, BATcount(b) + ci->ncand) != GDK_SUCCEED)
506 : return GDK_FAIL;
507 :
508 127 : MT_lock_set(&b->theaplock);
509 :
510 127 : uint32_t boff = b->batCount % 32;
511 127 : uint32_t *bp = (uint32_t *) b->theap->base + b->batCount / 32;
512 127 : b->batCount += ci->ncand;
513 127 : b->theap->dirty = true;
514 127 : b->theap->free = ((b->batCount + 31) / 32) * 4;
515 127 : if (ci->tpe == cand_dense) {
516 127 : const uint32_t *np;
517 127 : uint32_t noff, mask;
518 127 : BUN cnt;
519 127 : noff = (ci->seq - ni->b->hseqbase) % 32;
520 127 : cnt = ci->ncand;
521 127 : np = (const uint32_t *) ni->base + (ci->seq - ni->b->hseqbase) / 32;
522 127 : if (boff == noff) {
523 : /* words of b and n are aligned, so we don't
524 : * need to shift bits around */
525 47 : if (boff + cnt <= 32) {
526 : /* all new bits within one word */
527 41 : if (cnt == 32) {
528 0 : *bp = *np;
529 : } else {
530 41 : mask = ((1U << cnt) - 1) << boff;
531 41 : *bp &= ~mask;
532 41 : *bp |= *np & mask;
533 : }
534 : } else {
535 : /* multiple words of b are affected */
536 6 : if (boff != 0) {
537 : /* first fill up the rest of the first
538 : * word */
539 0 : mask = ~0U << boff;
540 0 : *bp &= ~mask;
541 0 : *bp++ |= *np++ & mask;
542 0 : cnt -= 32 - boff;
543 : }
544 6 : if (cnt >= 32) {
545 : /* copy an integral number of words fast */
546 6 : BUN nw = cnt / 32;
547 6 : memcpy(bp, np, nw*sizeof(int));
548 6 : bp += nw;
549 6 : np += nw;
550 6 : cnt %= 32;
551 : }
552 6 : if (cnt > 0) {
553 : /* do the left over bits */
554 6 : mask = (1U << cnt) - 1;
555 6 : *bp = *np & mask;
556 : }
557 : }
558 80 : } else if (boff > noff) {
559 80 : if (boff + cnt <= 32) {
560 : /* we only need to copy bits from a
561 : * single word of n to a single word
562 : * of b */
563 : /* boff > 0, so cnt < 32, hence the
564 : * shift is ok */
565 72 : mask = (1U << cnt) - 1;
566 72 : *bp &= ~(mask << boff);
567 72 : *bp |= (*np & (mask << noff)) << (boff - noff);
568 : } else {
569 : /* first fill the rest of the last partial
570 : * word of b, so that's 32-boff bits */
571 8 : mask = (1U << (32 - boff)) - 1;
572 8 : *bp &= ~(mask << boff);
573 8 : *bp++ |= (*np & (mask << noff)) << (boff - noff);
574 8 : cnt -= 32 - boff;
575 :
576 : /* set boff and noff to the amount we need to
577 : * shift bits in consecutive words of n around
578 : * to fit into the next word of b; set mask to
579 : * the mask of the bottom bits of n that fit
580 : * in a word of b (and the complement are the
581 : * top bits that go to another word of b) */
582 8 : boff -= noff;
583 8 : noff = 32 - boff;
584 8 : mask = (1U << noff) - 1;
585 140 : while (cnt >= 32) {
586 132 : *bp = (*np++ & ~mask) >> noff;
587 132 : *bp++ |= (*np & mask) << boff;
588 132 : cnt -= 32;
589 : }
590 8 : if (cnt > noff) {
591 : /* the last bits come from two words
592 : * in n */
593 4 : *bp = (*np++ & ~mask) >> noff;
594 4 : cnt -= noff;
595 4 : mask = (1U << cnt) - 1;
596 4 : *bp++ |= (*np & mask) << boff;
597 4 : } else if (cnt > 0) {
598 : /* the last bits come from a single
599 : * word in n */
600 4 : mask = ((1U << cnt) - 1) << noff;
601 4 : *bp = (*np & mask) >> noff;
602 : }
603 : }
604 : } else {
605 : /* boff < noff */
606 0 : if (noff + cnt <= 32) {
607 : /* only need part of the first word of n */
608 0 : assert(cnt < 32); /* noff > 0, so cnt < 32 */
609 0 : mask = (1U << cnt) - 1;
610 0 : *bp &= ~(mask << boff);
611 0 : *bp |= (*np & (mask << noff)) >> (noff - boff);
612 0 : } else if (boff + cnt <= 32) {
613 : /* only need to fill a single word of
614 : * b, but from two of n */
615 0 : if (cnt < 32)
616 0 : *bp &= ~(((1U << cnt) - 1) << boff);
617 : else
618 0 : *bp = 0;
619 0 : mask = ~((1U << noff) - 1);
620 0 : *bp |= (*np++ & mask) >> (noff - boff);
621 0 : cnt -= 32 - noff;
622 0 : mask = (1U << cnt) - 1;
623 0 : *bp |= (*np & mask) << (32 - noff);
624 : } else {
625 0 : if (boff > 0) {
626 : /* fill the rest of the first word of b */
627 0 : cnt -= 32 - boff;
628 0 : *bp &= (1U << boff) - 1;
629 0 : mask = ~((1U << noff) - 1);
630 0 : noff -= boff;
631 0 : boff = 32 - noff;
632 0 : *bp |= (*np++ & mask) >> noff;
633 0 : *bp |= (*np & ((1U << noff) - 1)) << boff;
634 : } else {
635 0 : boff = 32 - noff;
636 : }
637 0 : mask = (1U << noff) - 1;
638 0 : while (cnt >= 32) {
639 0 : *bp = (*np++ & ~mask) >> noff;
640 0 : *bp++ |= (*np & mask) << boff;
641 0 : cnt -= 32;
642 : }
643 0 : if (cnt > 0) {
644 0 : *bp = (*np++ & ~mask) >> noff;
645 0 : if (cnt > noff)
646 0 : *bp++ |= (*np & mask) << boff;
647 : }
648 : }
649 : }
650 : } else {
651 0 : oid o;
652 0 : uint32_t v = boff > 0 ? *bp & ((1U << boff) - 1) : 0;
653 0 : do {
654 0 : for (uint32_t i = boff; i < 32; i++) {
655 0 : o = canditer_next(ci);
656 0 : if (is_oid_nil(o))
657 : break;
658 0 : o -= ni->b->hseqbase;
659 0 : v |= (uint32_t) Tmskval(ni, o - ni->b->hseqbase) << i;
660 : }
661 0 : *bp++ = v;
662 0 : v = 0;
663 0 : boff = 0;
664 0 : } while (!is_oid_nil(o));
665 : }
666 127 : MT_lock_unset(&b->theaplock);
667 127 : return GDK_SUCCEED;
668 : }
669 :
670 : /* Append the contents of BAT n (subject to the optional candidate
671 : * list s) to BAT b. If b is empty, b will get the seqbase of s if it
672 : * was passed in, and else the seqbase of n. */
673 : static gdk_return
674 2075244 : BATappend2(BAT *b, BAT *n, BAT *s, bool force, bool mayshare)
675 : {
676 2075244 : struct canditer ci;
677 2075244 : BUN r;
678 2075244 : oid hseq = n->hseqbase;
679 2075244 : char buf[64];
680 2075244 : lng t0 = 0;
681 2075244 : const ValRecord *prop = NULL;
682 2075244 : ValRecord minprop, maxprop;
683 2075244 : const void *minbound = NULL, *maxbound = NULL;
684 2075244 : int (*atomcmp) (const void *, const void *) = ATOMcompare(b->ttype);
685 2075244 : bool hlocked = false;
686 :
687 2075244 : if (b == NULL || n == NULL || BATcount(n) == 0) {
688 : return GDK_SUCCEED;
689 : }
690 1263235 : assert(b->theap->parentid == b->batCacheid);
691 :
692 1263235 : TRC_DEBUG_IF(ALGO) {
693 0 : t0 = GDKusec();
694 0 : snprintf(buf, sizeof(buf), ALGOBATFMT, ALGOBATPAR(b));
695 : }
696 :
697 1263235 : ALIGNapp(b, force, GDK_FAIL);
698 :
699 3537259 : if (ATOMstorage(ATOMtype(b->ttype)) != ATOMstorage(ATOMtype(n->ttype))) {
700 0 : GDKerror("Incompatible operands ("ALGOBATFMT" vs. "ALGOBATFMT").\n", ALGOBATPAR(b), ALGOBATPAR(n));
701 0 : return GDK_FAIL;
702 : }
703 :
704 1266049 : if (BATttype(b) != BATttype(n) &&
705 : ATOMtype(b->ttype) != ATOMtype(n->ttype)) {
706 0 : TRC_DEBUG(CHECK_, "Interpreting %s as %s.\n",
707 : ATOMname(BATttype(n)), ATOMname(BATttype(b)));
708 : }
709 :
710 1266049 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
711 :
712 1265854 : BATiter ni = bat_iterator(n);
713 :
714 1265998 : canditer_init(&ci, n, s);
715 1266409 : if (ci.ncand == 0) {
716 0 : goto doreturn;
717 : }
718 :
719 1266409 : if (BATcount(b) + ci.ncand > BUN_MAX) {
720 0 : bat_iterator_end(&ni);
721 0 : GDKerror("combined BATs too large\n");
722 0 : return GDK_FAIL;
723 : }
724 :
725 1266409 : if (b->hseqbase + BATcount(b) + ci.ncand >= GDK_oid_max) {
726 0 : bat_iterator_end(&ni);
727 0 : GDKerror("overflow of head value\n");
728 0 : return GDK_FAIL;
729 : }
730 :
731 1266409 : OIDXdestroy(b);
732 1266147 : STRMPdestroy(b); /* TODO: use STRMPappendBitString */
733 1266941 : RTREEdestroy(b);
734 :
735 1265119 : MT_lock_set(&b->theaplock);
736 1265430 : const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
737 1264255 : if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL &&
738 48 : VALcopy(&minprop, prop) != NULL) {
739 48 : minbound = VALptr(&minprop);
740 48 : if (ci.ncand == BATcount(n) &&
741 62 : ni.minpos != BUN_NONE &&
742 14 : atomcmp(BUNtail(ni, ni.minpos), minbound) < 0) {
743 0 : assert(0);
744 : GDKerror("value out of bounds\n");
745 : MT_lock_unset(&b->theaplock);
746 : goto bailout;
747 : }
748 : }
749 1264715 : if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL &&
750 40 : VALcopy(&maxprop, prop) != NULL) {
751 40 : maxbound = VALptr(&maxprop);
752 40 : if (ci.ncand == BATcount(n) &&
753 52 : ni.maxpos != BUN_NONE &&
754 12 : atomcmp(BUNtail(ni, ni.maxpos), maxbound) >= 0) {
755 0 : assert(0);
756 : GDKerror("value out of bounds\n");
757 : MT_lock_unset(&b->theaplock);
758 : goto bailout;
759 : }
760 : }
761 :
762 1264438 : if (BATcount(b) == 0 || b->tmaxpos != BUN_NONE) {
763 375976 : if (ni.maxpos != BUN_NONE) {
764 151836 : BATiter bi = bat_iterator_nolock(b);
765 151836 : if (BATcount(b) == 0 || atomcmp(BUNtail(bi, b->tmaxpos), BUNtail(ni, ni.maxpos)) < 0) {
766 146768 : if (s == NULL) {
767 146632 : b->tmaxpos = BATcount(b) + ni.maxpos;
768 : } else {
769 136 : b->tmaxpos = BUN_NONE;
770 : }
771 : }
772 : } else {
773 224140 : b->tmaxpos = BUN_NONE;
774 : }
775 : }
776 1264447 : if (BATcount(b) == 0 || b->tminpos != BUN_NONE) {
777 375266 : if (ni.minpos != BUN_NONE) {
778 151380 : BATiter bi = bat_iterator_nolock(b);
779 151380 : if (BATcount(b) == 0 || atomcmp(BUNtail(bi, b->tminpos), BUNtail(ni, ni.minpos)) > 0) {
780 145281 : if (s == NULL) {
781 145147 : b->tminpos = BATcount(b) + ni.minpos;
782 : } else {
783 134 : b->tminpos = BUN_NONE;
784 : }
785 : }
786 : } else {
787 223886 : b->tminpos = BUN_NONE;
788 : }
789 : }
790 1264445 : if (ci.ncand > BATcount(b) / gdk_unique_estimate_keep_fraction) {
791 1263018 : b->tunique_est = 0;
792 : }
793 1264445 : MT_lock_unset(&b->theaplock);
794 : /* load hash so that we can maintain it */
795 1266627 : (void) BATcheckhash(b);
796 :
797 1266102 : if (b->ttype == TYPE_void) {
798 : /* b does not have storage, keep it that way if we can */
799 130313 : HASHdestroy(b); /* we're not maintaining the hash here */
800 130298 : MT_lock_set(&b->theaplock);
801 130302 : if (BATtdensebi(&ni) && ci.tpe == cand_dense &&
802 126424 : (BATcount(b) == 0 ||
803 109562 : (BATtdense(b) &&
804 109562 : b->tseqbase + BATcount(b) == n->tseqbase + ci.seq - hseq))) {
805 : /* n is also dense and consecutive with b */
806 126367 : if (BATcount(b) == 0) {
807 16862 : if (minbound && n->tseqbase + ci.seq - hseq < *(const oid *)minbound) {
808 0 : assert(0);
809 : GDKerror("value not within bounds\n");
810 : MT_lock_unset(&b->theaplock);
811 : goto bailout;
812 : }
813 16862 : BATtseqbase(b, n->tseqbase + ci.seq - hseq);
814 : }
815 126355 : if (maxbound && b->tseqbase + BATcount(b) + ci.ncand >= *(const oid *)maxbound) {
816 0 : assert(0);
817 : GDKerror("value not within bounds\n");
818 : MT_lock_unset(&b->theaplock);
819 : goto bailout;
820 : }
821 126355 : BATsetcount(b, BATcount(b) + ci.ncand);
822 126338 : MT_lock_unset(&b->theaplock);
823 126372 : goto doreturn;
824 : }
825 3935 : if ((BATcount(b) == 0 || is_oid_nil(b->tseqbase)) &&
826 20 : ni.type == TYPE_void && is_oid_nil(n->tseqbase)) {
827 : /* both b and n are void/nil */
828 0 : if (notnull) {
829 0 : assert(0);
830 : GDKerror("NULL value not within bounds\n");
831 : MT_lock_unset(&b->theaplock);
832 : goto bailout;
833 : }
834 0 : BATtseqbase(b, oid_nil);
835 0 : BATsetcount(b, BATcount(b) + ci.ncand);
836 0 : MT_lock_unset(&b->theaplock);
837 0 : goto doreturn;
838 : }
839 : /* we need to materialize b; allocate enough capacity */
840 3935 : MT_lock_unset(&b->theaplock);
841 3935 : if (BATmaterialize(b, BATcount(b) + ci.ncand) != GDK_SUCCEED) {
842 0 : goto bailout;
843 : }
844 : }
845 :
846 : /* property setting */
847 1139724 : MT_lock_set(&b->theaplock);
848 1139178 : r = BATcount(b);
849 :
850 1139178 : if (BATcount(b) == 0) {
851 350627 : b->tsorted = ni.sorted;
852 350627 : b->trevsorted = ni.revsorted;
853 350627 : b->tseqbase = oid_nil;
854 350627 : b->tnonil = ni.nonil;
855 350627 : b->tnil = ni.nil && ci.ncand == BATcount(n);
856 350627 : if (ci.tpe == cand_dense) {
857 350477 : b->tnosorted = ci.seq - hseq <= ni.nosorted && ni.nosorted < ci.seq + ci.ncand - hseq ? ni.nosorted + hseq - ci.seq : 0;
858 350477 : b->tnorevsorted = ci.seq - hseq <= ni.norevsorted && ni.norevsorted < ci.seq + ci.ncand - hseq ? ni.norevsorted + hseq - ci.seq : 0;
859 350477 : if (BATtdensebi(&ni)) {
860 2080 : b->tseqbase = n->tseqbase + ci.seq - hseq;
861 : }
862 : } else {
863 150 : b->tnosorted = 0;
864 150 : b->tnorevsorted = 0;
865 : }
866 350627 : b->tkey = ni.key;
867 350627 : if (ci.ncand == BATcount(n)) {
868 349614 : b->tnokey[0] = ni.nokey[0];
869 349614 : b->tnokey[1] = ni.nokey[1];
870 : } else {
871 1013 : b->tnokey[0] = b->tnokey[1] = 0;
872 : }
873 : } else {
874 788551 : BUN last = r - 1;
875 788551 : BATiter bi = bat_iterator_nolock(b);
876 788551 : int xx = ATOMcmp(b->ttype,
877 : BUNtail(ni, ci.seq - hseq),
878 : BUNtail(bi, last));
879 787615 : if (b->tsorted && (!ni.sorted || xx < 0)) {
880 20894 : b->tsorted = false;
881 20894 : b->tnosorted = 0;
882 20894 : b->tseqbase = oid_nil;
883 : }
884 787615 : if (b->trevsorted &&
885 66247 : (!ni.revsorted || xx > 0)) {
886 17727 : b->trevsorted = false;
887 17727 : b->tnorevsorted = 0;
888 : }
889 787615 : if (b->tkey &&
890 72943 : (!(b->tsorted || b->trevsorted) ||
891 59031 : !ni.key || xx == 0)) {
892 19561 : BATkey(b, false);
893 : }
894 787561 : if (b->ttype != TYPE_void && b->tsorted && BATtdense(b) &&
895 4077 : (!BATtdensebi(&ni) ||
896 526 : ci.tpe != cand_dense ||
897 526 : 1 + *(oid *) BUNtloc(bi, last) != BUNtoid(n, ci.seq - hseq))) {
898 3570 : b->tseqbase = oid_nil;
899 : }
900 787561 : b->tnonil &= ni.nonil;
901 1563892 : b->tnil |= ni.nil && ci.ncand == ni.count;
902 : }
903 1138188 : MT_lock_unset(&b->theaplock);
904 1140304 : if (b->ttype == TYPE_str) {
905 66215 : if (insert_string_bat(b, &ni, &ci, force, mayshare, qry_ctx) != GDK_SUCCEED) {
906 0 : goto bailout;
907 : }
908 1074089 : } else if (ATOMvarsized(b->ttype)) {
909 516 : if (append_varsized_bat(b, &ni, &ci, mayshare) != GDK_SUCCEED) {
910 0 : goto bailout;
911 : }
912 1073573 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
913 : /* no bounds and NOT_NULL property on MSK bats */
914 127 : assert(minbound == NULL && maxbound == NULL && !notnull);
915 127 : if (append_msk_bat(b, &ni, &ci) != GDK_SUCCEED) {
916 0 : goto bailout;
917 : }
918 : } else {
919 1073446 : if (ci.ncand > BATcapacity(b) - BATcount(b)) {
920 : /* if needed space exceeds a normal growth
921 : * extend just with what's needed */
922 4990 : BUN ncap = BATcount(b) + ci.ncand;
923 4990 : BUN grows = BATgrows(b);
924 :
925 5006 : if (ncap > grows)
926 : grows = ncap;
927 5006 : if (BATextend(b, grows) != GDK_SUCCEED) {
928 0 : goto bailout;
929 : }
930 : }
931 1073571 : MT_rwlock_wrlock(&b->thashlock);
932 1073871 : hlocked = true;
933 1073871 : if (b->ttype != TYPE_void &&
934 1073720 : ni.type != TYPE_void &&
935 1068434 : ci.tpe == cand_dense) {
936 : /* use fast memcpy if we can */
937 1068232 : memcpy(Tloc(b, BATcount(b)),
938 1068232 : (const char *) ni.base + ((ci.seq - hseq) << ni.shift),
939 1068232 : ci.ncand << ni.shift);
940 1068245 : for (BUN i = 0; b->thash && i < ci.ncand; i++) {
941 3230 : HASHappend_locked(b, r, Tloc(b, r));
942 13 : r++;
943 : }
944 : } else {
945 5639 : const void *atomnil = ATOMnilptr(b->ttype);
946 4066008 : TIMEOUT_LOOP(ci.ncand, qry_ctx) {
947 4054706 : BUN p = canditer_next(&ci) - hseq;
948 4053041 : const void *t = BUNtail(ni, p);
949 4058555 : bool isnil = atomcmp(t, atomnil) == 0;
950 4047154 : if (notnull && isnil) {
951 0 : assert(0);
952 : GDKerror("NULL value not within bounds\n");
953 : goto bailout;
954 4047154 : } else if (minbound &&
955 4047154 : !isnil &&
956 0 : atomcmp(t, minbound) < 0) {
957 0 : assert(0);
958 : GDKerror("value not within bounds\n");
959 : goto bailout;
960 4047154 : } else if (maxbound &&
961 0 : !isnil &&
962 0 : atomcmp(t, maxbound) >= 0) {
963 0 : assert(0);
964 : GDKerror("value not within bounds\n");
965 : goto bailout;
966 4047154 : } else if (tfastins_nocheck(b, r, t) != GDK_SUCCEED) {
967 0 : goto bailout;
968 : }
969 4052090 : if (b->thash)
970 0 : HASHappend_locked(b, r, t);
971 4054709 : r++;
972 : }
973 5641 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
974 : }
975 1070656 : BUN nunique;
976 1070656 : nunique = b->thash ? b->thash->nunique : 0;
977 1070656 : MT_lock_set(&b->theaplock);
978 1072482 : BATsetcount(b, b->batCount + ci.ncand);
979 1072397 : if (nunique != 0)
980 5 : b->tunique_est = (double) nunique;
981 1072397 : MT_lock_unset(&b->theaplock);
982 1073545 : assert(hlocked);
983 1073545 : MT_rwlock_wrunlock(&b->thashlock);
984 1073545 : hlocked = false;
985 : }
986 :
987 1266675 : doreturn:
988 1266675 : bat_iterator_end(&ni);
989 1264573 : if (minbound)
990 48 : VALclear(&minprop);
991 1265179 : if (maxbound)
992 40 : VALclear(&maxprop);
993 1265179 : TRC_DEBUG(ALGO, "b=%s,n=" ALGOBATFMT ",s=" ALGOOPTBATFMT
994 : " -> " ALGOBATFMT " (" LLFMT " usec)\n",
995 : buf, ALGOBATPAR(n), ALGOOPTBATPAR(s), ALGOBATPAR(b),
996 : GDKusec() - t0);
997 :
998 : return GDK_SUCCEED;
999 : bailout:
1000 0 : if (hlocked)
1001 0 : MT_rwlock_wrunlock(&b->thashlock);
1002 0 : if (minbound)
1003 0 : VALclear(&minprop);
1004 0 : if (maxbound)
1005 0 : VALclear(&maxprop);
1006 0 : bat_iterator_end(&ni);
1007 0 : return GDK_FAIL;
1008 : }
1009 :
1010 : gdk_return
1011 2082179 : BATappend(BAT *b, BAT *n, BAT *s, bool force)
1012 : {
1013 2082179 : return BATappend2(b, n, s, force, true);
1014 : }
1015 :
1016 : gdk_return
1017 4 : BATdel(BAT *b, BAT *d)
1018 : {
1019 4 : void (*atmdel) (Heap *, var_t *) = BATatoms[b->ttype].atomDel;
1020 4 : MT_lock_set(&b->theaplock);
1021 4 : BATiter bi = bat_iterator_nolock(b);
1022 4 : MT_lock_unset(&b->theaplock);
1023 :
1024 4 : assert(ATOMtype(d->ttype) == TYPE_oid);
1025 4 : assert(d->tsorted);
1026 4 : assert(d->tkey);
1027 4 : if (BATcount(d) == 0)
1028 : return GDK_SUCCEED;
1029 4 : OIDXdestroy(b);
1030 4 : HASHdestroy(b);
1031 4 : PROPdestroy(b);
1032 4 : STRMPdestroy(b);
1033 4 : RTREEdestroy(b);
1034 4 : if (BATtdense(d)) {
1035 2 : oid o = d->tseqbase;
1036 2 : BUN c = BATcount(d);
1037 :
1038 2 : if (o + c <= b->hseqbase)
1039 : return GDK_SUCCEED;
1040 2 : if (o < b->hseqbase) {
1041 0 : c -= b->hseqbase - o;
1042 0 : o = b->hseqbase;
1043 : }
1044 2 : if (o - b->hseqbase < b->batInserted) {
1045 0 : GDKerror("cannot delete committed values\n");
1046 0 : return GDK_FAIL;
1047 : }
1048 2 : if (o + c > b->hseqbase + BATcount(b))
1049 0 : c = b->hseqbase + BATcount(b) - o;
1050 2 : if (c == 0)
1051 : return GDK_SUCCEED;
1052 2 : if (atmdel) {
1053 0 : BUN p = o - b->hseqbase;
1054 0 : BUN q = p + c;
1055 0 : while (p < q) {
1056 0 : (*atmdel)(b->tvheap, (var_t *) BUNtloc(bi, p));
1057 0 : p++;
1058 : }
1059 : }
1060 2 : if (BATtdense(b) && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED)
1061 : return GDK_FAIL;
1062 2 : MT_lock_set(&b->theaplock);
1063 2 : if (o + c < b->hseqbase + BATcount(b)) {
1064 0 : o -= b->hseqbase;
1065 0 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1066 0 : BUN n = BATcount(b) - (o + c);
1067 : /* not very efficient, but first see
1068 : * how much this is used */
1069 0 : for (BUN i = 0; i < n; i++)
1070 0 : mskSetVal(b, o + i,
1071 0 : mskGetVal(b, o + c + i));
1072 : } else {
1073 0 : memmove(Tloc(b, o),
1074 0 : Tloc(b, o + c),
1075 0 : b->twidth * (BATcount(b) - (o + c)));
1076 : }
1077 0 : b->theap->dirty = true;
1078 : // o += b->hseqbase; // if this were to be used again
1079 : }
1080 2 : b->batCount -= c;
1081 : } else {
1082 2 : BATiter di = bat_iterator(d);
1083 2 : const oid *o = (const oid *) di.base;
1084 2 : const oid *s;
1085 2 : BUN c = di.count;
1086 2 : BUN nd = 0;
1087 2 : BUN pos;
1088 2 : char *p = NULL;
1089 :
1090 2 : if (o[c - 1] <= b->hseqbase) {
1091 0 : bat_iterator_end(&di);
1092 0 : return GDK_SUCCEED;
1093 : }
1094 2 : while (*o < b->hseqbase) {
1095 0 : o++;
1096 0 : c--;
1097 : }
1098 2 : if (*o - b->hseqbase < b->batInserted) {
1099 0 : bat_iterator_end(&di);
1100 0 : GDKerror("cannot delete committed values\n");
1101 0 : return GDK_FAIL;
1102 : }
1103 2 : if (BATtdense(b) && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED) {
1104 0 : bat_iterator_end(&di);
1105 0 : return GDK_FAIL;
1106 : }
1107 2 : s = o;
1108 2 : pos = *o - b->hseqbase;
1109 2 : if (ATOMstorage(b->ttype) != TYPE_msk)
1110 2 : p = Tloc(b, pos);
1111 6 : while (c > 0 && *o < b->hseqbase + BATcount(b)) {
1112 4 : size_t n;
1113 4 : if (atmdel)
1114 0 : (*atmdel)(b->tvheap, (var_t *) BUNtloc(bi, *o - b->hseqbase));
1115 4 : o++;
1116 4 : c--;
1117 4 : nd++;
1118 4 : if (c == 0 || *o - b->hseqbase >= BATcount(b))
1119 2 : n = b->hseqbase + BATcount(b) - o[-1] - 1;
1120 2 : else if ((oid) (o - s) < *o - *s)
1121 2 : n = o[0] - o[-1] - 1;
1122 : else
1123 : n = 0;
1124 4 : if (n > 0) {
1125 2 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1126 0 : BUN opos = o[-1] + 1 - b->hseqbase;
1127 : /* not very efficient, but
1128 : * first see how much this is
1129 : * used */
1130 0 : for (BUN i = 0; i < n; i++) {
1131 0 : mskSetVal(b, pos + i,
1132 0 : mskGetVal(b, opos + i));
1133 : }
1134 0 : pos += n;
1135 : } else {
1136 2 : n *= b->twidth;
1137 2 : memmove(p,
1138 2 : Tloc(b, o[-1] + 1 - b->hseqbase),
1139 : n);
1140 2 : p += n;
1141 : }
1142 : s = o;
1143 : }
1144 : }
1145 2 : bat_iterator_end(&di);
1146 2 : MT_lock_set(&b->theaplock);
1147 2 : b->theap->dirty = true;
1148 2 : b->batCount -= nd;
1149 : }
1150 4 : if (b->batCount <= 1) {
1151 : /* some trivial properties */
1152 2 : b->tkey = true;
1153 2 : b->tsorted = b->trevsorted = true;
1154 2 : if (b->batCount == 0) {
1155 2 : b->tnil = false;
1156 2 : b->tnonil = true;
1157 : }
1158 : }
1159 : /* not sure about these anymore */
1160 4 : b->tnosorted = b->tnorevsorted = 0;
1161 4 : b->tnokey[0] = b->tnokey[1] = 0;
1162 4 : b->tminpos = BUN_NONE;
1163 4 : b->tmaxpos = BUN_NONE;
1164 4 : b->tunique_est = 0.0;
1165 4 : MT_lock_unset(&b->theaplock);
1166 :
1167 4 : return GDK_SUCCEED;
1168 : }
1169 :
1170 : /*
1171 : * Replace all values in b with values from n whose location is given by
1172 : * the oid in either p or positions.
1173 : * If positions is used, autoincr specifies whether it is the first of a
1174 : * dense range of positions or whether it is a full-blown array of
1175 : * position.
1176 : * If mayappend is set, the position in p/positions may refer to
1177 : * locations beyond the end of b.
1178 : */
1179 : static gdk_return
1180 205722 : BATappend_or_update(BAT *b, BAT *p, const oid *positions, BAT *n,
1181 : bool mayappend, bool autoincr, bool force)
1182 : {
1183 205722 : lng t0 = GDKusec();
1184 205827 : oid pos = oid_nil;
1185 205827 : BUN nunique = 0;
1186 :
1187 205827 : if (b == NULL || b->ttype == TYPE_void || n == NULL) {
1188 : return GDK_SUCCEED;
1189 : }
1190 : /* either p or positions */
1191 205827 : assert((p == NULL) != (positions == NULL));
1192 205827 : if (p != NULL) {
1193 205652 : if (BATcount(p) != BATcount(n)) {
1194 0 : GDKerror("update BATs not the same size\n");
1195 0 : return GDK_FAIL;
1196 : }
1197 205652 : if (ATOMtype(p->ttype) != TYPE_oid) {
1198 0 : GDKerror("positions BAT not type OID\n");
1199 0 : return GDK_FAIL;
1200 : }
1201 205652 : if (BATtdense(p)) {
1202 197357 : pos = p->tseqbase;
1203 197357 : positions = &pos;
1204 197357 : autoincr = true;
1205 197357 : p = NULL;
1206 8295 : } else if (p->ttype != TYPE_void) {
1207 8293 : positions = (const oid *) Tloc(p, 0);
1208 8293 : autoincr = false;
1209 : } else {
1210 : autoincr = false;
1211 : }
1212 175 : } else if (autoincr) {
1213 175 : pos = *positions;
1214 : }
1215 205827 : if (BATcount(n) == 0) {
1216 : return GDK_SUCCEED;
1217 : }
1218 :
1219 27350 : BATiter ni = bat_iterator(n);
1220 :
1221 27356 : OIDXdestroy(b);
1222 27360 : STRMPdestroy(b);
1223 27361 : RTREEdestroy(b);
1224 : /* load hash so that we can maintain it */
1225 27347 : (void) BATcheckhash(b);
1226 :
1227 27347 : MT_lock_set(&b->theaplock);
1228 27345 : if (!force && (b->batRestricted != BAT_WRITE ||
1229 17 : (ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1)) {
1230 0 : MT_lock_unset(&b->theaplock);
1231 0 : bat_iterator_end(&ni);
1232 0 : GDKerror("access denied to %s, aborting.\n", BATgetId(b));
1233 0 : return GDK_FAIL;
1234 : }
1235 27345 : BATiter bi = bat_iterator_nolock(b);
1236 27345 : if (ni.count > BATcount(b) / gdk_unique_estimate_keep_fraction) {
1237 26782 : b->tunique_est = 0;
1238 : }
1239 :
1240 27345 : b->tsorted = b->trevsorted = false;
1241 27345 : b->tnosorted = b->tnorevsorted = 0;
1242 27345 : b->tseqbase = oid_nil;
1243 27345 : b->tkey = false;
1244 27345 : b->tnokey[0] = b->tnokey[1] = 0;
1245 :
1246 27345 : int (*atomcmp)(const void *, const void *) = ATOMcompare(b->ttype);
1247 27345 : const void *nil = ATOMnilptr(b->ttype);
1248 27345 : oid hseqend = b->hseqbase + BATcount(b);
1249 :
1250 27345 : MT_lock_unset(&b->theaplock);
1251 :
1252 28308 : bool anynil = false;
1253 28308 : bool locked = false;
1254 :
1255 28308 : if (b->tvheap) {
1256 1060356 : for (BUN i = 0; i < ni.count; i++) {
1257 1058054 : oid updid;
1258 1058054 : if (positions) {
1259 1057041 : updid = autoincr ? pos++ : *positions++;
1260 : } else {
1261 1013 : updid = BUNtoid(p, i);
1262 : }
1263 :
1264 1058054 : if (updid < b->hseqbase ||
1265 1058054 : (!mayappend && updid >= hseqend)) {
1266 0 : GDKerror("id out of range\n");
1267 0 : goto bailout;
1268 : }
1269 1058054 : updid -= b->hseqbase;
1270 1058054 : if (!force && updid < b->batInserted) {
1271 0 : GDKerror("updating committed value\n");
1272 0 : goto bailout;
1273 : }
1274 :
1275 1058054 : const void *new = BUNtvar(ni, i);
1276 :
1277 1058054 : if (updid >= BATcount(b)) {
1278 23534 : assert(mayappend);
1279 23534 : if (locked) {
1280 4 : MT_rwlock_wrunlock(&b->thashlock);
1281 4 : locked = false;
1282 : }
1283 23534 : if (b->tminpos != bi.minpos ||
1284 23533 : b->tmaxpos != bi.maxpos) {
1285 1 : MT_lock_set(&b->theaplock);
1286 1 : b->tminpos = bi.minpos;
1287 1 : b->tmaxpos = bi.maxpos;
1288 1 : MT_lock_unset(&b->theaplock);
1289 : }
1290 23534 : if (BATcount(b) < updid &&
1291 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1292 0 : bat_iterator_end(&ni);
1293 0 : return GDK_FAIL;
1294 : }
1295 23534 : if (BUNappend(b, new, force) != GDK_SUCCEED) {
1296 0 : bat_iterator_end(&ni);
1297 0 : return GDK_FAIL;
1298 : }
1299 23534 : bi = bat_iterator_nolock(b);
1300 33183 : continue;
1301 : }
1302 :
1303 : /* it is possible that a previous run was killed
1304 : * after an update (with a mmapped tail file)
1305 : * but before that was committed, then the
1306 : * offset may point outside of the vheap */
1307 1034520 : const void *old = BUNtvaroff(bi, updid) < bi.vhfree ? BUNtvar(bi, updid) : NULL;
1308 :
1309 1032981 : if (old && atomcmp(old, new) == 0) {
1310 : /* replacing with the same value:
1311 : * nothing to do */
1312 9649 : continue;
1313 : }
1314 :
1315 1024836 : bool isnil = atomcmp(new, nil) == 0;
1316 1023879 : anynil |= isnil;
1317 1023879 : MT_lock_set(&b->theaplock);
1318 1023897 : if (old == NULL ||
1319 1023897 : (b->tnil &&
1320 571 : !anynil &&
1321 571 : atomcmp(old, nil) == 0)) {
1322 : /* if old value is nil and no new
1323 : * value is, we're not sure anymore
1324 : * about the nil property, so we must
1325 : * clear it */
1326 569 : b->tnil = false;
1327 : }
1328 1023897 : b->tnonil &= !isnil;
1329 1023897 : b->tnil |= isnil;
1330 1023897 : MT_lock_unset(&b->theaplock);
1331 1023928 : if (bi.maxpos != BUN_NONE) {
1332 220 : if (!isnil &&
1333 110 : atomcmp(BUNtvar(bi, bi.maxpos), new) < 0) {
1334 : /* new value is larger than
1335 : * previous largest */
1336 22 : bi.maxpos = updid;
1337 176 : } else if (old == NULL ||
1338 100 : (atomcmp(BUNtvar(bi, bi.maxpos), old) == 0 &&
1339 12 : atomcmp(new, old) != 0)) {
1340 : /* old value is equal to
1341 : * largest and new value is
1342 : * smaller, so we don't know
1343 : * anymore which is the
1344 : * largest */
1345 12 : bi.maxpos = BUN_NONE;
1346 : }
1347 : }
1348 1023928 : if (bi.minpos != BUN_NONE) {
1349 208 : if (!isnil &&
1350 104 : atomcmp(BUNtvar(bi, bi.minpos), new) > 0) {
1351 : /* new value is smaller than
1352 : * previous smallest */
1353 14 : bi.minpos = updid;
1354 180 : } else if (old == NULL ||
1355 112 : (atomcmp(BUNtvar(bi, bi.minpos), old) == 0 &&
1356 22 : atomcmp(new, old) != 0)) {
1357 : /* old value is equal to
1358 : * smallest and new value is
1359 : * larger, so we don't know
1360 : * anymore which is the
1361 : * smallest */
1362 22 : bi.minpos = BUN_NONE;
1363 : }
1364 : }
1365 1023928 : if (!locked) {
1366 1973 : MT_rwlock_wrlock(&b->thashlock);
1367 1973 : locked = true;
1368 : }
1369 1023927 : if (old)
1370 1023927 : HASHdelete_locked(&bi, updid, old);
1371 0 : else if (b->thash) {
1372 0 : doHASHdestroy(b, b->thash);
1373 0 : b->thash = NULL;
1374 : }
1375 :
1376 1023926 : var_t d;
1377 1023926 : switch (b->twidth) {
1378 1020477 : case 1:
1379 1020477 : d = (var_t) ((uint8_t *) b->theap->base)[updid] + GDK_VAROFFSET;
1380 1020477 : break;
1381 3413 : case 2:
1382 3413 : d = (var_t) ((uint16_t *) b->theap->base)[updid] + GDK_VAROFFSET;
1383 3413 : break;
1384 1 : case 4:
1385 1 : d = (var_t) ((uint32_t *) b->theap->base)[updid];
1386 1 : break;
1387 : #if SIZEOF_VAR_T == 8
1388 35 : case 8:
1389 35 : d = (var_t) ((uint64_t *) b->theap->base)[updid];
1390 35 : break;
1391 : #endif
1392 : default:
1393 0 : MT_UNREACHABLE();
1394 : }
1395 1023926 : MT_lock_set(&b->theaplock);
1396 1023925 : gdk_return rc = ATOMreplaceVAR(b, &d, new);
1397 1023917 : MT_lock_unset(&b->theaplock);
1398 1023923 : if (rc != GDK_SUCCEED) {
1399 0 : goto bailout;
1400 : }
1401 1023923 : if (b->twidth < SIZEOF_VAR_T &&
1402 1023887 : (b->twidth <= 2 ? d - GDK_VAROFFSET : d) >= ((size_t) 1 << (8 << b->tshift))) {
1403 : /* doesn't fit in current heap, upgrade it */
1404 16 : if (GDKupgradevarheap(b, d, 0, MAX(updid, b->batCount)) != GDK_SUCCEED) {
1405 0 : goto bailout;
1406 : }
1407 : }
1408 : /* in case ATOMreplaceVAR and/or
1409 : * GDKupgradevarheap replaces a heap, we need to
1410 : * reinitialize the iterator */
1411 : {
1412 : /* save and restore minpos/maxpos */
1413 1023923 : BUN minpos = bi.minpos;
1414 1023923 : BUN maxpos = bi.maxpos;
1415 1023923 : bi = bat_iterator_nolock(b);
1416 1023923 : bi.minpos = minpos;
1417 1023923 : bi.maxpos = maxpos;
1418 : }
1419 1023923 : switch (b->twidth) {
1420 1020459 : case 1:
1421 1020459 : ((uint8_t *) b->theap->base)[updid] = (uint8_t) (d - GDK_VAROFFSET);
1422 1020459 : break;
1423 3428 : case 2:
1424 3428 : ((uint16_t *) b->theap->base)[updid] = (uint16_t) (d - GDK_VAROFFSET);
1425 3428 : break;
1426 1 : case 4:
1427 1 : ((uint32_t *) b->theap->base)[updid] = (uint32_t) d;
1428 1 : break;
1429 : #if SIZEOF_VAR_T == 8
1430 35 : case 8:
1431 35 : ((uint64_t *) b->theap->base)[updid] = (uint64_t) d;
1432 35 : break;
1433 : #endif
1434 : default:
1435 0 : MT_UNREACHABLE();
1436 : }
1437 1023923 : HASHinsert_locked(&bi, updid, new);
1438 :
1439 : }
1440 2302 : if (locked) {
1441 1969 : if (b->thash)
1442 2 : nunique = b->thash->nunique;
1443 1969 : MT_rwlock_wrunlock(&b->thashlock);
1444 1969 : locked = false;
1445 : }
1446 2302 : MT_lock_set(&b->theaplock);
1447 2303 : b->tvheap->dirty = true;
1448 2303 : MT_lock_unset(&b->theaplock);
1449 25056 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
1450 0 : assert(b->thash == NULL);
1451 0 : HASHdestroy(b); /* hash doesn't make sense for msk */
1452 0 : for (BUN i = 0; i < ni.count; i++) {
1453 0 : oid updid;
1454 0 : if (positions) {
1455 0 : updid = autoincr ? pos++ : *positions++;
1456 : } else {
1457 0 : updid = BUNtoid(p, i);
1458 : }
1459 :
1460 0 : if (updid < b->hseqbase ||
1461 0 : (!mayappend && updid >= hseqend)) {
1462 0 : GDKerror("id out of range\n");
1463 0 : bat_iterator_end(&ni);
1464 0 : return GDK_FAIL;
1465 : }
1466 0 : updid -= b->hseqbase;
1467 0 : if (!force && updid < b->batInserted) {
1468 0 : GDKerror("updating committed value\n");
1469 0 : bat_iterator_end(&ni);
1470 0 : return GDK_FAIL;
1471 : }
1472 0 : if (updid >= BATcount(b)) {
1473 0 : assert(mayappend);
1474 0 : if (BATcount(b) < updid &&
1475 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1476 0 : bat_iterator_end(&ni);
1477 0 : return GDK_FAIL;
1478 : }
1479 0 : if (BUNappend(b, BUNtmsk(ni, i), force) != GDK_SUCCEED) {
1480 0 : bat_iterator_end(&ni);
1481 0 : return GDK_FAIL;
1482 : }
1483 0 : continue;
1484 : }
1485 0 : mskSetVal(b, updid, Tmskval(&ni, i));
1486 : }
1487 0 : bi = bat_iterator_nolock(b);
1488 25056 : } else if (autoincr) {
1489 16901 : if (pos < b->hseqbase ||
1490 16082 : (!mayappend && pos + ni.count > hseqend)) {
1491 0 : GDKerror("id out of range\n");
1492 0 : bat_iterator_end(&ni);
1493 0 : return GDK_FAIL;
1494 : }
1495 16901 : pos -= b->hseqbase;
1496 16901 : if (!force && pos < b->batInserted) {
1497 0 : GDKerror("updating committed value\n");
1498 0 : bat_iterator_end(&ni);
1499 0 : return GDK_FAIL;
1500 : }
1501 :
1502 16901 : if (pos >= BATcount(b)) {
1503 444 : assert(mayappend);
1504 444 : bat_iterator_end(&ni);
1505 444 : if (BATcount(b) < pos &&
1506 0 : BUNappendmulti(b, NULL, (BUN) (pos - BATcount(b)), force) != GDK_SUCCEED) {
1507 : return GDK_FAIL;
1508 : }
1509 444 : return BATappend(b, n, NULL, force);
1510 : }
1511 16463 : if (pos + ni.count > BATcount(b) &&
1512 6 : BUNappendmulti(b, NULL, (BUN) (pos + ni.count - BATcount(b)), force) != GDK_SUCCEED) {
1513 0 : bat_iterator_end(&ni);
1514 0 : return GDK_FAIL;
1515 : }
1516 16457 : bi = bat_iterator_nolock(b);
1517 :
1518 : /* we copy all of n, so if there are nils in n we get
1519 : * nils in b (and else we don't know) */
1520 16457 : b->tnil = ni.nil;
1521 : /* we may not copy over all of b, so we only know that
1522 : * there are no nils in b afterward if there weren't
1523 : * any in either b or n to begin with */
1524 16457 : b->tnonil &= ni.nonil;
1525 : /* if there is no hash, we don't start the loop, if
1526 : * there is only a persisted hash, it will get destroyed
1527 : * in the first iteration, after which there is no hash
1528 : * and the loop ends */
1529 16457 : MT_rwlock_wrlock(&b->thashlock);
1530 16460 : locked = true;
1531 16460 : for (BUN i = pos, j = pos + ni.count; i < j && b->thash; i++)
1532 18 : HASHdelete_locked(&bi, i, Tloc(b, i));
1533 16442 : if (ni.type == TYPE_void) {
1534 0 : assert(b->ttype == TYPE_oid);
1535 0 : oid *o = Tloc(b, pos);
1536 0 : if (is_oid_nil(ni.tseq)) {
1537 : /* we may or may not overwrite the old
1538 : * min/max values */
1539 0 : bi.minpos = BUN_NONE;
1540 0 : bi.maxpos = BUN_NONE;
1541 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1542 0 : o[i] = oid_nil;
1543 0 : b->tnil = true;
1544 : } else {
1545 0 : oid v = ni.tseq;
1546 : /* we know min/max of n, so we know
1547 : * the new min/max of b if those of n
1548 : * are smaller/larger than the old */
1549 0 : if (bi.minpos != BUN_NONE) {
1550 0 : if (v <= BUNtoid(b, bi.minpos))
1551 0 : bi.minpos = pos;
1552 0 : else if (pos <= bi.minpos && bi.minpos < pos + ni.count)
1553 0 : bi.minpos = BUN_NONE;
1554 : }
1555 0 : if (complex_cand(n)) {
1556 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1557 0 : o[i] = *(oid *)Tpos(&ni, i);
1558 : /* last value */
1559 0 : v = o[ni.count - 1];
1560 : } else {
1561 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1562 0 : o[i] = v++;
1563 : /* last value added (not one beyond) */
1564 0 : v--;
1565 : }
1566 0 : if (bi.maxpos != BUN_NONE) {
1567 0 : if (v >= BUNtoid(b, bi.maxpos))
1568 0 : bi.maxpos = pos + ni.count - 1;
1569 0 : else if (pos <= bi.maxpos && bi.maxpos < pos + ni.count)
1570 0 : bi.maxpos = BUN_NONE;
1571 : }
1572 : }
1573 : } else {
1574 : /* if the extremes of n are at least as
1575 : * extreme as those of b, we can replace b's
1576 : * min/max, else we don't know what b's new
1577 : * min/max are*/
1578 16711 : if (bi.minpos != BUN_NONE && ni.minpos != BUN_NONE &&
1579 269 : atomcmp(BUNtloc(bi, bi.minpos), BUNtail(ni, ni.minpos)) >= 0) {
1580 159 : bi.minpos = pos + ni.minpos;
1581 : } else {
1582 16283 : bi.minpos = BUN_NONE;
1583 : }
1584 16739 : if (bi.maxpos != BUN_NONE && ni.maxpos != BUN_NONE &&
1585 297 : atomcmp(BUNtloc(bi, bi.maxpos), BUNtail(ni, ni.maxpos)) <= 0) {
1586 216 : bi.maxpos = pos + ni.maxpos;
1587 : } else {
1588 16226 : bi.maxpos = BUN_NONE;
1589 : }
1590 16442 : memcpy(Tloc(b, pos), ni.base,
1591 16442 : ni.count << b->tshift);
1592 : }
1593 : /* either we have a hash that was updated above, or we
1594 : * have no hash; we cannot have the case where there is
1595 : * only a persisted (unloaded) hash since it would have
1596 : * been destroyed above */
1597 16442 : if (b->thash != NULL) {
1598 0 : for (BUN i = pos, j = pos + ni.count; i < j; i++)
1599 0 : HASHinsert_locked(&bi, i, Tloc(b, i));
1600 0 : if (b->thash)
1601 0 : nunique = b->thash->nunique;
1602 : }
1603 16455 : MT_rwlock_wrunlock(&b->thashlock);
1604 16461 : locked = false;
1605 16461 : if (ni.count == BATcount(b)) {
1606 : /* if we replaced all values of b by values
1607 : * from n, we can also copy the min/max
1608 : * properties */
1609 8078 : bi.minpos = ni.minpos;
1610 8078 : bi.maxpos = ni.maxpos;
1611 8078 : if (BATtdensebi(&ni)) {
1612 : /* replaced all of b with a dense sequence */
1613 47 : MT_lock_set(&b->theaplock);
1614 47 : BATtseqbase(b, ni.tseq);
1615 47 : MT_lock_unset(&b->theaplock);
1616 : }
1617 : }
1618 : } else {
1619 155362881 : for (BUN i = 0; i < ni.count; i++) {
1620 155354732 : oid updid;
1621 155354732 : if (positions) {
1622 : /* assert(!autoincr) */
1623 155354732 : updid = *positions++;
1624 : } else {
1625 0 : updid = BUNtoid(p, i);
1626 : }
1627 :
1628 155354732 : if (updid < b->hseqbase ||
1629 155354732 : (!mayappend && updid >= hseqend)) {
1630 0 : GDKerror("id out of range\n");
1631 0 : goto bailout;
1632 : }
1633 155354732 : updid -= b->hseqbase;
1634 155354732 : if (!force && updid < b->batInserted) {
1635 0 : GDKerror("updating committed value\n");
1636 0 : goto bailout;
1637 : }
1638 :
1639 155354732 : const void *new = BUNtloc(ni, i);
1640 :
1641 155354732 : if (updid >= BATcount(b)) {
1642 16059 : assert(mayappend);
1643 16059 : if (locked) {
1644 10 : MT_rwlock_wrunlock(&b->thashlock);
1645 10 : locked = false;
1646 : }
1647 16059 : if (b->tminpos != bi.minpos ||
1648 16057 : b->tmaxpos != bi.maxpos) {
1649 3 : MT_lock_set(&b->theaplock);
1650 3 : b->tminpos = bi.minpos;
1651 3 : b->tmaxpos = bi.maxpos;
1652 3 : MT_lock_unset(&b->theaplock);
1653 : }
1654 16059 : if (BATcount(b) < updid &&
1655 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1656 0 : goto bailout;
1657 : }
1658 16059 : if (BUNappend(b, new, force) != GDK_SUCCEED) {
1659 0 : bat_iterator_end(&ni);
1660 0 : return GDK_FAIL;
1661 : }
1662 16059 : bi = bat_iterator_nolock(b);
1663 16059 : continue;
1664 : }
1665 :
1666 155338673 : const void *old = BUNtloc(bi, updid);
1667 155338673 : bool isnil = atomcmp(new, nil) == 0;
1668 151293129 : anynil |= isnil;
1669 151293129 : if (b->tnil &&
1670 1900 : !anynil &&
1671 1900 : atomcmp(old, nil) == 0) {
1672 : /* if old value is nil and no new
1673 : * value is, we're not sure anymore
1674 : * about the nil property, so we must
1675 : * clear it */
1676 1896 : b->tnil = false;
1677 : }
1678 151293129 : b->tnonil &= !isnil;
1679 151293129 : b->tnil |= isnil;
1680 151293129 : if (bi.maxpos != BUN_NONE) {
1681 3664 : if (!isnil &&
1682 1830 : atomcmp(BUNtloc(bi, bi.maxpos), new) < 0) {
1683 : /* new value is larger than
1684 : * previous largest */
1685 137 : bi.maxpos = updid;
1686 1704 : } else if (atomcmp(BUNtloc(bi, bi.maxpos), old) == 0 &&
1687 7 : atomcmp(new, old) != 0) {
1688 : /* old value is equal to
1689 : * largest and new value is
1690 : * smaller, so we don't know
1691 : * anymore which is the
1692 : * largest */
1693 7 : bi.maxpos = BUN_NONE;
1694 : }
1695 : }
1696 151293129 : if (bi.minpos != BUN_NONE) {
1697 3664 : if (!isnil &&
1698 1830 : atomcmp(BUNtloc(bi, bi.minpos), new) > 0) {
1699 : /* new value is smaller than
1700 : * previous smallest */
1701 6 : bi.minpos = updid;
1702 1834 : } else if (atomcmp(BUNtloc(bi, bi.minpos), old) == 0 &&
1703 6 : atomcmp(new, old) != 0) {
1704 : /* old value is equal to
1705 : * smallest and new value is
1706 : * larger, so we don't know
1707 : * anymore which is the
1708 : * smallest */
1709 6 : bi.minpos = BUN_NONE;
1710 : }
1711 : }
1712 :
1713 151293129 : if (!locked) {
1714 8143 : MT_rwlock_wrlock(&b->thashlock);
1715 8143 : locked = true;
1716 : }
1717 151293134 : HASHdelete_locked(&bi, updid, old);
1718 151214809 : switch (b->twidth) {
1719 27509857 : case 1:
1720 27509857 : ((bte *) b->theap->base)[updid] = * (bte *) new;
1721 27509857 : break;
1722 525067 : case 2:
1723 525067 : ((sht *) b->theap->base)[updid] = * (sht *) new;
1724 525067 : break;
1725 19519105 : case 4:
1726 19519105 : ((int *) b->theap->base)[updid] = * (int *) new;
1727 19519105 : break;
1728 98440862 : case 8:
1729 98440862 : ((lng *) b->theap->base)[updid] = * (lng *) new;
1730 98440862 : break;
1731 5219918 : case 16:
1732 : #ifdef HAVE_HGE
1733 5219918 : ((hge *) b->theap->base)[updid] = * (hge *) new;
1734 : #else
1735 : ((uuid *) b->theap->base)[updid] = * (uuid *) new;
1736 : #endif
1737 5219918 : break;
1738 0 : default:
1739 0 : memcpy(BUNtloc(bi, updid), new, ATOMsize(b->ttype));
1740 0 : break;
1741 : }
1742 151214809 : HASHinsert_locked(&bi, updid, new);
1743 : }
1744 8149 : if (locked) {
1745 8139 : if (b->thash)
1746 0 : nunique = b->thash->nunique;
1747 8139 : MT_rwlock_wrunlock(&b->thashlock);
1748 8139 : locked = false;
1749 : }
1750 : }
1751 26913 : bat_iterator_end(&ni);
1752 26895 : MT_lock_set(&b->theaplock);
1753 26910 : if (nunique != 0)
1754 2 : b->tunique_est = (double) nunique;
1755 26910 : b->tminpos = bi.minpos;
1756 26910 : b->tmaxpos = bi.maxpos;
1757 26910 : b->theap->dirty = true;
1758 26910 : MT_lock_unset(&b->theaplock);
1759 26916 : TRC_DEBUG(ALGO,
1760 : "BATreplace(" ALGOBATFMT "," ALGOOPTBATFMT "," ALGOBATFMT ") " LLFMT " usec\n",
1761 : ALGOBATPAR(b), ALGOOPTBATPAR(p), ALGOBATPAR(n),
1762 : GDKusec() - t0);
1763 : return GDK_SUCCEED;
1764 :
1765 0 : bailout:
1766 0 : bat_iterator_end(&ni);
1767 0 : if (locked) {
1768 0 : Hash *h = b->thash;
1769 0 : b->thash = NULL;
1770 0 : MT_rwlock_wrunlock(&b->thashlock);
1771 0 : doHASHdestroy(b, h);
1772 : }
1773 : return GDK_FAIL;
1774 : }
1775 :
1776 : /* replace values from b at locations specified in p with values in n */
1777 : gdk_return
1778 204924 : BATreplace(BAT *b, BAT *p, BAT *n, bool force)
1779 : {
1780 204924 : return BATappend_or_update(b, p, NULL, n, false, false, force);
1781 : }
1782 :
1783 : /* like BATreplace, but p may specify locations beyond the end of b */
1784 : gdk_return
1785 925 : BATupdate(BAT *b, BAT *p, BAT *n, bool force)
1786 : {
1787 925 : return BATappend_or_update(b, p, NULL, n, true, false, force);
1788 : }
1789 :
1790 : #if 0 /* not used */
1791 : /* like BATreplace, but the positions are given by an array of oid values */
1792 : gdk_return
1793 : BATreplacepos(BAT *b, const oid *positions, BAT *n, bool autoincr, bool force)
1794 : {
1795 : return BATappend_or_update(b, NULL, positions, n, false, autoincr, force);
1796 : }
1797 : #endif
1798 :
1799 : /* like BATreplace, but the positions are given by an array of oid
1800 : * values, and they may specify locations beyond the end of b */
1801 : gdk_return
1802 175 : BATupdatepos(BAT *b, const oid *positions, BAT *n, bool autoincr, bool force)
1803 : {
1804 175 : return BATappend_or_update(b, NULL, positions, n, true, autoincr, force);
1805 : }
1806 :
1807 : /*
1808 : * BAT Selections
1809 : * The BAT selectors are among the most heavily used operators.
1810 : * Their efficient implementation is therefore mandatory.
1811 : *
1812 : * BAT slice
1813 : * This function returns a horizontal slice from a BAT. It optimizes
1814 : * execution by avoiding to copy when the BAT is memory mapped (in
1815 : * this case, an independent submap is created) or else when it is
1816 : * read-only, then a VIEW bat is created as a result.
1817 : *
1818 : * If a new copy has to be created, this function takes care to
1819 : * preserve void-columns (in this case, the seqbase has to be
1820 : * recomputed in the result).
1821 : *
1822 : * The selected range is excluding the high value.
1823 : */
1824 : BAT *
1825 10537072 : BATslice(BAT *b, BUN l, BUN h)
1826 : {
1827 10537072 : BUN low = l;
1828 10537072 : BAT *bn = NULL;
1829 :
1830 10537072 : BATcheck(b, NULL);
1831 10537072 : BATiter bi = bat_iterator(b);
1832 10547342 : if (l > bi.count)
1833 : l = bi.count;
1834 10547342 : if (h > bi.count)
1835 : h = bi.count;
1836 10547342 : if (h < l)
1837 : h = l;
1838 :
1839 10547342 : if (complex_cand(b)) {
1840 : /* slicing a candidate list with exceptions */
1841 75 : struct canditer ci;
1842 75 : canditer_init(&ci, NULL, b);
1843 75 : if (b->hseqbase + l >= ci.hseq) {
1844 75 : l = b->hseqbase + l - ci.hseq;
1845 75 : h = b->hseqbase + h - ci.hseq;
1846 : } else {
1847 0 : l = 0;
1848 0 : if (b->hseqbase + h >= ci.hseq)
1849 0 : h = b->hseqbase + h - ci.hseq;
1850 : else
1851 : h = 0;
1852 : }
1853 75 : bn = canditer_slice(&ci, l, h);
1854 75 : goto doreturn;
1855 : }
1856 : /* If the source BAT is readonly, then we can obtain a VIEW
1857 : * that just reuses the memory of the source. */
1858 10547267 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1859 : /* forget about slices for bit masks: we can't deal
1860 : * with difference in alignment, so we'll just make a
1861 : * copy */
1862 0 : bn = COLnew((oid) (b->hseqbase + low), b->ttype, h - l, TRANSIENT);
1863 : /* we use BATappend with a candidate list to easily
1864 : * copy the part of b that we need */
1865 0 : BAT *s = BATdense(0, (oid) (b->hseqbase + low), h - l);
1866 0 : if (bn == NULL ||
1867 0 : s == NULL ||
1868 0 : BATappend(bn, b, s, false) != GDK_SUCCEED) {
1869 0 : BBPreclaim(bn);
1870 0 : BBPreclaim(s);
1871 0 : bn = NULL;
1872 0 : goto doreturn;
1873 : }
1874 0 : BBPunfix(s->batCacheid);
1875 0 : goto doreturn;
1876 : }
1877 10547267 : restrict_t prestricted;
1878 12367297 : if (bi.restricted == BAT_READ && VIEWtparent(b)) {
1879 1816387 : BAT *pb = BBP_desc(VIEWtparent(b));
1880 1816387 : MT_lock_set(&pb->theaplock);
1881 1815848 : prestricted = pb->batRestricted;
1882 1815848 : MT_lock_unset(&pb->theaplock);
1883 : } else {
1884 : prestricted = BAT_WRITE; /* just initialize with anything */
1885 : }
1886 10550910 : if (bi.restricted == BAT_READ &&
1887 10501286 : (!VIEWtparent(b) || prestricted == BAT_READ)) {
1888 10501406 : bn = VIEWcreate(b->hseqbase + low, b, l, h);
1889 10501406 : if (bn == NULL)
1890 : goto doreturn;
1891 : } else {
1892 : /* create a new BAT and put everything into it */
1893 49504 : BUN p = l;
1894 49504 : BUN q = h;
1895 :
1896 49504 : bn = COLnew((oid) (b->hseqbase + low), BATtdensebi(&bi) || (b->ttype == TYPE_oid && h == l) ? TYPE_void : b->ttype, h - l, TRANSIENT);
1897 49555 : if (bn == NULL)
1898 0 : goto doreturn;
1899 :
1900 49555 : if (bn->ttype == TYPE_void) {
1901 36007 : BATsetcount(bn, h - l);
1902 36032 : BATtseqbase(bn, is_oid_nil(bi.tseq) ? oid_nil : h == l ? 0 : (oid) (bi.tseq + low));
1903 13548 : } else if (bn->tvheap == NULL) {
1904 9048 : assert(BATatoms[bn->ttype].atomPut == NULL);
1905 9048 : memcpy(Tloc(bn, 0), (const char *) bi.base + (p << bi.shift),
1906 9048 : (q - p) << bn->tshift);
1907 9048 : bn->theap->dirty = true;
1908 9048 : BATsetcount(bn, h - l);
1909 : } else {
1910 1217606 : for (; p < q; p++) {
1911 1213131 : if (bunfastapp(bn, BUNtail(bi, p)) != GDK_SUCCEED) {
1912 0 : BBPreclaim(bn);
1913 0 : bn = NULL;
1914 0 : goto doreturn;
1915 : }
1916 : }
1917 : }
1918 49436 : bn->theap->dirty = true;
1919 49436 : bn->tsorted = bi.sorted || bn->batCount <= 1;
1920 49436 : bn->trevsorted = bi.revsorted || bn->batCount <= 1;
1921 49436 : bn->tkey = bi.key || bn->batCount <= 1;
1922 49436 : bn->tnonil = bi.nonil;
1923 49436 : bn->tnil = false; /* we don't know */
1924 49436 : if (bi.nosorted > l && bi.nosorted < h && !bn->tsorted)
1925 2194 : bn->tnosorted = bi.nosorted - l;
1926 : else
1927 47242 : bn->tnosorted = 0;
1928 49436 : if (bi.norevsorted > l && bi.norevsorted < h && !bn->trevsorted)
1929 4561 : bn->tnorevsorted = bi.norevsorted - l;
1930 : else
1931 44875 : bn->tnorevsorted = 0;
1932 49436 : if (bi.nokey[0] >= l && bi.nokey[0] < h &&
1933 40292 : bi.nokey[1] >= l && bi.nokey[1] < h &&
1934 237 : bi.nokey[0] != bi.nokey[1] &&
1935 : !bn->tkey) {
1936 238 : bn->tnokey[0] = bi.nokey[0] - l;
1937 238 : bn->tnokey[1] = bi.nokey[1] - l;
1938 : } else {
1939 49198 : bn->tnokey[0] = bn->tnokey[1] = 0;
1940 : }
1941 : }
1942 10556083 : doreturn:
1943 10556083 : bat_iterator_end(&bi);
1944 10557029 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",lo=" BUNFMT ",hi=" BUNFMT " -> "
1945 : ALGOOPTBATFMT "\n",
1946 : ALGOBATPAR(b), l, h, ALGOOPTBATPAR(bn));
1947 : return bn;
1948 : }
1949 :
1950 : #define BAT_ORDERED(TPE) \
1951 : do { \
1952 : const TPE *restrict vals = Tloc(b, 0); \
1953 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
1954 : if (vals[p - 1] > vals[p]) { \
1955 : b->tnosorted = p; \
1956 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
1957 : goto doreturn; \
1958 : } else if (vals[p - 1] < vals[p]) { \
1959 : if (!b->trevsorted && b->tnorevsorted == 0) { \
1960 : b->tnorevsorted = p; \
1961 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b)); \
1962 : } \
1963 : } else if (!b->tkey && b->tnokey[1] == 0) { \
1964 : b->tnokey[0] = p - 1; \
1965 : b->tnokey[1] = p; \
1966 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b)); \
1967 : } \
1968 : } \
1969 : } while (0)
1970 :
1971 : #define BAT_ORDERED_FP(TPE) \
1972 : do { \
1973 : const TPE *restrict vals = Tloc(b, 0); \
1974 : TPE prev = vals[0]; \
1975 : bool prevnil = is_##TPE##_nil(prev); \
1976 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
1977 : TPE next = vals[p]; \
1978 : int cmp = prevnil ? -!(prevnil = is_##TPE##_nil(next)) : (prevnil = is_##TPE##_nil(next)) ? 1 : (prev > next) - (prev < next); \
1979 : prev = next; \
1980 : if (cmp > 0) { \
1981 : b->tnosorted = bi.nosorted = p; \
1982 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
1983 : goto doreturn; \
1984 : } else if (cmp < 0) { \
1985 : if (!b->trevsorted && b->tnorevsorted == 0) { \
1986 : b->tnorevsorted = bi.norevsorted = p; \
1987 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b)); \
1988 : } \
1989 : } else if (!b->tkey && b->tnokey[1] == 0) { \
1990 : b->tnokey[0] = bi.nokey[0] = p - 1; \
1991 : b->tnokey[1] = bi.nokey[1] = p; \
1992 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b)); \
1993 : } \
1994 : } \
1995 : } while (0)
1996 :
1997 : /* Return whether the BAT is ordered or not. If we don't know, invest
1998 : * in a scan and record the results in the bat descriptor. If during
1999 : * the scan we happen to find evidence that the BAT is not reverse
2000 : * sorted, we record the location. */
2001 : bool
2002 2909174 : BATordered(BAT *b)
2003 : {
2004 2909174 : lng t0 = GDKusec();
2005 2909966 : bool sorted;
2006 :
2007 2909966 : MT_lock_set(&b->theaplock);
2008 2910313 : if (b->ttype == TYPE_void || b->tsorted || BATcount(b) == 0) {
2009 402860 : MT_lock_unset(&b->theaplock);
2010 402897 : return true;
2011 : }
2012 2507453 : if (b->tnosorted > 0 || !ATOMlinear(b->ttype)) {
2013 2056397 : MT_lock_unset(&b->theaplock);
2014 2056417 : return false;
2015 : }
2016 :
2017 : /* There are a few reasons why we need a lock here. It may be
2018 : * that multiple threads call this functions at the same time
2019 : * (happens a lot with mitosis/mergetable), but we only need to
2020 : * scan the bat in one thread: the others can reap the rewards
2021 : * when that one thread is done. Also, we need the heap to
2022 : * remain accessible (could have used bat_iterator for that),
2023 : * and, and this is the killer argument, we may need to make
2024 : * changes to the bat descriptor. */
2025 451056 : BATiter bi = bat_iterator_nolock(b);
2026 451056 : if (!b->tsorted && b->tnosorted == 0) {
2027 806427 : switch (ATOMbasetype(b->ttype)) {
2028 92022 : case TYPE_bte:
2029 124331078 : BAT_ORDERED(bte);
2030 : break;
2031 8303 : case TYPE_sht:
2032 1587878 : BAT_ORDERED(sht);
2033 : break;
2034 300140 : case TYPE_int:
2035 117982700 : BAT_ORDERED(int);
2036 : break;
2037 8066 : case TYPE_lng:
2038 60317707 : BAT_ORDERED(lng);
2039 : break;
2040 : #ifdef HAVE_HGE
2041 368 : case TYPE_hge:
2042 8002665 : BAT_ORDERED(hge);
2043 : break;
2044 : #endif
2045 974 : case TYPE_flt:
2046 8007244 : BAT_ORDERED_FP(flt);
2047 : break;
2048 789 : case TYPE_dbl:
2049 8213431 : BAT_ORDERED_FP(dbl);
2050 : break;
2051 : case TYPE_str:
2052 21006735 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2053 20992476 : int c;
2054 20992476 : const char *p1 = BUNtvar(bi, p - 1);
2055 20992476 : const char *p2 = BUNtvar(bi, p);
2056 20992476 : if (p1 == p2)
2057 : c = 0;
2058 2247243 : else if (p1[0] == '\200') {
2059 1415 : if (p2[0] == '\200')
2060 : c = 0;
2061 : else
2062 : c = -1;
2063 2245828 : } else if (p2[0] == '\200')
2064 : c = 1;
2065 : else
2066 2244964 : c = strcmp(p1, p2);
2067 2244964 : if (c > 0) {
2068 25959 : b->tnosorted = bi.nosorted = p;
2069 25959 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2070 25959 : goto doreturn;
2071 20966517 : } else if (c < 0) {
2072 2221233 : assert(!b->trevsorted);
2073 2221233 : if (b->tnorevsorted == 0) {
2074 11786 : b->tnorevsorted = bi.norevsorted = p;
2075 11786 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b));
2076 : }
2077 18745284 : } else if (b->tnokey[1] == 0) {
2078 3583 : assert(!b->tkey);
2079 3583 : b->tnokey[0] = bi.nokey[0] = p - 1;
2080 3583 : b->tnokey[1] = bi.nokey[1] = p;
2081 20966517 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b));
2082 : }
2083 : }
2084 : break;
2085 178 : default: {
2086 178 : int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
2087 2458 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2088 2421 : int c;
2089 2421 : if ((c = cmpf(BUNtail(bi, p - 1), BUNtail(bi, p))) > 0) {
2090 141 : b->tnosorted = bi.nosorted = p;
2091 141 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2092 141 : goto doreturn;
2093 2280 : } else if (c < 0) {
2094 2230 : if (!b->trevsorted && b->tnorevsorted == 0) {
2095 86 : b->tnorevsorted = bi.norevsorted = p;
2096 86 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b));
2097 : }
2098 50 : } else if (!b->tkey && b->tnokey[1] == 0) {
2099 8 : b->tnokey[0] = bi.nokey[0] = p - 1;
2100 8 : b->tnokey[1] = bi.nokey[1] = p;
2101 2280 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b));
2102 : }
2103 : }
2104 : break;
2105 : }
2106 : }
2107 : /* we only get here if we completed the scan; note that
2108 : * if we didn't record evidence about *reverse*
2109 : * sortedness, we know that the BAT is also reverse
2110 : * sorted; similarly, if we didn't record evidence about
2111 : * keyness, we know the BAT is key */
2112 146455 : b->tsorted = bi.sorted = true;
2113 146455 : TRC_DEBUG(ALGO, "Fixed sorted for " ALGOBATFMT " (" LLFMT " usec)\n", ALGOBATPAR(b), GDKusec() - t0);
2114 146965 : if (!b->trevsorted && b->tnorevsorted == 0) {
2115 65480 : b->trevsorted = bi.revsorted = true;
2116 65480 : TRC_DEBUG(ALGO, "Fixed revsorted for " ALGOBATFMT "\n", ALGOBATPAR(b));
2117 : }
2118 146965 : if (!b->tkey && b->tnokey[1] == 0) {
2119 49805 : b->tkey = bi.key = true;
2120 49805 : TRC_DEBUG(ALGO, "Fixed key for " ALGOBATFMT "\n", ALGOBATPAR(b));
2121 : }
2122 : }
2123 146965 : doreturn:
2124 451573 : sorted = b->tsorted;
2125 451573 : bat pbid = VIEWtparent(b);
2126 451573 : MT_lock_unset(&b->theaplock);
2127 451955 : if (pbid) {
2128 253743 : BAT *pb = BBP_desc(pbid);
2129 253743 : MT_lock_set(&pb->theaplock);
2130 253743 : if (bi.count == BATcount(pb) &&
2131 192135 : bi.h == pb->theap &&
2132 192135 : bi.type == pb->ttype) {
2133 : /* add to knowledge in parent bat */
2134 192124 : pb->tsorted |= bi.sorted;
2135 192124 : if (pb->tnosorted == 0)
2136 192124 : pb->tnosorted = bi.nosorted;
2137 192124 : pb->trevsorted |= bi.revsorted;
2138 192124 : if (pb->tnorevsorted == 0)
2139 23982 : pb->tnorevsorted = bi.norevsorted;
2140 192124 : pb->tkey |= bi.key;
2141 192124 : if (pb->tnokey[1] == 0) {
2142 169377 : pb->tnokey[0] = bi.nokey[0];
2143 169377 : pb->tnokey[1] = bi.nokey[1];
2144 : }
2145 : }
2146 253743 : MT_lock_unset(&pb->theaplock);
2147 : }
2148 : return sorted;
2149 : }
2150 :
2151 : #define BAT_REVORDERED(TPE) \
2152 : do { \
2153 : const TPE *restrict vals = Tloc(b, 0); \
2154 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
2155 : if (vals[p - 1] < vals[p]) { \
2156 : b->tnorevsorted = p; \
2157 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
2158 : goto doreturn; \
2159 : } \
2160 : } \
2161 : } while (0)
2162 :
2163 : #define BAT_REVORDERED_FP(TPE) \
2164 : do { \
2165 : const TPE *restrict vals = Tloc(b, 0); \
2166 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
2167 : TPE prev = vals[p - 1], next = vals[p]; \
2168 : int cmp = is_flt_nil(prev) ? -!is_flt_nil(next) : is_flt_nil(next) ? 1 : (prev > next) - (prev < next); \
2169 : if (cmp < 0) { \
2170 : b->tnorevsorted = bi.norevsorted = p; \
2171 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
2172 : goto doreturn; \
2173 : } \
2174 : } \
2175 : } while (0)
2176 :
2177 : /* Return whether the BAT is reverse ordered or not. If we don't
2178 : * know, invest in a scan and record the results in the bat
2179 : * descriptor. */
2180 : bool
2181 2797588 : BATordered_rev(BAT *b)
2182 : {
2183 2797588 : lng t0 = GDKusec();
2184 2798204 : bool revsorted;
2185 :
2186 2798204 : if (b == NULL || !ATOMlinear(b->ttype))
2187 : return false;
2188 2798197 : MT_lock_set(&b->theaplock);
2189 2797947 : if (BATcount(b) <= 1 || b->trevsorted) {
2190 254350 : MT_lock_unset(&b->theaplock);
2191 254372 : return true;
2192 : }
2193 2543597 : if (b->ttype == TYPE_void) {
2194 21070 : MT_lock_unset(&b->theaplock);
2195 21076 : return is_oid_nil(b->tseqbase);
2196 : }
2197 2522527 : if (BATtdense(b) || b->tnorevsorted > 0) {
2198 2434391 : MT_lock_unset(&b->theaplock);
2199 2434444 : return false;
2200 : }
2201 88136 : BATiter bi = bat_iterator_nolock(b);
2202 88136 : if (!b->trevsorted && b->tnorevsorted == 0) {
2203 139951 : switch (ATOMbasetype(b->ttype)) {
2204 34435 : case TYPE_bte:
2205 10061612 : BAT_REVORDERED(bte);
2206 : break;
2207 3673 : case TYPE_sht:
2208 3169813 : BAT_REVORDERED(sht);
2209 : break;
2210 30073 : case TYPE_int:
2211 1194679 : BAT_REVORDERED(int);
2212 : break;
2213 4428 : case TYPE_lng:
2214 2684595 : BAT_REVORDERED(lng);
2215 : break;
2216 : #ifdef HAVE_HGE
2217 122 : case TYPE_hge:
2218 990 : BAT_REVORDERED(hge);
2219 : break;
2220 : #endif
2221 511 : case TYPE_flt:
2222 1997 : BAT_REVORDERED_FP(flt);
2223 : break;
2224 321 : case TYPE_dbl:
2225 485509 : BAT_REVORDERED_FP(dbl);
2226 : break;
2227 14573 : default: {
2228 14573 : int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
2229 555242 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2230 551979 : if (cmpf(BUNtail(bi, p - 1), BUNtail(bi, p)) < 0) {
2231 11312 : b->tnorevsorted = p;
2232 11312 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2233 11312 : goto doreturn;
2234 : }
2235 : }
2236 : break;
2237 : }
2238 : }
2239 25703 : b->trevsorted = bi.revsorted = true;
2240 25703 : TRC_DEBUG(ALGO, "Fixed revsorted for " ALGOBATFMT " (" LLFMT " usec)\n", ALGOBATPAR(b), GDKusec() - t0);
2241 : }
2242 25703 : doreturn:
2243 88138 : revsorted = b->trevsorted;
2244 88138 : bat pbid = VIEWtparent(b);
2245 88138 : MT_lock_unset(&b->theaplock);
2246 88434 : if (pbid) {
2247 25886 : BAT *pb = BBP_desc(pbid);
2248 25886 : MT_lock_set(&pb->theaplock);
2249 25887 : if (bi.count == BATcount(pb) &&
2250 3269 : bi.h == pb->theap &&
2251 3269 : bi.type == pb->ttype) {
2252 : /* add to knowledge in parent bat */
2253 3269 : pb->trevsorted |= bi.revsorted;
2254 3269 : if (pb->tnorevsorted == 0)
2255 3269 : pb->tnorevsorted = bi.norevsorted;
2256 : }
2257 25887 : MT_lock_unset(&pb->theaplock);
2258 : }
2259 : return revsorted;
2260 : }
2261 :
2262 : /* figure out which sort function is to be called
2263 : * stable sort can produce an error (not enough memory available),
2264 : * "quick" sort does not produce errors */
2265 : static gdk_return
2266 3131792 : do_sort(void *restrict h, void *restrict t, const void *restrict base,
2267 : size_t n, int hs, int ts, int tpe, bool reverse, bool nilslast,
2268 : bool stable)
2269 : {
2270 3131792 : if (n <= 1) /* trivially sorted */
2271 : return GDK_SUCCEED;
2272 2701300 : switch (tpe) {
2273 2676041 : case TYPE_bte:
2274 : case TYPE_sht:
2275 : case TYPE_int:
2276 : case TYPE_lng:
2277 : #ifdef HAVE_HGE
2278 : case TYPE_hge:
2279 : #endif
2280 : case TYPE_date:
2281 : case TYPE_daytime:
2282 : case TYPE_timestamp:
2283 2676041 : assert(base == NULL);
2284 2676041 : if (nilslast == reverse && (stable || n > 100))
2285 20144 : return GDKrsort(h, t, n, hs, ts, reverse, false);
2286 : break;
2287 4 : case TYPE_uuid:
2288 4 : assert(base == NULL);
2289 4 : if (nilslast == reverse && (stable || n > 100))
2290 1 : return GDKrsort(h, t, n, hs, ts, reverse, true);
2291 : break;
2292 : default:
2293 : break;
2294 : }
2295 25348 : if (stable) {
2296 27 : if (reverse)
2297 0 : return GDKssort_rev(h, t, base, n, hs, ts, tpe);
2298 : else
2299 27 : return GDKssort(h, t, base, n, hs, ts, tpe);
2300 : } else {
2301 2681128 : GDKqsort(h, t, base, n, hs, ts, tpe, reverse, nilslast);
2302 : }
2303 2681128 : return GDK_SUCCEED;
2304 : }
2305 :
2306 : /* Sort the bat b according to both o and g. The stable and reverse
2307 : * parameters indicate whether the sort should be stable or descending
2308 : * respectively. The parameter b is required, o and g are optional
2309 : * (i.e., they may be NULL).
2310 : *
2311 : * A sorted copy is returned through the sorted parameter, the new
2312 : * ordering is returned through the order parameter, group information
2313 : * is returned through the groups parameter. All three output
2314 : * parameters may be NULL. If they're all NULL, this function does
2315 : * nothing.
2316 : *
2317 : * If o is specified, it is used to first rearrange b according to the
2318 : * order specified in o, after which b is sorted taking g into
2319 : * account.
2320 : *
2321 : * If g is specified, it indicates groups which should be individually
2322 : * ordered. Each row of consecutive equal values in g indicates a
2323 : * group which is sorted according to stable and reverse. g is used
2324 : * after the order in b was rearranged according to o.
2325 : *
2326 : * The outputs order and groups can be used in subsequent calls to
2327 : * this function. This can be used if multiple BATs need to be sorted
2328 : * together. The BATs should then be sorted in order of significance,
2329 : * and each following call should use the original unordered BAT plus
2330 : * the order and groups bat from the previous call. In this case, the
2331 : * sorted BATs are not of much use, so the sorted output parameter
2332 : * does not need to be specified.
2333 : * Apart from error checking and maintaining reference counts, sorting
2334 : * three columns (col1, col2, col3) could look like this with the
2335 : * sorted results in (col1s, col2s, col3s):
2336 : * BATsort(&col1s, &ord1, &grp1, col1, NULL, NULL, false, false, false);
2337 : * BATsort(&col2s, &ord2, &grp2, col2, ord1, grp1, false, false, false);
2338 : * BATsort(&col3s, NULL, NULL, col3, ord2, grp2, false, false, false);
2339 : * Note that the "reverse" parameter can be different for each call.
2340 : */
2341 : gdk_return
2342 24289 : BATsort(BAT **sorted, BAT **order, BAT **groups,
2343 : BAT *b, BAT *o, BAT *g, bool reverse, bool nilslast, bool stable)
2344 : {
2345 24289 : BAT *bn = NULL, *on = NULL, *gn = NULL, *pb = NULL;
2346 24289 : BATiter pbi;
2347 24289 : oid *restrict grps, *restrict ords, prev;
2348 24289 : BUN p, q, r;
2349 24289 : lng t0 = GDKusec();
2350 24294 : bool mkorderidx, orderidxlock = false;
2351 24294 : Heap *oidxh = NULL;
2352 :
2353 : /* we haven't implemented NILs as largest value for stable
2354 : * sort, so NILs come first for ascending and last for
2355 : * descending */
2356 24294 : assert(!stable || reverse == nilslast);
2357 :
2358 24294 : if (b == NULL) {
2359 0 : GDKerror("b must exist\n");
2360 0 : return GDK_FAIL;
2361 : }
2362 24294 : if (stable && reverse != nilslast) {
2363 0 : GDKerror("stable sort cannot have reverse != nilslast\n");
2364 0 : return GDK_FAIL;
2365 : }
2366 24294 : if (!ATOMlinear(b->ttype)) {
2367 0 : GDKerror("type %s cannot be sorted\n", ATOMname(b->ttype));
2368 0 : return GDK_FAIL;
2369 : }
2370 24294 : MT_lock_set(&b->theaplock);
2371 24298 : if (b->ttype == TYPE_void) {
2372 148 : b->tsorted = true;
2373 222 : if (b->trevsorted != (is_oid_nil(b->tseqbase) || b->batCount <= 1)) {
2374 0 : b->trevsorted = !b->trevsorted;
2375 : }
2376 296 : if (b->tkey != (!is_oid_nil(b->tseqbase) || b->batCount <= 1)) {
2377 0 : b->tkey = !b->tkey;
2378 : }
2379 24150 : } else if (b->batCount <= 1) {
2380 6609 : if (!b->tsorted || !b->trevsorted) {
2381 23 : b->tsorted = b->trevsorted = true;
2382 : }
2383 : }
2384 24298 : MT_lock_unset(&b->theaplock);
2385 24292 : if (o != NULL &&
2386 13006 : (ATOMtype(o->ttype) != TYPE_oid || /* oid tail */
2387 13006 : BATcount(o) != BATcount(b) || /* same size as b */
2388 4825 : (o->ttype == TYPE_void && /* no nil tail */
2389 2271 : BATcount(o) != 0 &&
2390 2271 : is_oid_nil(o->tseqbase)))) {
2391 0 : GDKerror("o must have type oid and same size as b\n");
2392 0 : return GDK_FAIL;
2393 : }
2394 24292 : if (g != NULL &&
2395 13006 : (ATOMtype(g->ttype) != TYPE_oid || /* oid tail */
2396 13006 : !g->tsorted || /* sorted */
2397 13006 : BATcount(g) != BATcount(b) || /* same size as b */
2398 5099 : (g->ttype == TYPE_void && /* no nil tail */
2399 2545 : BATcount(g) != 0 &&
2400 2545 : is_oid_nil(g->tseqbase)))) {
2401 0 : GDKerror("g must have type oid, sorted on the tail, "
2402 : "and same size as b\n");
2403 0 : return GDK_FAIL;
2404 : }
2405 24292 : if (sorted == NULL && order == NULL) {
2406 : /* no place to put result, so we're done quickly */
2407 0 : GDKerror("no place to put the result.\n");
2408 0 : return GDK_FAIL;
2409 : }
2410 24292 : if (g == NULL && !stable) {
2411 : /* pre-ordering doesn't make sense if we're not
2412 : * subsorting and the sort is not stable */
2413 11010 : o = NULL;
2414 : }
2415 24292 : if (b->tnonil) {
2416 : /* if there are no nils, placement of nils doesn't
2417 : * matter, so set nilslast such that ordered bits can
2418 : * be used */
2419 17936 : nilslast = reverse;
2420 : }
2421 24292 : pbi = bat_iterator(NULL);
2422 25270 : if (BATcount(b) <= 1 ||
2423 17502 : (reverse == nilslast &&
2424 : (reverse ? b->trevsorted : b->tsorted) &&
2425 4814 : o == NULL && g == NULL &&
2426 2338 : (groups == NULL || BATtkey(b) ||
2427 : (reverse ? b->tsorted : b->trevsorted)))) {
2428 : /* trivially (sub)sorted, and either we don't need to
2429 : * return group information, or we can trivially
2430 : * deduce the groups */
2431 8409 : if (sorted) {
2432 7035 : bn = COLcopy(b, b->ttype, false, TRANSIENT);
2433 7035 : if (bn == NULL)
2434 0 : goto error;
2435 7035 : *sorted = bn;
2436 : }
2437 8409 : if (order) {
2438 7564 : on = BATdense(b->hseqbase, b->hseqbase, BATcount(b));
2439 7567 : if (on == NULL)
2440 0 : goto error;
2441 7567 : *order = on;
2442 : }
2443 8412 : if (groups) {
2444 4432 : if (BATtkey(b)) {
2445 : /* singleton groups */
2446 4033 : gn = BATdense(b->hseqbase, 0, BATcount(b));
2447 4033 : if (gn == NULL)
2448 0 : goto error;
2449 : } else {
2450 : /* single group */
2451 399 : const oid *o = 0;
2452 399 : assert(BATcount(b) == 1 ||
2453 : (b->tsorted && b->trevsorted));
2454 399 : gn = BATconstant(b->hseqbase, TYPE_oid, &o, BATcount(b), TRANSIENT);
2455 399 : if (gn == NULL)
2456 0 : goto error;
2457 : }
2458 4432 : *groups = gn;
2459 : }
2460 8412 : bat_iterator_end(&pbi);
2461 8412 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o="
2462 : ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
2463 : ",reverse=%d,nilslast=%d,stable=%d) = ("
2464 : ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2465 : ALGOOPTBATFMT " -- trivial (" LLFMT
2466 : " usec)\n",
2467 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2468 : ALGOOPTBATPAR(g), reverse, nilslast, stable,
2469 : ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
2470 : ALGOOPTBATPAR(on), GDKusec() - t0);
2471 8412 : return GDK_SUCCEED;
2472 : }
2473 15885 : if (VIEWtparent(b)) {
2474 3370 : pb = BATdescriptor(VIEWtparent(b));
2475 3371 : if (pb != NULL &&
2476 3371 : (b->tbaseoff != pb->tbaseoff ||
2477 2658 : BATcount(b) != BATcount(pb) ||
2478 2446 : b->hseqbase != pb->hseqbase ||
2479 2435 : BATatoms[b->ttype].atomCmp != BATatoms[pb->ttype].atomCmp)) {
2480 936 : BBPunfix(pb->batCacheid);
2481 936 : pb = NULL;
2482 : }
2483 : } else {
2484 : pb = b;
2485 : }
2486 15886 : bat_iterator_end(&pbi);
2487 15884 : pbi = bat_iterator(pb);
2488 : /* when we will create an order index if it doesn't already exist */
2489 15888 : mkorderidx = (g == NULL && !reverse && !nilslast && pb != NULL && (order || !pbi.transient));
2490 15888 : if (g == NULL && !reverse && !nilslast && pb != NULL) {
2491 5362 : (void) BATcheckorderidx(pb);
2492 5362 : MT_lock_set(&pb->batIdxLock);
2493 5360 : if (pb->torderidx) {
2494 61 : if (!stable || ((oid *) pb->torderidx->base)[2]) {
2495 : /* we can use the order index */
2496 61 : oidxh = pb->torderidx;
2497 61 : HEAPincref(oidxh);
2498 : }
2499 : mkorderidx = false;
2500 5299 : } else if (b != pb) {
2501 : /* don't build orderidx on parent bat */
2502 : mkorderidx = false;
2503 4429 : } else if (mkorderidx) {
2504 : /* keep lock when going to create */
2505 3910 : orderidxlock = true;
2506 : }
2507 5360 : if (!orderidxlock)
2508 1451 : MT_lock_unset(&pb->batIdxLock);
2509 : }
2510 15886 : if (g == NULL && o == NULL && !reverse && !nilslast && oidxh != NULL) {
2511 : /* there is an order index that we can use */
2512 61 : on = COLnew(pb->hseqbase, TYPE_oid, pbi.count, TRANSIENT);
2513 61 : if (on == NULL)
2514 0 : goto error;
2515 61 : memcpy(Tloc(on, 0), (oid *) oidxh->base + ORDERIDXOFF, pbi.count * sizeof(oid));
2516 61 : BATsetcount(on, BATcount(b));
2517 61 : HEAPdecref(oidxh, false);
2518 61 : oidxh = NULL;
2519 61 : on->tkey = true;
2520 61 : on->tnil = false;
2521 61 : on->tnonil = true;
2522 61 : on->tsorted = on->trevsorted = false;
2523 61 : on->tseqbase = oid_nil;
2524 61 : if (sorted || groups) {
2525 61 : bn = BATproject(on, b);
2526 60 : if (bn == NULL)
2527 0 : goto error;
2528 60 : bn->tsorted = true;
2529 60 : if (groups) {
2530 6 : if (BATgroup_internal(groups, NULL, NULL, bn, NULL, g, NULL, NULL, true) != GDK_SUCCEED)
2531 0 : goto error;
2532 6 : if (sorted &&
2533 6 : (*groups)->tkey &&
2534 : g == NULL) {
2535 : /* if new groups bat is key
2536 : * and since there is no input
2537 : * groups bat, we know the
2538 : * result bat is key */
2539 4 : bn->tkey = true;
2540 : }
2541 : }
2542 60 : if (sorted)
2543 60 : *sorted = bn;
2544 : else {
2545 0 : BBPunfix(bn->batCacheid);
2546 0 : bn = NULL;
2547 : }
2548 : }
2549 60 : if (order)
2550 10 : *order = on;
2551 : else {
2552 50 : BBPunfix(on->batCacheid);
2553 50 : on = NULL;
2554 : }
2555 60 : bat_iterator_end(&pbi);
2556 61 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o="
2557 : ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
2558 : ",reverse=%d,nilslast=%d,stable=%d) = ("
2559 : ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2560 : ALGOOPTBATFMT " -- orderidx (" LLFMT
2561 : " usec)\n",
2562 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2563 : ALGOOPTBATPAR(g), reverse, nilslast, stable,
2564 : ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
2565 : ALGOOPTBATPAR(on), GDKusec() - t0);
2566 61 : if (pb != NULL && pb != b)
2567 53 : BBPunfix(pb->batCacheid);
2568 61 : return GDK_SUCCEED;
2569 10044 : } else if (oidxh) {
2570 0 : HEAPdecref(oidxh, false);
2571 0 : oidxh = NULL;
2572 : }
2573 15824 : if (o) {
2574 9508 : bn = BATproject(o, b);
2575 9507 : if (bn == NULL)
2576 0 : goto error;
2577 9507 : if (bn->ttype == TYPE_void || isVIEW(bn)) {
2578 3900 : BAT *b2 = COLcopy(bn, ATOMtype(bn->ttype), true, TRANSIENT);
2579 1951 : BBPunfix(bn->batCacheid);
2580 1951 : bn = b2;
2581 : }
2582 9509 : if (pb) {
2583 9076 : bat_iterator_end(&pbi);
2584 9076 : if (pb != b)
2585 1174 : BBPunfix(pb->batCacheid);
2586 9076 : pbi = bat_iterator(NULL);
2587 9076 : pb = NULL;
2588 : }
2589 : } else {
2590 6316 : bn = COLcopy(b, b->ttype, true, TRANSIENT);
2591 : }
2592 15822 : if (bn == NULL)
2593 0 : goto error;
2594 15822 : if (order) {
2595 : /* prepare order bat */
2596 13162 : if (o) {
2597 : /* make copy of input so that we can refine it;
2598 : * copy can be read-only if we take the shortcut
2599 : * below in the case g is "key" */
2600 7893 : on = COLcopy(o, TYPE_oid,
2601 7893 : g == NULL ||
2602 7893 : !(g->tkey || g->ttype == TYPE_void),
2603 : TRANSIENT);
2604 7893 : if (on == NULL)
2605 0 : goto error;
2606 7893 : BAThseqbase(on, b->hseqbase);
2607 7893 : on->tminpos = BUN_NONE;
2608 7893 : on->tmaxpos = BUN_NONE;
2609 : } else {
2610 : /* create new order */
2611 5269 : on = COLnew(b->hseqbase, TYPE_oid, BATcount(bn), TRANSIENT);
2612 5273 : if (on == NULL)
2613 0 : goto error;
2614 5273 : ords = (oid *) Tloc(on, 0);
2615 30849003 : for (p = 0, q = BATcount(bn); p < q; p++)
2616 30843730 : ords[p] = p + b->hseqbase;
2617 5273 : BATsetcount(on, BATcount(bn));
2618 5272 : on->tkey = true;
2619 5272 : on->tnil = false;
2620 5272 : on->tnonil = true;
2621 : }
2622 : /* COLcopy above can create TYPE_void */
2623 13165 : if (on->ttype != TYPE_void) {
2624 12421 : on->tsorted = on->trevsorted = false; /* it won't be sorted */
2625 12421 : on->tseqbase = oid_nil; /* and hence not dense */
2626 12421 : on->tnosorted = on->tnorevsorted = 0;
2627 : }
2628 13165 : *order = on;
2629 13165 : ords = (oid *) Tloc(on, 0);
2630 : } else {
2631 : ords = NULL;
2632 : }
2633 15825 : if (g) {
2634 9509 : if (g->tkey || g->ttype == TYPE_void) {
2635 : /* if g is "key", all groups are size 1, so no
2636 : * subsorting needed */
2637 4809 : if (sorted) {
2638 4479 : *sorted = bn;
2639 : } else {
2640 330 : BBPunfix(bn->batCacheid);
2641 330 : bn = NULL;
2642 : }
2643 4809 : if (order) {
2644 3638 : *order = on;
2645 3638 : if (o) {
2646 : /* we can inherit sortedness
2647 : * after all */
2648 3638 : on->tsorted = o->tsorted;
2649 3638 : on->trevsorted = o->trevsorted;
2650 3638 : if (o->tnosorted)
2651 48 : on->tnosorted = o->tnosorted;
2652 3638 : if (o->tnorevsorted)
2653 62 : on->tnorevsorted = o->tnorevsorted;
2654 : } else {
2655 : /* we didn't rearrange, so
2656 : * still sorted */
2657 0 : on->tsorted = true;
2658 0 : on->trevsorted = false;
2659 : }
2660 3638 : if (BATcount(on) <= 1) {
2661 0 : on->tsorted = true;
2662 0 : on->trevsorted = true;
2663 : }
2664 : }
2665 4809 : if (groups) {
2666 2807 : gn = COLcopy(g, g->ttype, false, TRANSIENT);
2667 2807 : if (gn == NULL)
2668 0 : goto error;
2669 2807 : *groups = gn;
2670 : }
2671 4809 : bat_iterator_end(&pbi);
2672 4809 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
2673 : ",o=" ALGOOPTBATFMT ",g=" ALGOBATFMT
2674 : ",reverse=%d,nilslast=%d,stable=%d"
2675 : ") = (" ALGOOPTBATFMT ","
2676 : ALGOOPTBATFMT "," ALGOOPTBATFMT
2677 : " -- key group (" LLFMT " usec)\n",
2678 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2679 : ALGOBATPAR(g), reverse, nilslast,
2680 : stable, ALGOOPTBATPAR(bn),
2681 : ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on),
2682 : GDKusec() - t0);
2683 4809 : if (pb != NULL && pb != b)
2684 0 : BBPunfix(pb->batCacheid);
2685 4809 : return GDK_SUCCEED;
2686 : }
2687 4700 : assert(g->ttype == TYPE_oid);
2688 4700 : grps = (oid *) Tloc(g, 0);
2689 4700 : prev = grps[0];
2690 4700 : if (BATmaterialize(bn, BUN_NONE) != GDK_SUCCEED)
2691 0 : goto error;
2692 44566132 : for (r = 0, p = 1, q = BATcount(g); p < q; p++) {
2693 44561432 : if (grps[p] != prev) {
2694 : /* sub sort [r,p) */
2695 5973980 : if (do_sort(Tloc(bn, r),
2696 2852595 : ords ? ords + r : NULL,
2697 3121385 : bn->tvheap ? bn->tvheap->base : NULL,
2698 3121385 : p - r, bn->twidth, ords ? sizeof(oid) : 0,
2699 3121385 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)
2700 0 : goto error;
2701 3121385 : r = p;
2702 3121385 : prev = grps[p];
2703 : }
2704 : }
2705 : /* sub sort [r,q) */
2706 8955 : if (do_sort(Tloc(bn, r),
2707 4255 : ords ? ords + r : NULL,
2708 4700 : bn->tvheap ? bn->tvheap->base : NULL,
2709 4700 : p - r, bn->twidth, ords ? sizeof(oid) : 0,
2710 4700 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)
2711 0 : goto error;
2712 : /* if single group (r==0) the result is (rev)sorted,
2713 : * otherwise (maybe) not */
2714 4700 : bn->tsorted = r == 0 && !reverse && !nilslast;
2715 9369 : bn->trevsorted = r == 0 && reverse && nilslast;
2716 : } else {
2717 6316 : Heap *m = NULL;
2718 : /* only invest in creating an order index if the BAT
2719 : * is persistent */
2720 6316 : if (mkorderidx) {
2721 3907 : assert(orderidxlock);
2722 3907 : if ((m = createOIDXheap(pb, stable)) != NULL &&
2723 : ords == NULL) {
2724 0 : ords = (oid *) m->base + ORDERIDXOFF;
2725 0 : if (o && o->ttype != TYPE_void)
2726 0 : memcpy(ords, Tloc(o, 0), BATcount(o) * sizeof(oid));
2727 0 : else if (o)
2728 0 : for (p = 0, q = BATcount(o); p < q; p++)
2729 0 : ords[p] = p + o->tseqbase;
2730 : else
2731 0 : for (p = 0, q = BATcount(b); p < q; p++)
2732 0 : ords[p] = p + b->hseqbase;
2733 : }
2734 : }
2735 6318 : if ((reverse != nilslast ||
2736 11932 : (reverse ? !bn->trevsorted : !bn->tsorted)) &&
2737 11419 : (BATmaterialize(bn, BUN_NONE) != GDK_SUCCEED ||
2738 5707 : do_sort(Tloc(bn, 0),
2739 : ords,
2740 5707 : bn->tvheap ? bn->tvheap->base : NULL,
2741 5707 : BATcount(bn), bn->twidth, ords ? sizeof(oid) : 0,
2742 5707 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)) {
2743 0 : if (m != NULL) {
2744 0 : HEAPfree(m, true);
2745 0 : GDKfree(m);
2746 : }
2747 0 : goto error;
2748 : }
2749 6317 : bn->tsorted = !reverse && !nilslast;
2750 6317 : bn->trevsorted = reverse && nilslast;
2751 6317 : if (m != NULL) {
2752 3908 : assert(orderidxlock);
2753 3908 : if (pb->torderidx == NULL) {
2754 3908 : if (ords != (oid *) m->base + ORDERIDXOFF) {
2755 3908 : memcpy((oid *) m->base + ORDERIDXOFF,
2756 : ords,
2757 3908 : pbi.count * sizeof(oid));
2758 : }
2759 3908 : pb->torderidx = m;
2760 3908 : persistOIDX(pb);
2761 : } else {
2762 0 : HEAPfree(m, true);
2763 0 : GDKfree(m);
2764 : }
2765 : }
2766 : }
2767 11017 : if (orderidxlock) {
2768 3907 : MT_lock_unset(&pb->batIdxLock);
2769 3909 : orderidxlock = false;
2770 : }
2771 11021 : bn->theap->dirty = true;
2772 11021 : bn->tnosorted = 0;
2773 11021 : bn->tnorevsorted = 0;
2774 11021 : bn->tnokey[0] = bn->tnokey[1] = 0;
2775 11021 : bn->tminpos = BUN_NONE;
2776 11021 : bn->tmaxpos = BUN_NONE;
2777 11021 : if (groups) {
2778 6490 : if (BATgroup_internal(groups, NULL, NULL, bn, NULL, g, NULL, NULL, true) != GDK_SUCCEED)
2779 0 : goto error;
2780 6489 : if ((*groups)->tkey &&
2781 866 : (g == NULL || (g->tsorted && g->trevsorted))) {
2782 : /* if new groups bat is key and the input
2783 : * group bat has a single value (both sorted
2784 : * and revsorted), we know the result bat is
2785 : * key */
2786 1072 : bn->tkey = true;
2787 : }
2788 : }
2789 :
2790 11020 : bat_iterator_end(&pbi);
2791 11019 : if (sorted)
2792 8193 : *sorted = bn;
2793 : else {
2794 2826 : BBPunfix(bn->batCacheid);
2795 2826 : bn = NULL;
2796 : }
2797 :
2798 11018 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o=" ALGOOPTBATFMT
2799 : ",g=" ALGOOPTBATFMT ",reverse=%d,nilslast=%d,"
2800 : "stable=%d) = (" ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2801 : ALGOOPTBATFMT " -- %ssort (" LLFMT " usec)\n",
2802 : ALGOBATPAR(b), ALGOOPTBATPAR(o), ALGOOPTBATPAR(g),
2803 : reverse, nilslast, stable, ALGOOPTBATPAR(bn),
2804 : ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on),
2805 : g ? "grouped " : "", GDKusec() - t0);
2806 11018 : if (pb && pb != b)
2807 1208 : BBPunfix(pb->batCacheid);
2808 : return GDK_SUCCEED;
2809 :
2810 0 : error:
2811 0 : bat_iterator_end(&pbi);
2812 0 : if (orderidxlock)
2813 0 : MT_lock_unset(&pb->batIdxLock);
2814 0 : if (oidxh)
2815 0 : HEAPdecref(oidxh, false);
2816 0 : BBPreclaim(bn);
2817 0 : if (pb && pb != b)
2818 0 : BBPunfix(pb->batCacheid);
2819 0 : BBPreclaim(on);
2820 0 : if (sorted)
2821 0 : *sorted = NULL;
2822 0 : if (order)
2823 0 : *order = NULL;
2824 0 : if (groups)
2825 0 : *groups = NULL;
2826 : return GDK_FAIL;
2827 : }
2828 :
2829 : /* return a new BAT of length n with seqbase hseq, and the constant v
2830 : * in the tail */
2831 : BAT *
2832 1861270 : BATconstant(oid hseq, int tailtype, const void *v, BUN n, role_t role)
2833 : {
2834 1861270 : BAT *bn;
2835 1861270 : void *restrict p;
2836 1861270 : BUN i;
2837 1861270 : lng t0 = 0;
2838 :
2839 1861270 : TRC_DEBUG_IF(ALGO) t0 = GDKusec();
2840 1861270 : if (v == NULL)
2841 : return NULL;
2842 1861270 : bn = COLnew(hseq, tailtype, n, role);
2843 1869670 : if (bn != NULL && n > 0) {
2844 81384 : p = Tloc(bn, 0);
2845 81384 : switch (ATOMstorage(tailtype)) {
2846 24 : case TYPE_void:
2847 24 : v = &oid_nil;
2848 24 : BATtseqbase(bn, oid_nil);
2849 24 : break;
2850 0 : case TYPE_msk:
2851 0 : if (*(msk*)v) {
2852 0 : memset(p, 0xFF, 4 * ((n + 31) / 32));
2853 0 : if (n & 31) {
2854 0 : uint32_t *m = p;
2855 0 : m[n / 32] &= (1U << (n % 32)) - 1;
2856 : }
2857 : } else
2858 0 : memset(p, 0x00, 4 * ((n + 31) / 32));
2859 : break;
2860 9672 : case TYPE_bte:
2861 9672 : memset(p, *(bte*)v, n);
2862 9672 : break;
2863 : case TYPE_sht:
2864 7054312 : for (i = 0; i < n; i++)
2865 7035884 : ((sht *) p)[i] = *(sht *) v;
2866 : break;
2867 : case TYPE_int:
2868 : case TYPE_flt:
2869 : assert(sizeof(int) == sizeof(flt));
2870 208474403 : for (i = 0; i < n; i++)
2871 208468875 : ((int *) p)[i] = *(int *) v;
2872 : break;
2873 : case TYPE_lng:
2874 : case TYPE_dbl:
2875 : assert(sizeof(lng) == sizeof(dbl));
2876 231951032 : for (i = 0; i < n; i++)
2877 231918529 : ((lng *) p)[i] = *(lng *) v;
2878 : break;
2879 : #ifdef HAVE_HGE
2880 : case TYPE_hge:
2881 24805417 : for (i = 0; i < n; i++)
2882 24803807 : ((hge *) p)[i] = *(hge *) v;
2883 : break;
2884 : #endif
2885 : case TYPE_uuid:
2886 200047 : for (i = 0; i < n; i++)
2887 200038 : ((uuid *) p)[i] = *(uuid *) v;
2888 : break;
2889 13462 : case TYPE_str:
2890 : /* insert the first value, then just copy the
2891 : * offset lots of times */
2892 13462 : if (tfastins_nocheck(bn, 0, v) != GDK_SUCCEED) {
2893 0 : BBPreclaim(bn);
2894 0 : return NULL;
2895 : }
2896 13467 : char val[sizeof(var_t)];
2897 13467 : memcpy(val, Tloc(bn, 0), bn->twidth);
2898 13467 : if (bn->twidth == 1 && n > 1) {
2899 : /* single byte value: we have a
2900 : * function for that */
2901 6533 : memset(Tloc(bn, 1), val[0], n - 1);
2902 : } else {
2903 6934 : char *p = Tloc(bn, 0);
2904 6954 : for (i = 1; i < n; i++) {
2905 20 : p += bn->twidth;
2906 20 : memcpy(p, val, bn->twidth);
2907 : }
2908 : }
2909 : break;
2910 : default:
2911 333710 : for (i = 0; i < n; i++)
2912 333565 : if (tfastins_nocheck(bn, i, v) != GDK_SUCCEED) {
2913 0 : BBPreclaim(bn);
2914 0 : return NULL;
2915 : }
2916 : break;
2917 : }
2918 81386 : bn->theap->dirty = true;
2919 81386 : bn->tnil = n >= 1 && ATOMnilptr(tailtype) && (*ATOMcompare(tailtype))(v, ATOMnilptr(tailtype)) == 0;
2920 81335 : BATsetcount(bn, n);
2921 81595 : bn->tsorted = bn->trevsorted = ATOMlinear(tailtype);
2922 81595 : bn->tnonil = !bn->tnil;
2923 81595 : bn->tkey = BATcount(bn) <= 1;
2924 : }
2925 1869881 : TRC_DEBUG(ALGO, "-> " ALGOOPTBATFMT " " LLFMT "usec\n",
2926 : ALGOOPTBATPAR(bn), GDKusec() - t0);
2927 : return bn;
2928 : }
2929 :
2930 : /*
2931 : * BAT Aggregates
2932 : *
2933 : * We retain the size() and card() aggregate results in the column
2934 : * descriptor. We would like to have such functionality in an
2935 : * extensible way for many aggregates, for DD (1) we do not want to
2936 : * change the binary BAT format on disk and (2) aggr and size are the
2937 : * most relevant aggregates.
2938 : *
2939 : * It is all hacked into the aggr[3] records; three adjacent integers
2940 : * that were left over in the column record. We refer to these as if
2941 : * it where an int aggr[3] array. The below routines set and retrieve
2942 : * the aggregate values from the tail of the BAT, as many
2943 : * aggregate-manipulating BAT functions work on tail.
2944 : *
2945 : * The rules are as follows: aggr[0] contains the alignment ID of the
2946 : * column (if set i.e. nonzero). Hence, if this value is nonzero and
2947 : * equal to b->talign, the precomputed aggregate values in
2948 : * aggr[GDK_AGGR_SIZE] and aggr[GDK_AGGR_CARD] hold. However, only one
2949 : * of them may be set at the time. This is encoded by the value
2950 : * int_nil, which cannot occur in these two aggregates.
2951 : *
2952 : * This was now extended to record the property whether we know there
2953 : * is a nil value present by mis-using the highest bits of both
2954 : * GDK_AGGR_SIZE and GDK_AGGR_CARD.
2955 : */
2956 :
2957 : void
2958 43359367 : PROPdestroy_nolock(BAT *b)
2959 : {
2960 43359367 : PROPrec *p = b->tprops;
2961 43359367 : PROPrec *n;
2962 :
2963 43359367 : b->tprops = NULL;
2964 43391186 : while (p) {
2965 3020 : n = p->next;
2966 3020 : assert(p->id != (enum prop_t) 20);
2967 3020 : VALclear(&p->v);
2968 3020 : GDKfree(p);
2969 3020 : p = n;
2970 : }
2971 43388166 : }
2972 :
2973 : void
2974 429 : PROPdestroy(BAT *b)
2975 : {
2976 429 : MT_lock_set(&b->theaplock);
2977 429 : PROPdestroy_nolock(b);
2978 429 : MT_lock_unset(&b->theaplock);
2979 429 : }
2980 :
2981 : ValPtr
2982 194769895 : BATgetprop_nolock(BAT *b, enum prop_t idx)
2983 : {
2984 194769895 : PROPrec *p;
2985 :
2986 194769895 : p = b->tprops;
2987 194773518 : while (p && p->id != idx)
2988 3623 : p = p->next;
2989 194769895 : return p ? &p->v : NULL;
2990 : }
2991 :
2992 : void
2993 396940 : BATrmprop_nolock(BAT *b, enum prop_t idx)
2994 : {
2995 396940 : PROPrec *prop = b->tprops, *prev = NULL;
2996 :
2997 397159 : while (prop) {
2998 397048 : if (prop->id == idx) {
2999 396829 : if (prev)
3000 106 : prev->next = prop->next;
3001 : else
3002 396723 : b->tprops = prop->next;
3003 396829 : VALclear(&prop->v);
3004 396829 : GDKfree(prop);
3005 396829 : return;
3006 : }
3007 219 : prev = prop;
3008 219 : prop = prop->next;
3009 : }
3010 : }
3011 :
3012 : ValPtr
3013 400486 : BATsetprop_nolock(BAT *b, enum prop_t idx, int type, const void *v)
3014 : {
3015 400486 : PROPrec *p;
3016 :
3017 400486 : p = b->tprops;
3018 406992 : while (p && p->id != idx)
3019 6506 : p = p->next;
3020 400486 : if (p == NULL) {
3021 400479 : if ((p = GDKmalloc(sizeof(PROPrec))) == NULL) {
3022 : /* properties are hints, so if we can't create
3023 : * one we ignore the error */
3024 0 : GDKclrerr();
3025 0 : return NULL;
3026 : }
3027 400479 : p->id = idx;
3028 400479 : p->next = b->tprops;
3029 400479 : p->v.vtype = 0;
3030 400479 : b->tprops = p;
3031 : } else {
3032 7 : VALclear(&p->v);
3033 : }
3034 400486 : if (VALinit(&p->v, type, v) == NULL) {
3035 : /* failed to initialize, so remove property */
3036 0 : BATrmprop_nolock(b, idx);
3037 0 : GDKclrerr();
3038 0 : p = NULL;
3039 : }
3040 0 : return p ? &p->v : NULL;
3041 : }
3042 :
3043 : ValPtr
3044 3164166 : BATgetprop(BAT *b, enum prop_t idx)
3045 : {
3046 3164166 : ValPtr p;
3047 :
3048 3164166 : MT_lock_set(&b->theaplock);
3049 3167519 : p = BATgetprop_nolock(b, idx);
3050 3167344 : MT_lock_unset(&b->theaplock);
3051 3168377 : return p;
3052 : }
3053 :
3054 : ValPtr
3055 3472 : BATsetprop(BAT *b, enum prop_t idx, int type, const void *v)
3056 : {
3057 3472 : ValPtr p;
3058 3472 : MT_lock_set(&b->theaplock);
3059 3472 : p = BATsetprop_nolock(b, idx, type, v);
3060 3472 : MT_lock_unset(&b->theaplock);
3061 3472 : return p;
3062 : }
3063 :
3064 : void
3065 2 : BATrmprop(BAT *b, enum prop_t idx)
3066 : {
3067 2 : MT_lock_set(&b->theaplock);
3068 2 : BATrmprop_nolock(b, idx);
3069 2 : MT_lock_unset(&b->theaplock);
3070 2 : }
3071 :
3072 : /*
3073 : * The BATcount_no_nil function counts all BUN in a BAT that have a
3074 : * non-nil tail value.
3075 : * This function does not fail (the callers currently don't check for failure).
3076 : */
3077 : BUN
3078 2250 : BATcount_no_nil(BAT *b, BAT *s)
3079 : {
3080 2250 : BUN cnt = 0;
3081 2250 : const void *restrict p, *restrict nil;
3082 2250 : const char *restrict base;
3083 2250 : int t;
3084 2250 : int (*cmp)(const void *, const void *);
3085 2250 : struct canditer ci;
3086 2250 : oid hseq;
3087 :
3088 2250 : BATcheck(b, 0);
3089 :
3090 2250 : hseq = b->hseqbase;
3091 2250 : canditer_init(&ci, b, s);
3092 2252 : BATiter bi = bat_iterator(b);
3093 2250 : if (bi.nonil) {
3094 2012 : bat_iterator_end(&bi);
3095 2012 : return ci.ncand;
3096 : }
3097 238 : p = bi.base;
3098 238 : t = ATOMbasetype(bi.type);
3099 238 : switch (t) {
3100 0 : case TYPE_void:
3101 0 : cnt = ci.ncand * BATtdensebi(&bi);
3102 0 : break;
3103 0 : case TYPE_msk:
3104 0 : cnt = ci.ncand;
3105 0 : break;
3106 16 : case TYPE_bte:
3107 32 : CAND_LOOP(&ci)
3108 16 : cnt += !is_bte_nil(((const bte *) p)[canditer_next(&ci) - hseq]);
3109 : break;
3110 0 : case TYPE_sht:
3111 0 : CAND_LOOP(&ci)
3112 0 : cnt += !is_sht_nil(((const sht *) p)[canditer_next(&ci) - hseq]);
3113 : break;
3114 112 : case TYPE_int:
3115 2852656 : CAND_LOOP(&ci)
3116 2852543 : cnt += !is_int_nil(((const int *) p)[canditer_next(&ci) - hseq]);
3117 : break;
3118 85 : case TYPE_lng:
3119 156873 : CAND_LOOP(&ci)
3120 156788 : cnt += !is_lng_nil(((const lng *) p)[canditer_next(&ci) - hseq]);
3121 : break;
3122 : #ifdef HAVE_HGE
3123 0 : case TYPE_hge:
3124 0 : CAND_LOOP(&ci)
3125 0 : cnt += !is_hge_nil(((const hge *) p)[canditer_next(&ci) - hseq]);
3126 : break;
3127 : #endif
3128 0 : case TYPE_flt:
3129 0 : CAND_LOOP(&ci)
3130 0 : cnt += !is_flt_nil(((const flt *) p)[canditer_next(&ci) - hseq]);
3131 : break;
3132 0 : case TYPE_dbl:
3133 0 : CAND_LOOP(&ci)
3134 0 : cnt += !is_dbl_nil(((const dbl *) p)[canditer_next(&ci) - hseq]);
3135 : break;
3136 0 : case TYPE_uuid:
3137 0 : CAND_LOOP(&ci)
3138 0 : cnt += !is_uuid_nil(((const uuid *) p)[canditer_next(&ci) - hseq]);
3139 : break;
3140 25 : case TYPE_str:
3141 25 : base = bi.vh->base;
3142 25 : switch (bi.width) {
3143 23 : case 1:
3144 4809 : CAND_LOOP(&ci)
3145 4786 : cnt += base[(var_t) ((const uint8_t *) p)[canditer_next(&ci) - hseq] + GDK_VAROFFSET] != '\200';
3146 : break;
3147 2 : case 2:
3148 312 : CAND_LOOP(&ci)
3149 310 : cnt += base[(var_t) ((const uint16_t *) p)[canditer_next(&ci) - hseq] + GDK_VAROFFSET] != '\200';
3150 : break;
3151 0 : case 4:
3152 0 : CAND_LOOP(&ci)
3153 0 : cnt += base[(var_t) ((const uint32_t *) p)[canditer_next(&ci) - hseq]] != '\200';
3154 : break;
3155 : #if SIZEOF_VAR_T == 8
3156 0 : case 8:
3157 0 : CAND_LOOP(&ci)
3158 0 : cnt += base[(var_t) ((const uint64_t *) p)[canditer_next(&ci) - hseq]] != '\200';
3159 : break;
3160 : #endif
3161 : default:
3162 0 : MT_UNREACHABLE();
3163 : }
3164 : break;
3165 0 : default:
3166 0 : nil = ATOMnilptr(t);
3167 0 : cmp = ATOMcompare(t);
3168 0 : if (nil == NULL) {
3169 0 : cnt = ci.ncand;
3170 0 : } else if (b->tvheap) {
3171 0 : base = b->tvheap->base;
3172 0 : CAND_LOOP(&ci)
3173 0 : cnt += (*cmp)(nil, base + ((const var_t *) p)[canditer_next(&ci) - hseq]) != 0;
3174 : } else {
3175 0 : CAND_LOOP(&ci)
3176 0 : cnt += (*cmp)(BUNtloc(bi, canditer_next(&ci) - hseq), nil) != 0;
3177 : }
3178 : break;
3179 : }
3180 239 : if (cnt == bi.count) {
3181 35 : MT_lock_set(&b->theaplock);
3182 35 : if (cnt == BATcount(b) && bi.h == b->theap) {
3183 : /* we learned something */
3184 35 : b->tnonil = true;
3185 35 : assert(!b->tnil);
3186 35 : b->tnil = false;
3187 : }
3188 35 : bat pbid = VIEWtparent(b);
3189 35 : MT_lock_unset(&b->theaplock);
3190 35 : if (pbid) {
3191 21 : BAT *pb = BATdescriptor(pbid);
3192 21 : if (pb) {
3193 21 : MT_lock_set(&pb->theaplock);
3194 21 : if (cnt == BATcount(pb) &&
3195 0 : bi.h == pb->theap &&
3196 0 : !pb->tnonil) {
3197 0 : pb->tnonil = true;
3198 0 : assert(!pb->tnil);
3199 0 : pb->tnil = false;
3200 : }
3201 21 : MT_lock_unset(&pb->theaplock);
3202 21 : BBPunfix(pb->batCacheid);
3203 : }
3204 : }
3205 : }
3206 239 : bat_iterator_end(&bi);
3207 239 : return cnt;
3208 : }
|