Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * (c) M. L. Kersten, P. Boncz, S. Manegold, N. Nes, K.S. Mullender
15 : * Common BAT Operations
16 : * We factor out all possible overhead by inlining code. This
17 : * includes the macros BUNhead and BUNtail, which do a test to see
18 : * whether the atom resides in the buns or in a variable storage
19 : * heap.
20 : */
21 : #include "monetdb_config.h"
22 : #include "gdk.h"
23 : #include "gdk_private.h"
24 :
25 : gdk_return
26 54322900 : unshare_varsized_heap(BAT *b)
27 : {
28 54322900 : if (ATOMvarsized(b->ttype) &&
29 22029010 : b->tvheap->parentid != b->batCacheid) {
30 872 : Heap *h = GDKmalloc(sizeof(Heap));
31 872 : if (h == NULL)
32 : return GDK_FAIL;
33 872 : MT_thread_setalgorithm("unshare vheap");
34 1744 : *h = (Heap) {
35 872 : .parentid = b->batCacheid,
36 872 : .farmid = BBPselectfarm(b->batRole, TYPE_str, varheap),
37 : .refs = ATOMIC_VAR_INIT(1),
38 : };
39 872 : strconcat_len(h->filename, sizeof(h->filename),
40 872 : BBP_physical(b->batCacheid), ".theap", NULL);
41 872 : if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
42 0 : HEAPfree(h, true);
43 0 : GDKfree(h);
44 0 : return GDK_FAIL;
45 : }
46 872 : MT_lock_set(&b->theaplock);
47 871 : Heap *oh = b->tvheap;
48 871 : b->tvheap = h;
49 871 : MT_lock_unset(&b->theaplock);
50 872 : BBPrelease(oh->parentid);
51 872 : HEAPdecref(oh, false);
52 : }
53 : return GDK_SUCCEED;
54 : }
55 :
56 : /* We try to be clever when appending one string bat to another.
57 : * First of all, we try to actually share the string heap so that we
58 : * don't need an extra copy, and if that can't be done, we see whether
59 : * it makes sense to just quickly copy the whole string heap instead
60 : * of inserting individual strings. See the comments in the code for
61 : * more information.
 *
 * Parameters:
 *   b        - destination BAT; asserted to be of type TYPE_str, to have
 *              tbaseoff 0 and to own its offset heap
 *   ni       - iterator over the source BAT (called "n" in the comments)
 *   ci       - candidate iterator selecting the rows of n to append;
 *              ci->ncand rows are appended
 *   force    - passed on to HEAPgrow when n's string heap is copied
 *   mayshare - allows b to take a reference to n's string heap instead
 *              of copying it; only used when b is transient and empty
 *   qry_ctx  - query context handed to TIMEOUT_LOOP / TIMEOUT_CHECK
 *
 * Returns GDK_SUCCEED (also when there is nothing to append), or
 * GDK_FAIL when unsharing or growing a heap, upgrading the offset heap
 * or inserting a string fails (and presumably when the timeout check
 * at the end triggers -- that is what its handler is given).
 *
 * The variable toff doubles as a mode flag: ~0 means "strings must be
 * inserted one by one", any other value is the byte offset that must
 * be added to n's offsets to find the same string in b's heap. */
62 : static gdk_return
63 75502 : insert_string_bat(BAT *b, BATiter *ni, struct canditer *ci, bool force, bool mayshare, QryCtx *qry_ctx)
64 : {
65 75502 : size_t toff = ~(size_t) 0; /* tail offset */
66 75502 : BUN p, r; /* loop variables */
67 75502 : const void *tp = NULL; /* tail value pointer */
68 75502 : var_t v;
69 75502 : size_t off; /* offset within n's string heap */
70 75502 : BUN cnt = ci->ncand;
71 75502 : BUN oldcnt = BATcount(b);
72 :
73 75502 : assert(b->ttype == TYPE_str);
74 75502 : assert(b->tbaseoff == 0);
75 75502 : assert(b->theap->parentid == b->batCacheid);
76 : /* only transient bats can use some other bat's string heap */
77 75502 : assert(b->batRole == TRANSIENT || b->tvheap->parentid == b->batCacheid);
78 75502 : if (cnt == 0)
79 : return GDK_SUCCEED;
80 :
81 75502 : if (b->tvheap == ni->vh) {
82 : /* vheaps are already shared, continue doing so: we just
83 : * need to append the offsets */
84 10699 : toff = 0;
85 10699 : MT_thread_setalgorithm("shared vheap");
86 64803 : } else if (mayshare && b->batRole == TRANSIENT && oldcnt == 0) {
87 : /* we can share the vheaps, so we then only need to
88 : * append the offsets */
89 51232 : MT_lock_set(&b->theaplock);
90 51227 : bat bid = b->tvheap->parentid;
91 51227 : HEAPdecref(b->tvheap, bid == b->batCacheid);
92 51237 : HEAPincref(ni->vh);
93 51237 : b->tvheap = ni->vh;
94 51237 : b->tascii = ni->ascii;
95 51237 : MT_lock_unset(&b->theaplock);
96 51237 : BBPretain(ni->vh->parentid);
97 51237 : if (bid != b->batCacheid)
98 0 : BBPrelease(bid);
99 51237 : toff = 0;
100 51237 : MT_thread_setalgorithm("share vheap");
101 : } else {
102 : /* no heap sharing, so also make sure the heap isn't
103 : * shared currently (we're not allowed to write in
104 : * another bat's heap) */
105 14443 : if (b->tvheap->parentid != b->batCacheid &&
106 872 : unshare_varsized_heap(b) != GDK_SUCCEED) {
107 : return GDK_FAIL;
108 : }
109 13571 : if (oldcnt == 0 || (!GDK_ELIMDOUBLES(b->tvheap) &&
110 72 : !GDK_ELIMDOUBLES(ni->vh))) {
111 : /* we'll consider copying the string heap completely
112 : *
113 : * we first estimate how much space the string heap
114 : * should occupy, given the number of rows we need to
115 : * insert, then, if that is way smaller than the actual
116 : * space occupied, we will skip the copy and just insert
117 : * one by one */
118 : size_t len = 0;
	/* look at 1024 candidate positions picked with rand() and add
	 * up the lengths of the strings found there, each counted
	 * including its terminating NUL byte */
119 5833303 : for (int i = 0; i < 1024; i++) {
120 5827606 : p = (BUN) (((double) rand() / RAND_MAX) * (cnt - 1));
121 5827608 : p = canditer_idx(ci, p) - ni->b->hseqbase;
122 5827608 : len += strlen(BUNtvar(*ni, p)) + 1;
123 : }
124 5697 : len = (len + 512) / 1024; /* rounded average length */
125 5697 : r = (GDK_ELIMLIMIT - GDK_STRHASHSIZE) / (len + 12);
126 : /* r is estimate of number of strings in
127 : * double-eliminated area */
128 5697 : BUN ecnt = ci->ncand;
129 5697 : if (ni->b->tunique_est > 0 && ecnt > ni->b->tunique_est)
130 55 : ecnt = (BUN)ni->b->tunique_est;
131 5697 : if (r < ecnt)
132 909 : len = GDK_ELIMLIMIT + (ecnt - r) * len;
133 : else
134 4788 : len = GDK_STRHASHSIZE + ecnt * (len + 12);
135 : /* len is total estimated expected size of vheap */
136 :
137 5697 : if (len > ni->vhfree / 2) {
138 : /* we copy the string heap, perhaps appending */
139 5683 : if (oldcnt == 0) {
140 5658 : toff = 0;
141 5658 : MT_thread_setalgorithm("copy vheap");
142 : } else {
143 25 : toff = (b->tvheap->free + GDK_VARALIGN - 1) & ~(GDK_VARALIGN - 1);
144 25 : MT_thread_setalgorithm("append vheap");
145 : }
146 :
147 5683 : MT_lock_set(&b->theaplock);
148 5683 : if (HEAPgrow(&b->tvheap, toff + ni->vhfree, force) != GDK_SUCCEED) {
149 0 : MT_lock_unset(&b->theaplock);
150 0 : return GDK_FAIL;
151 : }
152 5683 : memcpy(b->tvheap->base + toff, ni->vh->base, ni->vhfree);
153 5683 : b->tvheap->free = toff + ni->vhfree;
154 5683 : b->tvheap->dirty = true;
155 5683 : b->tascii &= ni->ascii;
156 5683 : MT_lock_unset(&b->theaplock);
157 : }
158 : }
159 : }
160 : /* if toff has the initial value of ~0, we insert strings
161 : * individually, otherwise we only copy (insert) offsets */
162 67619 : if (toff == ~(size_t) 0)
163 : v = GDK_VAROFFSET;
164 : else
165 67616 : v = b->tvheap->free - 1;
166 :
167 : /* make sure there is (vertical) space in the offset heap, we
168 : * may also widen thanks to v, set above */
	/* the capacity asked for is the larger of the current capacity
	 * and the row count after the append */
169 75506 : if (GDKupgradevarheap(b, v, oldcnt + cnt < b->batCapacity ? b->batCapacity : oldcnt + cnt, b->batCount) != GDK_SUCCEED) {
170 : return GDK_FAIL;
171 : }
172 :
173 75506 : if (toff == 0 && ni->width == b->twidth && ci->tpe == cand_dense) {
174 : /* we don't need to do any translation of offset
175 : * values, so we can use fast memcpy */
176 67320 : MT_thread_setalgorithm("memcpy offsets");
177 67317 : memcpy(Tloc(b, BATcount(b)), (const char *) ni->base + ((ci->seq - ni->b->hseqbase) << ni->shift), cnt << ni->shift);
178 8186 : } else if (toff != ~(size_t) 0) {
179 : /* we don't need to insert any actual strings since we
180 : * have already made sure that they are all in b's
181 : * string heap at known locations (namely the offset
182 : * in n added to toff), so insert offsets from n after
183 : * adding toff into b */
184 : /* note the use of the "restrict" qualifier here: all
185 : * four pointers below point to the same value, but
186 : * only one of them will actually be used, hence we
187 : * still obey the rule for restrict-qualified
188 : * pointers */
189 296 : const uint8_t *restrict tbp = (const uint8_t *) ni->base;
190 296 : const uint16_t *restrict tsp = (const uint16_t *) ni->base;
191 296 : const uint32_t *restrict tip = (const uint32_t *) ni->base;
192 : #if SIZEOF_VAR_T == 8
193 296 : const uint64_t *restrict tlp = (const uint64_t *) ni->base;
194 : #endif
195 :
196 296 : MT_thread_setalgorithm("copy offset values");
197 296 : r = b->batCount;
198 3089708 : TIMEOUT_LOOP(cnt, qry_ctx) {
199 3088944 : p = canditer_next(ci) - ni->b->hseqbase;
200 3088944 : switch (ni->width) {
201 110 : case 1:
202 110 : v = (var_t) tbp[p] + GDK_VAROFFSET;
203 110 : break;
204 5060 : case 2:
205 5060 : v = (var_t) tsp[p] + GDK_VAROFFSET;
206 5060 : break;
207 3083774 : case 4:
208 3083774 : v = (var_t) tip[p];
209 3083774 : break;
210 : #if SIZEOF_VAR_T == 8
211 0 : case 8:
212 0 : v = (var_t) tlp[p];
213 0 : break;
214 : #endif
215 : default:
216 0 : MT_UNREACHABLE();
217 : }
218 3088944 : v = (var_t) ((size_t) v + toff);
219 3088944 : assert(v >= GDK_VAROFFSET);
220 3088944 : assert((size_t) v < b->tvheap->free);
221 3088944 : switch (b->twidth) {
222 4640 : case 1:
223 4640 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 8));
224 4640 : ((uint8_t *) b->theap->base)[r++] = (uint8_t) (v - GDK_VAROFFSET);
225 4640 : break;
226 765 : case 2:
227 765 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 16));
228 765 : ((uint16_t *) b->theap->base)[r++] = (uint16_t) (v - GDK_VAROFFSET);
229 765 : break;
230 3083539 : case 4:
231 : #if SIZEOF_VAR_T == 8
232 3083539 : assert(v < ((var_t) 1 << 32));
233 : #endif
234 3083539 : ((uint32_t *) b->theap->base)[r++] = (uint32_t) v;
235 3083539 : break;
236 : #if SIZEOF_VAR_T == 8
237 0 : case 8:
238 0 : ((uint64_t *) b->theap->base)[r++] = (uint64_t) v;
239 0 : break;
240 : #endif
241 : default:
242 3088944 : MT_UNREACHABLE();
243 : }
244 : }
245 7890 : } else if (b->tvheap->free < ni->vhfree / 2 ||
246 : GDK_ELIMDOUBLES(b->tvheap)) {
247 : /* if b's string heap is much smaller than n's string
248 : * heap, don't bother checking whether n's string
249 : * values occur in b's string heap; also, if b is
250 : * (still) fully double eliminated, we must continue
251 : * to use the double elimination mechanism */
252 7844 : r = b->batCount;
253 7844 : oid hseq = ni->b->hseqbase;
254 7844 : MT_thread_setalgorithm("insert string values");
255 11237356 : TIMEOUT_LOOP(cnt, qry_ctx) {
256 11221255 : p = canditer_next(ci) - hseq;
257 11002342 : tp = BUNtvar(*ni, p);
258 11002342 : if (tfastins_nocheckVAR(b, r, tp) != GDK_SUCCEED) {
259 : return GDK_FAIL;
260 : }
261 11221254 : r++;
262 : }
263 : } else {
264 : /* Insert values from n individually into b; however,
265 : * we check whether there is a string in b's string
266 : * heap at the same offset as the string is in n's
267 : * string heap (in case b's string heap is a copy of
268 : * n's). If this is the case, we just copy the
269 : * offset, otherwise we insert normally. */
270 46 : r = b->batCount;
271 46 : MT_thread_setalgorithm("insert string values with check");
272 7693 : TIMEOUT_LOOP(cnt, qry_ctx) {
273 7601 : p = canditer_next(ci) - ni->b->hseqbase;
274 7601 : off = BUNtvaroff(*ni, p); /* the offset */
275 7601 : tp = ni->vh->base + off; /* the string */
276 7601 : if (off < b->tvheap->free &&
277 7601 : strcmp(b->tvheap->base + off, tp) == 0) {
278 : /* we found the string at the same
279 : * offset in b's string heap as it was
280 : * in n's string heap, so we don't
281 : * have to insert a new string into b:
282 : * we can just copy the offset */
283 3946 : v = (var_t) off;
284 3946 : switch (b->twidth) {
285 0 : case 1:
286 0 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 8));
287 0 : ((uint8_t *) b->theap->base)[r] = (uint8_t) (v - GDK_VAROFFSET);
288 0 : break;
289 4 : case 2:
290 4 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 16));
291 4 : ((uint16_t *) b->theap->base)[r] = (uint16_t) (v - GDK_VAROFFSET);
292 4 : break;
293 3942 : case 4:
294 : #if SIZEOF_VAR_T == 8
295 3942 : assert(v < ((var_t) 1 << 32));
296 : #endif
297 3942 : ((uint32_t *) b->theap->base)[r] = (uint32_t) v;
298 3942 : break;
299 : #if SIZEOF_VAR_T == 8
300 0 : case 8:
301 0 : ((uint64_t *) b->theap->base)[r] = (uint64_t) v;
302 0 : break;
303 : #endif
304 : default:
305 0 : MT_UNREACHABLE();
306 : }
307 : } else {
308 3655 : if (tfastins_nocheckVAR(b, r, tp) != GDK_SUCCEED) {
309 : return GDK_FAIL;
310 : }
311 : }
312 7601 : r++;
313 : }
314 : }
315 75501 : TIMEOUT_CHECK(qry_ctx, TIMEOUT_HANDLER(GDK_FAIL, qry_ctx));
	/* the new count is set while holding both the hash write lock
	 * and the heap lock; the heap lock is dropped again before the
	 * hash is extended with the new rows */
316 75503 : MT_rwlock_wrlock(&b->thashlock);
317 75506 : MT_lock_set(&b->theaplock);
318 75500 : BATsetcount(b, oldcnt + ci->ncand);
319 75503 : assert(b->batCapacity >= b->batCount);
320 75503 : MT_lock_unset(&b->theaplock);
321 : /* maintain hash */
322 78493 : for (r = oldcnt, cnt = BATcount(b); b->thash && r < cnt; r++) {
323 2988 : HASHappend_locked(b, r, b->tvheap->base + VarHeapVal(Tloc(b, 0), r, b->twidth));
324 : }
	/* read the hash's nunique counter (0 when there is no hash)
	 * while the hash lock is still held; a non-zero value becomes
	 * the new unique estimate below */
325 75505 : BUN nunique = b->thash ? b->thash->nunique : 0;
326 75505 : MT_rwlock_wrunlock(&b->thashlock);
327 75506 : if (nunique != 0) {
328 9 : MT_lock_set(&b->theaplock);
329 9 : b->tunique_est = (double) nunique;
330 9 : MT_lock_unset(&b->theaplock);
331 : }
332 : return GDK_SUCCEED;
333 : }
334 :
335 : static gdk_return
336 448 : append_varsized_bat(BAT *b, BATiter *ni, struct canditer *ci, bool mayshare)
337 : {
338 448 : BUN cnt = ci->ncand, r;
339 448 : oid hseq = ni->b->hseqbase;
340 :
341 : /* only transient bats can use some other bat's vheap */
342 448 : assert(b->batRole == TRANSIENT || b->tvheap->parentid == b->batCacheid);
343 : /* make sure the bats use var_t */
344 448 : assert(b->twidth == ni->width);
345 448 : assert(b->twidth == SIZEOF_VAR_T);
346 448 : if (cnt == 0)
347 : return GDK_SUCCEED;
348 448 : if (cnt > BATcapacity(b) - BATcount(b)) {
349 : /* if needed space exceeds a normal growth extend just
350 : * with what's needed */
351 18 : BUN ncap = BATcount(b) + cnt;
352 18 : BUN grows = BATgrows(b);
353 :
354 18 : if (ncap > grows)
355 : grows = ncap;
356 18 : if (BATextend(b, grows) != GDK_SUCCEED)
357 : return GDK_FAIL;
358 : }
359 448 : if (mayshare &&
360 448 : BATcount(b) == 0 &&
361 233 : b->batRole == TRANSIENT &&
362 148 : ni->restricted == BAT_READ &&
363 148 : b->tvheap != ni->vh) {
364 : /* if b is still empty, in the transient farm, and n
365 : * is read-only, we replace b's vheap with a reference
366 : * to n's */
367 148 : MT_lock_set(&b->theaplock);
368 148 : bat bid = b->tvheap->parentid;
369 148 : HEAPdecref(b->tvheap, true);
370 148 : HEAPincref(ni->vh);
371 148 : b->tvheap = ni->vh;
372 148 : MT_lock_unset(&b->theaplock);
373 148 : BBPretain(ni->vh->parentid);
374 148 : if (bid != b->batCacheid)
375 0 : BBPrelease(bid);
376 : }
377 448 : if (b->tvheap == ni->vh) {
378 : /* if b and n use the same vheap, we only need to copy
379 : * the offsets from n to b */
380 286 : if (ci->tpe == cand_dense) {
381 : /* fast memcpy since we copy a consecutive
382 : * chunk of memory */
383 286 : memcpy(Tloc(b, BATcount(b)),
384 286 : (const var_t *) ni->base + (ci->seq - hseq),
385 286 : cnt << b->tshift);
386 : } else {
387 0 : var_t *restrict dst = (var_t *) Tloc(b, BATcount(b));
388 0 : const var_t *restrict src = (const var_t *) ni->base;
389 0 : while (cnt > 0) {
390 0 : cnt--;
391 0 : *dst++ = src[canditer_next(ci) - hseq];
392 : }
393 : }
394 286 : MT_rwlock_wrlock(&b->thashlock);
395 286 : MT_lock_set(&b->theaplock);
396 286 : BATsetcount(b, BATcount(b) + ci->ncand);
397 286 : MT_lock_unset(&b->theaplock);
398 : /* maintain hash table */
399 286 : for (BUN i = BATcount(b) - ci->ncand;
400 286 : b->thash && i < BATcount(b);
401 0 : i++) {
402 0 : HASHappend_locked(b, i, b->tvheap->base + *(var_t *) Tloc(b, i));
403 : }
404 286 : BUN nunique = b->thash ? b->thash->nunique : 0;
405 286 : MT_rwlock_wrunlock(&b->thashlock);
406 286 : if (nunique != 0) {
407 0 : MT_lock_set(&b->theaplock);
408 0 : b->tunique_est = (double) nunique;
409 0 : MT_lock_unset(&b->theaplock);
410 : }
411 286 : return GDK_SUCCEED;
412 : }
413 : /* b and n do not share their vheap, so we need to copy data */
414 162 : if (b->tvheap->parentid != b->batCacheid) {
415 : /* if b shares its vheap with some other bat, unshare it */
416 20 : Heap *h = GDKmalloc(sizeof(Heap));
417 20 : if (h == NULL) {
418 : return GDK_FAIL;
419 : }
420 40 : *h = (Heap) {
421 20 : .parentid = b->batCacheid,
422 20 : .farmid = BBPselectfarm(b->batRole, b->ttype, varheap),
423 : .refs = ATOMIC_VAR_INIT(1),
424 : };
425 20 : strconcat_len(h->filename, sizeof(h->filename),
426 20 : BBP_physical(b->batCacheid), ".theap", NULL);
427 20 : if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
428 0 : HEAPfree(h, true);
429 0 : GDKfree(h);
430 0 : return GDK_FAIL;
431 : }
432 20 : MT_lock_set(&b->theaplock);
433 20 : Heap *oh = b->tvheap;
434 20 : b->tvheap = h;
435 20 : MT_lock_unset(&b->theaplock);
436 20 : if (oh->parentid != b->batCacheid)
437 20 : BBPrelease(oh->parentid);
438 20 : HEAPdecref(oh, false);
439 : }
440 162 : if (BATcount(b) == 0 &&
441 85 : ci->tpe == cand_dense && ci->ncand == ni->count) {
442 : /* just copy the heaps */
443 85 : MT_lock_set(&b->theaplock);
444 85 : if (HEAPgrow(&b->tvheap, ni->vhfree, false) != GDK_SUCCEED) {
445 0 : MT_lock_unset(&b->theaplock);
446 0 : return GDK_FAIL;
447 : }
448 85 : memcpy(b->theap->base, ni->base, ni->hfree);
449 85 : memcpy(b->tvheap->base, ni->vh->base, ni->vhfree);
450 85 : b->theap->free = ni->hfree;
451 85 : b->theap->dirty = true;
452 85 : b->tvheap->free = ni->vhfree;
453 85 : b->tvheap->dirty = true;
454 85 : BATsetcount(b, ni->count);
455 85 : b->tnil = ni->nil;
456 85 : b->tnonil = ni->nonil;
457 85 : b->tsorted = ni->sorted;
458 85 : b->tnosorted = ni->nosorted;
459 85 : b->trevsorted = ni->revsorted;
460 85 : b->tnorevsorted = ni->norevsorted;
461 85 : b->tkey = ni->key;
462 85 : b->tnokey[0] = ni->nokey[0];
463 85 : b->tnokey[1] = ni->nokey[1];
464 85 : b->tminpos = ni->minpos;
465 85 : b->tmaxpos = ni->maxpos;
466 85 : b->tunique_est = ni->unique_est;
467 85 : MT_lock_unset(&b->theaplock);
468 85 : return GDK_SUCCEED;
469 : }
470 : /* copy data from n to b */
471 : r = BATcount(b);
472 380587 : for (BUN i = 0; i < cnt; i++) {
473 380510 : BUN p = canditer_next(ci) - hseq;
474 380510 : const void *t = BUNtvar(*ni, p);
475 380510 : if (tfastins_nocheckVAR(b, r, t) != GDK_SUCCEED) {
476 : return GDK_FAIL;
477 : }
478 380510 : r++;
479 : }
480 77 : MT_rwlock_wrlock(&b->thashlock);
481 77 : if (b->thash) {
482 0 : r -= cnt;
483 0 : BATiter bi = bat_iterator_nolock(b);
484 0 : for (BUN i = 0; i < cnt; i++) {
485 0 : const void *t = BUNtvar(bi, r);
486 0 : HASHappend_locked(b, r, t);
487 0 : r++;
488 : }
489 : }
490 77 : BUN nunique = b->thash ? b->thash->nunique : 0;
491 77 : MT_lock_set(&b->theaplock);
492 77 : BATsetcount(b, r);
493 77 : if (nunique != 0)
494 0 : b->tunique_est = (double) nunique;
495 77 : MT_lock_unset(&b->theaplock);
496 77 : MT_rwlock_wrunlock(&b->thashlock);
497 77 : return GDK_SUCCEED;
498 : }
499 :
500 : static gdk_return
501 127 : append_msk_bat(BAT *b, BATiter *ni, struct canditer *ci)
502 : {
503 127 : if (ci->ncand == 0)
504 : return GDK_SUCCEED;
505 127 : if (BATextend(b, BATcount(b) + ci->ncand) != GDK_SUCCEED)
506 : return GDK_FAIL;
507 :
508 127 : MT_lock_set(&b->theaplock);
509 :
510 127 : uint32_t boff = b->batCount % 32;
511 127 : uint32_t *bp = (uint32_t *) b->theap->base + b->batCount / 32;
512 127 : b->batCount += ci->ncand;
513 127 : b->theap->dirty = true;
514 127 : b->theap->free = ((b->batCount + 31) / 32) * 4;
515 127 : if (ci->tpe == cand_dense) {
516 127 : const uint32_t *np;
517 127 : uint32_t noff, mask;
518 127 : BUN cnt;
519 127 : noff = (ci->seq - ni->b->hseqbase) % 32;
520 127 : cnt = ci->ncand;
521 127 : np = (const uint32_t *) ni->base + (ci->seq - ni->b->hseqbase) / 32;
522 127 : if (boff == noff) {
523 : /* words of b and n are aligned, so we don't
524 : * need to shift bits around */
525 47 : if (boff + cnt <= 32) {
526 : /* all new bits within one word */
527 41 : if (cnt == 32) {
528 0 : *bp = *np;
529 : } else {
530 41 : mask = ((1U << cnt) - 1) << boff;
531 41 : *bp &= ~mask;
532 41 : *bp |= *np & mask;
533 : }
534 : } else {
535 : /* multiple words of b are affected */
536 6 : if (boff != 0) {
537 : /* first fill up the rest of the first
538 : * word */
539 0 : mask = ~0U << boff;
540 0 : *bp &= ~mask;
541 0 : *bp++ |= *np++ & mask;
542 0 : cnt -= 32 - boff;
543 : }
544 6 : if (cnt >= 32) {
545 : /* copy an integral number of words fast */
546 6 : BUN nw = cnt / 32;
547 6 : memcpy(bp, np, nw*sizeof(int));
548 6 : bp += nw;
549 6 : np += nw;
550 6 : cnt %= 32;
551 : }
552 6 : if (cnt > 0) {
553 : /* do the left over bits */
554 6 : mask = (1U << cnt) - 1;
555 6 : *bp = *np & mask;
556 : }
557 : }
558 80 : } else if (boff > noff) {
559 80 : if (boff + cnt <= 32) {
560 : /* we only need to copy bits from a
561 : * single word of n to a single word
562 : * of b */
563 : /* boff > 0, so cnt < 32, hence the
564 : * shift is ok */
565 71 : mask = (1U << cnt) - 1;
566 71 : *bp &= ~(mask << boff);
567 71 : *bp |= (*np & (mask << noff)) << (boff - noff);
568 : } else {
569 : /* first fill the rest of the last partial
570 : * word of b, so that's 32-boff bits */
571 9 : mask = (1U << (32 - boff)) - 1;
572 9 : *bp &= ~(mask << boff);
573 9 : *bp++ |= (*np & (mask << noff)) << (boff - noff);
574 9 : cnt -= 32 - boff;
575 :
576 : /* set boff and noff to the amount we need to
577 : * shift bits in consecutive words of n around
578 : * to fit into the next word of b; set mask to
579 : * the mask of the bottom bits of n that fit
580 : * in a word of b (and the complement are the
581 : * top bits that go to another word of b) */
582 9 : boff -= noff;
583 9 : noff = 32 - boff;
584 9 : mask = (1U << noff) - 1;
585 141 : while (cnt >= 32) {
586 132 : *bp = (*np++ & ~mask) >> noff;
587 132 : *bp++ |= (*np & mask) << boff;
588 132 : cnt -= 32;
589 : }
590 9 : if (cnt > noff) {
591 : /* the last bits come from two words
592 : * in n */
593 5 : *bp = (*np++ & ~mask) >> noff;
594 5 : cnt -= noff;
595 5 : mask = (1U << cnt) - 1;
596 5 : *bp++ |= (*np & mask) << boff;
597 4 : } else if (cnt > 0) {
598 : /* the last bits come from a single
599 : * word in n */
600 4 : mask = ((1U << cnt) - 1) << noff;
601 4 : *bp = (*np & mask) >> noff;
602 : }
603 : }
604 : } else {
605 : /* boff < noff */
606 0 : if (noff + cnt <= 32) {
607 : /* only need part of the first word of n */
608 0 : assert(cnt < 32); /* noff > 0, so cnt < 32 */
609 0 : mask = (1U << cnt) - 1;
610 0 : *bp &= ~(mask << boff);
611 0 : *bp |= (*np & (mask << noff)) >> (noff - boff);
612 0 : } else if (boff + cnt <= 32) {
613 : /* only need to fill a single word of
614 : * b, but from two of n */
615 0 : if (cnt < 32)
616 0 : *bp &= ~(((1U << cnt) - 1) << boff);
617 : else
618 0 : *bp = 0;
619 0 : mask = ~((1U << noff) - 1);
620 0 : *bp |= (*np++ & mask) >> (noff - boff);
621 0 : cnt -= 32 - noff;
622 0 : mask = (1U << cnt) - 1;
623 0 : *bp |= (*np & mask) << (32 - noff);
624 : } else {
625 0 : if (boff > 0) {
626 : /* fill the rest of the first word of b */
627 0 : cnt -= 32 - boff;
628 0 : *bp &= (1U << boff) - 1;
629 0 : mask = ~((1U << noff) - 1);
630 0 : noff -= boff;
631 0 : boff = 32 - noff;
632 0 : *bp |= (*np++ & mask) >> noff;
633 0 : *bp |= (*np & ((1U << noff) - 1)) << boff;
634 : } else {
635 0 : boff = 32 - noff;
636 : }
637 0 : mask = (1U << noff) - 1;
638 0 : while (cnt >= 32) {
639 0 : *bp = (*np++ & ~mask) >> noff;
640 0 : *bp++ |= (*np & mask) << boff;
641 0 : cnt -= 32;
642 : }
643 0 : if (cnt > 0) {
644 0 : *bp = (*np++ & ~mask) >> noff;
645 0 : if (cnt > noff)
646 0 : *bp++ |= (*np & mask) << boff;
647 : }
648 : }
649 : }
650 : } else {
651 0 : oid o;
652 0 : uint32_t v = boff > 0 ? *bp & ((1U << boff) - 1) : 0;
653 0 : do {
654 0 : for (uint32_t i = boff; i < 32; i++) {
655 0 : o = canditer_next(ci);
656 0 : if (is_oid_nil(o))
657 : break;
658 0 : o -= ni->b->hseqbase;
659 0 : v |= (uint32_t) Tmskval(ni, o - ni->b->hseqbase) << i;
660 : }
661 0 : *bp++ = v;
662 0 : v = 0;
663 0 : boff = 0;
664 0 : } while (!is_oid_nil(o));
665 : }
666 127 : MT_lock_unset(&b->theaplock);
667 127 : return GDK_SUCCEED;
668 : }
669 :
670 : /* Append the contents of BAT n (subject to the optional candidate
671 : * list s) to BAT b. If b is empty, b will get the seqbase of s if it
672 : * was passed in, and else the seqbase of n. */
673 : static gdk_return
674 1287255 : BATappend2(BAT *b, BAT *n, BAT *s, bool force, bool mayshare)
675 : {
676 1287255 : struct canditer ci;
677 1287255 : BUN r;
678 1287255 : oid hseq = n->hseqbase;
679 1287255 : char buf[64];
680 1287255 : lng t0 = 0;
681 1287255 : const ValRecord *prop = NULL;
682 1287255 : ValRecord minprop, maxprop;
683 1287255 : const void *minbound = NULL, *maxbound = NULL;
684 1287255 : int (*atomcmp) (const void *, const void *) = ATOMcompare(b->ttype);
685 1287255 : bool hlocked = false;
686 :
687 1287255 : if (b == NULL || n == NULL || BATcount(n) == 0) {
688 : return GDK_SUCCEED;
689 : }
690 824053 : assert(b->theap->parentid == b->batCacheid);
691 :
692 824053 : TRC_DEBUG_IF(ALGO) {
693 0 : t0 = GDKusec();
694 0 : snprintf(buf, sizeof(buf), ALGOBATFMT, ALGOBATPAR(b));
695 : }
696 :
697 824053 : ALIGNapp(b, force, GDK_FAIL);
698 :
699 2338528 : if (ATOMstorage(ATOMtype(b->ttype)) != ATOMstorage(ATOMtype(n->ttype))) {
700 0 : GDKerror("Incompatible operands ("ALGOBATFMT" vs. "ALGOBATFMT").\n", ALGOBATPAR(b), ALGOBATPAR(n));
701 0 : return GDK_FAIL;
702 : }
703 :
704 824124 : if (BATttype(b) != BATttype(n) &&
705 : ATOMtype(b->ttype) != ATOMtype(n->ttype)) {
706 0 : TRC_DEBUG(CHECK_, "Interpreting %s as %s.\n",
707 : ATOMname(BATttype(n)), ATOMname(BATttype(b)));
708 : }
709 :
710 824124 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
711 :
712 824093 : BATiter ni = bat_iterator(n);
713 :
714 824029 : canditer_init(&ci, n, s);
715 823908 : if (ci.ncand == 0) {
716 0 : goto doreturn;
717 : }
718 :
719 823908 : if (BATcount(b) + ci.ncand > BUN_MAX) {
720 0 : bat_iterator_end(&ni);
721 0 : GDKerror("combined BATs too large\n");
722 0 : return GDK_FAIL;
723 : }
724 :
725 823908 : if (b->hseqbase + BATcount(b) + ci.ncand >= GDK_oid_max) {
726 0 : bat_iterator_end(&ni);
727 0 : GDKerror("overflow of head value\n");
728 0 : return GDK_FAIL;
729 : }
730 :
731 823908 : OIDXdestroy(b);
732 824225 : STRMPdestroy(b); /* TODO: use STRMPappendBitString */
733 824148 : RTREEdestroy(b);
734 :
735 824147 : MT_lock_set(&b->theaplock);
736 823543 : const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
737 823561 : if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL &&
738 48 : VALcopy(&minprop, prop) != NULL) {
739 48 : minbound = VALptr(&minprop);
740 48 : if (ci.ncand == BATcount(n) &&
741 62 : ni.minpos != BUN_NONE &&
742 14 : atomcmp(BUNtail(ni, ni.minpos), minbound) < 0) {
743 0 : assert(0);
744 : GDKerror("value out of bounds\n");
745 : MT_lock_unset(&b->theaplock);
746 : goto bailout;
747 : }
748 : }
749 823531 : if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL &&
750 40 : VALcopy(&maxprop, prop) != NULL) {
751 40 : maxbound = VALptr(&maxprop);
752 40 : if (ci.ncand == BATcount(n) &&
753 52 : ni.maxpos != BUN_NONE &&
754 12 : atomcmp(BUNtail(ni, ni.maxpos), maxbound) >= 0) {
755 0 : assert(0);
756 : GDKerror("value out of bounds\n");
757 : MT_lock_unset(&b->theaplock);
758 : goto bailout;
759 : }
760 : }
761 :
762 823760 : if (BATcount(b) == 0 || b->tmaxpos != BUN_NONE) {
763 397505 : if (ni.maxpos != BUN_NONE) {
764 149936 : BATiter bi = bat_iterator_nolock(b);
765 149936 : if (BATcount(b) == 0 || atomcmp(BUNtail(bi, b->tmaxpos), BUNtail(ni, ni.maxpos)) < 0) {
766 146144 : if (s == NULL) {
767 146008 : b->tmaxpos = BATcount(b) + ni.maxpos;
768 : } else {
769 136 : b->tmaxpos = BUN_NONE;
770 : }
771 : }
772 : } else {
773 247569 : b->tmaxpos = BUN_NONE;
774 : }
775 : }
776 823760 : if (BATcount(b) == 0 || b->tminpos != BUN_NONE) {
777 397488 : if (ni.minpos != BUN_NONE) {
778 149896 : BATiter bi = bat_iterator_nolock(b);
779 149896 : if (BATcount(b) == 0 || atomcmp(BUNtail(bi, b->tminpos), BUNtail(ni, ni.minpos)) > 0) {
780 145484 : if (s == NULL) {
781 145350 : b->tminpos = BATcount(b) + ni.minpos;
782 : } else {
783 134 : b->tminpos = BUN_NONE;
784 : }
785 : }
786 : } else {
787 247592 : b->tminpos = BUN_NONE;
788 : }
789 : }
790 823760 : if (ci.ncand > BATcount(b) / gdk_unique_estimate_keep_fraction) {
791 823066 : b->tunique_est = 0;
792 : }
793 823760 : MT_lock_unset(&b->theaplock);
794 : /* load hash so that we can maintain it */
795 823930 : (void) BATcheckhash(b);
796 :
797 824131 : if (b->ttype == TYPE_void) {
798 : /* b does not have storage, keep it that way if we can */
799 66335 : HASHdestroy(b); /* we're not maintaining the hash here */
800 66332 : MT_lock_set(&b->theaplock);
801 66335 : if (BATtdensebi(&ni) && ci.tpe == cand_dense &&
802 60716 : (BATcount(b) == 0 ||
803 43777 : (BATtdense(b) &&
804 43777 : b->tseqbase + BATcount(b) == n->tseqbase + ci.seq - hseq))) {
805 : /* n is also dense and consecutive with b */
806 60656 : if (BATcount(b) == 0) {
807 16939 : if (minbound && n->tseqbase + ci.seq - hseq < *(const oid *)minbound) {
808 0 : assert(0);
809 : GDKerror("value not within bounds\n");
810 : MT_lock_unset(&b->theaplock);
811 : goto bailout;
812 : }
813 16939 : BATtseqbase(b, n->tseqbase + ci.seq - hseq);
814 : }
815 60652 : if (maxbound && b->tseqbase + BATcount(b) + ci.ncand >= *(const oid *)maxbound) {
816 0 : assert(0);
817 : GDKerror("value not within bounds\n");
818 : MT_lock_unset(&b->theaplock);
819 : goto bailout;
820 : }
821 60652 : BATsetcount(b, BATcount(b) + ci.ncand);
822 60649 : MT_lock_unset(&b->theaplock);
823 60655 : goto doreturn;
824 : }
825 5679 : if ((BATcount(b) == 0 || is_oid_nil(b->tseqbase)) &&
826 20 : ni.type == TYPE_void && is_oid_nil(n->tseqbase)) {
827 : /* both b and n are void/nil */
828 0 : if (notnull) {
829 0 : assert(0);
830 : GDKerror("NULL value not within bounds\n");
831 : MT_lock_unset(&b->theaplock);
832 : goto bailout;
833 : }
834 0 : BATtseqbase(b, oid_nil);
835 0 : BATsetcount(b, BATcount(b) + ci.ncand);
836 0 : MT_lock_unset(&b->theaplock);
837 0 : goto doreturn;
838 : }
839 : /* we need to materialize b; allocate enough capacity */
840 5679 : MT_lock_unset(&b->theaplock);
841 5679 : if (BATmaterialize(b, BATcount(b) + ci.ncand) != GDK_SUCCEED) {
842 0 : goto bailout;
843 : }
844 : }
845 :
846 : /* property setting */
847 763475 : MT_lock_set(&b->theaplock);
848 763450 : r = BATcount(b);
849 :
850 763450 : if (BATcount(b) == 0) {
851 373828 : b->tsorted = ni.sorted;
852 373828 : b->trevsorted = ni.revsorted;
853 373828 : b->tseqbase = oid_nil;
854 373828 : b->tnonil = ni.nonil;
855 373828 : b->tnil = ni.nil && ci.ncand == BATcount(n);
856 373828 : if (ci.tpe == cand_dense) {
857 373676 : b->tnosorted = ci.seq - hseq <= ni.nosorted && ni.nosorted < ci.seq + ci.ncand - hseq ? ni.nosorted + hseq - ci.seq : 0;
858 373676 : b->tnorevsorted = ci.seq - hseq <= ni.norevsorted && ni.norevsorted < ci.seq + ci.ncand - hseq ? ni.norevsorted + hseq - ci.seq : 0;
859 373676 : if (BATtdensebi(&ni)) {
860 1968 : b->tseqbase = n->tseqbase + ci.seq - hseq;
861 : }
862 : } else {
863 152 : b->tnosorted = 0;
864 152 : b->tnorevsorted = 0;
865 : }
866 373828 : b->tkey = ni.key;
867 373828 : if (ci.ncand == BATcount(n)) {
868 373135 : b->tnokey[0] = ni.nokey[0];
869 373135 : b->tnokey[1] = ni.nokey[1];
870 : } else {
871 693 : b->tnokey[0] = b->tnokey[1] = 0;
872 : }
873 : } else {
874 389622 : BUN last = r - 1;
875 389622 : BATiter bi = bat_iterator_nolock(b);
876 389622 : int xx = ATOMcmp(b->ttype,
877 : BUNtail(ni, ci.seq - hseq),
878 : BUNtail(bi, last));
879 389608 : if (b->tsorted && (!ni.sorted || xx < 0)) {
880 15468 : b->tsorted = false;
881 15468 : b->tnosorted = 0;
882 15468 : b->tseqbase = oid_nil;
883 : }
884 389608 : if (b->trevsorted &&
885 37307 : (!ni.revsorted || xx > 0)) {
886 11525 : b->trevsorted = false;
887 11525 : b->tnorevsorted = 0;
888 : }
889 389608 : if (b->tkey &&
890 44449 : (!(b->tsorted || b->trevsorted) ||
891 34803 : !ni.key || xx == 0)) {
892 14109 : BATkey(b, false);
893 : }
894 389608 : if (b->ttype != TYPE_void && b->tsorted && BATtdense(b) &&
895 5744 : (!BATtdensebi(&ni) ||
896 445 : ci.tpe != cand_dense ||
897 445 : 1 + *(oid *) BUNtloc(bi, last) != BUNtoid(n, ci.seq - hseq))) {
898 5316 : b->tseqbase = oid_nil;
899 : }
900 389608 : b->tnonil &= ni.nonil;
901 770319 : b->tnil |= ni.nil && ci.ncand == ni.count;
902 : }
903 763436 : MT_lock_unset(&b->theaplock);
904 763483 : if (b->ttype == TYPE_str) {
905 75495 : if (insert_string_bat(b, &ni, &ci, force, mayshare, qry_ctx) != GDK_SUCCEED) {
906 0 : goto bailout;
907 : }
908 687988 : } else if (ATOMvarsized(b->ttype)) {
909 448 : if (append_varsized_bat(b, &ni, &ci, mayshare) != GDK_SUCCEED) {
910 0 : goto bailout;
911 : }
912 687540 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
913 : /* no bounds and NOT_NULL property on MSK bats */
914 127 : assert(minbound == NULL && maxbound == NULL && !notnull);
915 127 : if (append_msk_bat(b, &ni, &ci) != GDK_SUCCEED) {
916 0 : goto bailout;
917 : }
918 : } else {
919 687413 : if (ci.ncand > BATcapacity(b) - BATcount(b)) {
920 : /* if needed space exceeds a normal growth
921 : * extend just with what's needed */
922 10330 : BUN ncap = BATcount(b) + ci.ncand;
923 10330 : BUN grows = BATgrows(b);
924 :
925 10331 : if (ncap > grows)
926 : grows = ncap;
927 10331 : if (BATextend(b, grows) != GDK_SUCCEED) {
928 0 : goto bailout;
929 : }
930 : }
931 687417 : MT_rwlock_wrlock(&b->thashlock);
932 687465 : hlocked = true;
933 687465 : if (b->ttype != TYPE_void &&
934 687402 : ni.type != TYPE_void &&
935 680332 : ci.tpe == cand_dense) {
936 : /* use fast memcpy if we can */
937 680231 : memcpy(Tloc(b, BATcount(b)),
938 680231 : (const char *) ni.base + ((ci.seq - hseq) << ni.shift),
939 680231 : ci.ncand << ni.shift);
940 680244 : for (BUN i = 0; b->thash && i < ci.ncand; i++) {
941 351 : HASHappend_locked(b, r, Tloc(b, r));
942 13 : r++;
943 : }
944 : } else {
945 7234 : const void *atomnil = ATOMnilptr(b->ttype);
946 11186296 : TIMEOUT_LOOP(ci.ncand, qry_ctx) {
947 11171808 : BUN p = canditer_next(&ci) - hseq;
948 11172051 : const void *t = BUNtail(ni, p);
949 11172136 : bool isnil = atomcmp(t, atomnil) == 0;
950 11171985 : if (notnull && isnil) {
951 0 : assert(0);
952 : GDKerror("NULL value not within bounds\n");
953 : goto bailout;
954 11171985 : } else if (minbound &&
955 11171985 : !isnil &&
956 0 : atomcmp(t, minbound) < 0) {
957 0 : assert(0);
958 : GDKerror("value not within bounds\n");
959 : goto bailout;
960 11171985 : } else if (maxbound &&
961 0 : !isnil &&
962 0 : atomcmp(t, maxbound) >= 0) {
963 0 : assert(0);
964 : GDKerror("value not within bounds\n");
965 : goto bailout;
966 11171985 : } else if (tfastins_nocheck(b, r, t) != GDK_SUCCEED) {
967 0 : goto bailout;
968 : }
969 11171792 : if (b->thash)
970 0 : HASHappend_locked(b, r, t);
971 11171807 : r++;
972 : }
973 7233 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
974 : }
975 687126 : BUN nunique;
976 687126 : nunique = b->thash ? b->thash->nunique : 0;
977 687126 : MT_lock_set(&b->theaplock);
978 687173 : BATsetcount(b, b->batCount + ci.ncand);
979 687179 : if (nunique != 0)
980 5 : b->tunique_est = (double) nunique;
981 687179 : MT_lock_unset(&b->theaplock);
982 687266 : assert(hlocked);
983 687266 : MT_rwlock_wrunlock(&b->thashlock);
984 687266 : hlocked = false;
985 : }
986 :
987 824296 : doreturn:
988 824296 : bat_iterator_end(&ni);
989 824093 : if (minbound)
990 48 : VALclear(&minprop);
991 824097 : if (maxbound)
992 40 : VALclear(&maxprop);
993 824097 : TRC_DEBUG(ALGO, "b=%s,n=" ALGOBATFMT ",s=" ALGOOPTBATFMT
994 : " -> " ALGOBATFMT " (" LLFMT " usec)\n",
995 : buf, ALGOBATPAR(n), ALGOOPTBATPAR(s), ALGOBATPAR(b),
996 : GDKusec() - t0);
997 :
998 : return GDK_SUCCEED;
999 : bailout:
1000 0 : if (hlocked)
1001 0 : MT_rwlock_wrunlock(&b->thashlock);
1002 0 : if (minbound)
1003 0 : VALclear(&minprop);
1004 0 : if (maxbound)
1005 0 : VALclear(&maxprop);
1006 0 : bat_iterator_end(&ni);
1007 0 : return GDK_FAIL;
1008 : }
1009 :
1010 : gdk_return
1011 1287437 : BATappend(BAT *b, BAT *n, BAT *s, bool force)
1012 : {
1013 1287437 : return BATappend2(b, n, s, force, true);
1014 : }
1015 :
1016 : gdk_return
1017 4 : BATdel(BAT *b, BAT *d)
1018 : {
1019 4 : void (*atmdel) (Heap *, var_t *) = BATatoms[b->ttype].atomDel;
1020 4 : MT_lock_set(&b->theaplock);
1021 4 : BATiter bi = bat_iterator_nolock(b);
1022 4 : MT_lock_unset(&b->theaplock);
1023 :
1024 4 : assert(ATOMtype(d->ttype) == TYPE_oid);
1025 4 : assert(d->tsorted);
1026 4 : assert(d->tkey);
1027 4 : if (BATcount(d) == 0)
1028 : return GDK_SUCCEED;
1029 4 : OIDXdestroy(b);
1030 4 : HASHdestroy(b);
1031 4 : PROPdestroy(b);
1032 4 : STRMPdestroy(b);
1033 4 : RTREEdestroy(b);
1034 4 : if (BATtdense(d)) {
1035 2 : oid o = d->tseqbase;
1036 2 : BUN c = BATcount(d);
1037 :
1038 2 : if (o + c <= b->hseqbase)
1039 : return GDK_SUCCEED;
1040 2 : if (o < b->hseqbase) {
1041 0 : c -= b->hseqbase - o;
1042 0 : o = b->hseqbase;
1043 : }
1044 2 : if (o - b->hseqbase < b->batInserted) {
1045 0 : GDKerror("cannot delete committed values\n");
1046 0 : return GDK_FAIL;
1047 : }
1048 2 : if (o + c > b->hseqbase + BATcount(b))
1049 0 : c = b->hseqbase + BATcount(b) - o;
1050 2 : if (c == 0)
1051 : return GDK_SUCCEED;
1052 2 : if (atmdel) {
1053 0 : BUN p = o - b->hseqbase;
1054 0 : BUN q = p + c;
1055 0 : while (p < q) {
1056 0 : (*atmdel)(b->tvheap, (var_t *) BUNtloc(bi, p));
1057 0 : p++;
1058 : }
1059 : }
1060 2 : if (BATtdense(b) && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED)
1061 : return GDK_FAIL;
1062 2 : MT_lock_set(&b->theaplock);
1063 2 : if (o + c < b->hseqbase + BATcount(b)) {
1064 0 : o -= b->hseqbase;
1065 0 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1066 0 : BUN n = BATcount(b) - (o + c);
1067 : /* not very efficient, but first see
1068 : * how much this is used */
1069 0 : for (BUN i = 0; i < n; i++)
1070 0 : mskSetVal(b, o + i,
1071 0 : mskGetVal(b, o + c + i));
1072 : } else {
1073 0 : memmove(Tloc(b, o),
1074 0 : Tloc(b, o + c),
1075 0 : b->twidth * (BATcount(b) - (o + c)));
1076 : }
1077 0 : b->theap->dirty = true;
1078 : // o += b->hseqbase; // if this were to be used again
1079 : }
1080 2 : b->batCount -= c;
1081 : } else {
1082 2 : BATiter di = bat_iterator(d);
1083 2 : const oid *o = (const oid *) di.base;
1084 2 : const oid *s;
1085 2 : BUN c = di.count;
1086 2 : BUN nd = 0;
1087 2 : BUN pos;
1088 2 : char *p = NULL;
1089 :
1090 2 : if (o[c - 1] <= b->hseqbase) {
1091 0 : bat_iterator_end(&di);
1092 0 : return GDK_SUCCEED;
1093 : }
1094 2 : while (*o < b->hseqbase) {
1095 0 : o++;
1096 0 : c--;
1097 : }
1098 2 : if (*o - b->hseqbase < b->batInserted) {
1099 0 : bat_iterator_end(&di);
1100 0 : GDKerror("cannot delete committed values\n");
1101 0 : return GDK_FAIL;
1102 : }
1103 2 : if (BATtdense(b) && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED) {
1104 0 : bat_iterator_end(&di);
1105 0 : return GDK_FAIL;
1106 : }
1107 2 : s = o;
1108 2 : pos = *o - b->hseqbase;
1109 2 : if (ATOMstorage(b->ttype) != TYPE_msk)
1110 2 : p = Tloc(b, pos);
1111 6 : while (c > 0 && *o < b->hseqbase + BATcount(b)) {
1112 4 : size_t n;
1113 4 : if (atmdel)
1114 0 : (*atmdel)(b->tvheap, (var_t *) BUNtloc(bi, *o - b->hseqbase));
1115 4 : o++;
1116 4 : c--;
1117 4 : nd++;
1118 4 : if (c == 0 || *o - b->hseqbase >= BATcount(b))
1119 2 : n = b->hseqbase + BATcount(b) - o[-1] - 1;
1120 2 : else if ((oid) (o - s) < *o - *s)
1121 2 : n = o[0] - o[-1] - 1;
1122 : else
1123 : n = 0;
1124 4 : if (n > 0) {
1125 2 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1126 0 : BUN opos = o[-1] + 1 - b->hseqbase;
1127 : /* not very efficient, but
1128 : * first see how much this is
1129 : * used */
1130 0 : for (BUN i = 0; i < n; i++) {
1131 0 : mskSetVal(b, pos + i,
1132 0 : mskGetVal(b, opos + i));
1133 : }
1134 0 : pos += n;
1135 : } else {
1136 2 : n *= b->twidth;
1137 2 : memmove(p,
1138 2 : Tloc(b, o[-1] + 1 - b->hseqbase),
1139 : n);
1140 2 : p += n;
1141 : }
1142 : s = o;
1143 : }
1144 : }
1145 2 : bat_iterator_end(&di);
1146 2 : MT_lock_set(&b->theaplock);
1147 2 : b->theap->dirty = true;
1148 2 : b->batCount -= nd;
1149 : }
1150 4 : if (b->batCount <= 1) {
1151 : /* some trivial properties */
1152 2 : b->tkey = true;
1153 2 : b->tsorted = b->trevsorted = true;
1154 2 : if (b->batCount == 0) {
1155 2 : b->tnil = false;
1156 2 : b->tnonil = true;
1157 : }
1158 : }
1159 : /* not sure about these anymore */
1160 4 : b->tnosorted = b->tnorevsorted = 0;
1161 4 : b->tnokey[0] = b->tnokey[1] = 0;
1162 4 : b->tminpos = BUN_NONE;
1163 4 : b->tmaxpos = BUN_NONE;
1164 4 : b->tunique_est = 0.0;
1165 4 : MT_lock_unset(&b->theaplock);
1166 :
1167 4 : return GDK_SUCCEED;
1168 : }
1169 :
1170 : /*
1171 : * Replace all values in b with values from n whose location is given by
1172 : * the oid in either p or positions.
1173 : * If positions is used, autoincr specifies whether it is the first of a
1174 : * dense range of positions or whether it is a full-blown array of
1175 : * position.
1176 : * If mayappend is set, the position in p/positions may refer to
1177 : * locations beyond the end of b.
1178 : */
1179 : static gdk_return
1180 121925 : BATappend_or_update(BAT *b, BAT *p, const oid *positions, BAT *n,
1181 : bool mayappend, bool autoincr, bool force)
1182 : {
1183 121925 : lng t0 = GDKusec();
1184 121917 : oid pos = oid_nil;
1185 121917 : BUN nunique = 0;
1186 :
1187 121917 : if (b == NULL || b->ttype == TYPE_void || n == NULL) {
1188 : return GDK_SUCCEED;
1189 : }
1190 : /* either p or positions */
1191 121917 : assert((p == NULL) != (positions == NULL));
1192 121917 : if (p != NULL) {
1193 121741 : if (BATcount(p) != BATcount(n)) {
1194 0 : GDKerror("update BATs not the same size\n");
1195 0 : return GDK_FAIL;
1196 : }
1197 121741 : if (ATOMtype(p->ttype) != TYPE_oid) {
1198 0 : GDKerror("positions BAT not type OID\n");
1199 0 : return GDK_FAIL;
1200 : }
1201 121741 : if (BATtdense(p)) {
1202 112941 : pos = p->tseqbase;
1203 112941 : positions = &pos;
1204 112941 : autoincr = true;
1205 112941 : p = NULL;
1206 8800 : } else if (p->ttype != TYPE_void) {
1207 8798 : positions = (const oid *) Tloc(p, 0);
1208 8798 : autoincr = false;
1209 : } else {
1210 : autoincr = false;
1211 : }
1212 176 : } else if (autoincr) {
1213 176 : pos = *positions;
1214 : }
1215 121917 : if (BATcount(n) == 0) {
1216 : return GDK_SUCCEED;
1217 : }
1218 :
1219 25553 : BATiter ni = bat_iterator(n);
1220 :
1221 25553 : OIDXdestroy(b);
1222 25552 : STRMPdestroy(b);
1223 25553 : RTREEdestroy(b);
1224 : /* load hash so that we can maintain it */
1225 25553 : (void) BATcheckhash(b);
1226 :
1227 25553 : MT_lock_set(&b->theaplock);
1228 25553 : if (!force && (b->batRestricted != BAT_WRITE ||
1229 17 : (ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1)) {
1230 0 : MT_lock_unset(&b->theaplock);
1231 0 : bat_iterator_end(&ni);
1232 0 : GDKerror("access denied to %s, aborting.\n", BATgetId(b));
1233 0 : return GDK_FAIL;
1234 : }
1235 25553 : BATiter bi = bat_iterator_nolock(b);
1236 25553 : if (ni.count > BATcount(b) / gdk_unique_estimate_keep_fraction) {
1237 24966 : b->tunique_est = 0;
1238 : }
1239 :
1240 25553 : b->tsorted = b->trevsorted = false;
1241 25553 : b->tnosorted = b->tnorevsorted = 0;
1242 25553 : b->tseqbase = oid_nil;
1243 25553 : b->tkey = false;
1244 25553 : b->tnokey[0] = b->tnokey[1] = 0;
1245 :
1246 25553 : int (*atomcmp)(const void *, const void *) = ATOMcompare(b->ttype);
1247 25553 : const void *nil = ATOMnilptr(b->ttype);
1248 25553 : oid hseqend = b->hseqbase + BATcount(b);
1249 :
1250 25553 : MT_lock_unset(&b->theaplock);
1251 :
1252 29623 : bool anynil = false;
1253 29623 : bool locked = false;
1254 :
1255 29623 : if (b->tvheap) {
1256 1241542 : for (BUN i = 0; i < ni.count; i++) {
1257 1238835 : oid updid;
1258 1238835 : if (positions) {
1259 1237822 : updid = autoincr ? pos++ : *positions++;
1260 : } else {
1261 1013 : updid = BUNtoid(p, i);
1262 : }
1263 :
1264 1238835 : if (updid < b->hseqbase ||
1265 1238835 : (!mayappend && updid >= hseqend)) {
1266 0 : GDKerror("id out of range\n");
1267 0 : goto bailout;
1268 : }
1269 1238835 : updid -= b->hseqbase;
1270 1238835 : if (!force && updid < b->batInserted) {
1271 0 : GDKerror("updating committed value\n");
1272 0 : goto bailout;
1273 : }
1274 :
1275 1238835 : const void *new = BUNtvar(ni, i);
1276 :
1277 1238835 : if (updid >= BATcount(b)) {
1278 23534 : assert(mayappend);
1279 23534 : if (locked) {
1280 4 : MT_rwlock_wrunlock(&b->thashlock);
1281 4 : locked = false;
1282 : }
1283 23534 : if (b->tminpos != bi.minpos ||
1284 23533 : b->tmaxpos != bi.maxpos) {
1285 1 : MT_lock_set(&b->theaplock);
1286 1 : b->tminpos = bi.minpos;
1287 1 : b->tmaxpos = bi.maxpos;
1288 1 : MT_lock_unset(&b->theaplock);
1289 : }
1290 23534 : if (BATcount(b) < updid &&
1291 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1292 0 : bat_iterator_end(&ni);
1293 0 : return GDK_FAIL;
1294 : }
1295 23534 : if (BUNappend(b, new, force) != GDK_SUCCEED) {
1296 0 : bat_iterator_end(&ni);
1297 0 : return GDK_FAIL;
1298 : }
1299 23534 : bi = bat_iterator_nolock(b);
1300 162870 : continue;
1301 : }
1302 :
1303 : /* it is possible that a previous run was killed
1304 : * after an update (with a mmapped tail file)
1305 : * but before that was committed, then the
1306 : * offset may point outside of the vheap */
1307 1215301 : const void *old = BUNtvaroff(bi, updid) < bi.vhfree ? BUNtvar(bi, updid) : NULL;
1308 :
1309 1209412 : if (old && atomcmp(old, new) == 0) {
1310 : /* replacing with the same value:
1311 : * nothing to do */
1312 139336 : continue;
1313 : }
1314 :
1315 1075041 : bool isnil = atomcmp(new, nil) == 0;
1316 1070859 : anynil |= isnil;
1317 1070859 : MT_lock_set(&b->theaplock);
1318 1071231 : if (old == NULL ||
1319 1071231 : (b->tnil &&
1320 715 : !anynil &&
1321 715 : atomcmp(old, nil) == 0)) {
1322 : /* if old value is nil and no new
1323 : * value is, we're not sure anymore
1324 : * about the nil property, so we must
1325 : * clear it */
1326 713 : b->tnil = false;
1327 : }
1328 1071231 : b->tnonil &= !isnil;
1329 1071231 : b->tnil |= isnil;
1330 1071231 : MT_lock_unset(&b->theaplock);
1331 1071725 : if (bi.maxpos != BUN_NONE) {
1332 3444 : if (!isnil &&
1333 1722 : atomcmp(BUNtvar(bi, bi.maxpos), new) < 0) {
1334 : /* new value is larger than
1335 : * previous largest */
1336 23 : bi.maxpos = updid;
1337 3398 : } else if (old == NULL ||
1338 1712 : (atomcmp(BUNtvar(bi, bi.maxpos), old) == 0 &&
1339 13 : atomcmp(new, old) != 0)) {
1340 : /* old value is equal to
1341 : * largest and new value is
1342 : * smaller, so we don't know
1343 : * anymore which is the
1344 : * largest */
1345 13 : bi.maxpos = BUN_NONE;
1346 : }
1347 : }
1348 1071725 : if (bi.minpos != BUN_NONE) {
1349 3436 : if (!isnil &&
1350 1718 : atomcmp(BUNtvar(bi, bi.minpos), new) > 0) {
1351 : /* new value is smaller than
1352 : * previous smallest */
1353 16 : bi.minpos = updid;
1354 3404 : } else if (old == NULL ||
1355 1724 : (atomcmp(BUNtvar(bi, bi.minpos), old) == 0 &&
1356 22 : atomcmp(new, old) != 0)) {
1357 : /* old value is equal to
1358 : * smallest and new value is
1359 : * larger, so we don't know
1360 : * anymore which is the
1361 : * smallest */
1362 22 : bi.minpos = BUN_NONE;
1363 : }
1364 : }
1365 1071725 : if (!locked) {
1366 2251 : MT_rwlock_wrlock(&b->thashlock);
1367 2251 : locked = true;
1368 : }
1369 1071725 : if (old)
1370 1071725 : HASHdelete_locked(&bi, updid, old);
1371 0 : else if (b->thash) {
1372 0 : doHASHdestroy(b, b->thash);
1373 0 : b->thash = NULL;
1374 : }
1375 :
1376 1071722 : var_t d;
1377 1071722 : switch (b->twidth) {
1378 1058278 : case 1:
1379 1058278 : d = (var_t) ((uint8_t *) b->theap->base)[updid] + GDK_VAROFFSET;
1380 1058278 : break;
1381 10429 : case 2:
1382 10429 : d = (var_t) ((uint16_t *) b->theap->base)[updid] + GDK_VAROFFSET;
1383 10429 : break;
1384 2980 : case 4:
1385 2980 : d = (var_t) ((uint32_t *) b->theap->base)[updid];
1386 2980 : break;
1387 : #if SIZEOF_VAR_T == 8
1388 35 : case 8:
1389 35 : d = (var_t) ((uint64_t *) b->theap->base)[updid];
1390 35 : break;
1391 : #endif
1392 : default:
1393 0 : MT_UNREACHABLE();
1394 : }
1395 1071722 : MT_lock_set(&b->theaplock);
1396 1071258 : gdk_return rc = ATOMreplaceVAR(b, &d, new);
1397 1071179 : MT_lock_unset(&b->theaplock);
1398 1071924 : if (rc != GDK_SUCCEED) {
1399 0 : goto bailout;
1400 : }
1401 1071924 : if (b->twidth < SIZEOF_VAR_T &&
1402 1071622 : (b->twidth <= 2 ? d - GDK_VAROFFSET : d) >= ((size_t) 1 << (8 << b->tshift))) {
1403 : /* doesn't fit in current heap, upgrade it */
1404 20 : if (GDKupgradevarheap(b, d, 0, MAX(updid, b->batCount)) != GDK_SUCCEED) {
1405 0 : goto bailout;
1406 : }
1407 : }
1408 : /* in case ATOMreplaceVAR and/or
1409 : * GDKupgradevarheap replaces a heap, we need to
1410 : * reinitialize the iterator */
1411 : {
1412 : /* save and restore minpos/maxpos */
1413 1071924 : BUN minpos = bi.minpos;
1414 1071924 : BUN maxpos = bi.maxpos;
1415 1071924 : bi = bat_iterator_nolock(b);
1416 1071924 : bi.minpos = minpos;
1417 1071924 : bi.maxpos = maxpos;
1418 : }
1419 1071924 : switch (b->twidth) {
1420 1058462 : case 1:
1421 1058462 : ((uint8_t *) b->theap->base)[updid] = (uint8_t) (d - GDK_VAROFFSET);
1422 1058462 : break;
1423 10445 : case 2:
1424 10445 : ((uint16_t *) b->theap->base)[updid] = (uint16_t) (d - GDK_VAROFFSET);
1425 10445 : break;
1426 2982 : case 4:
1427 2982 : ((uint32_t *) b->theap->base)[updid] = (uint32_t) d;
1428 2982 : break;
1429 : #if SIZEOF_VAR_T == 8
1430 35 : case 8:
1431 35 : ((uint64_t *) b->theap->base)[updid] = (uint64_t) d;
1432 35 : break;
1433 : #endif
1434 : default:
1435 0 : MT_UNREACHABLE();
1436 : }
1437 1071924 : HASHinsert_locked(&bi, updid, new);
1438 :
1439 : }
1440 2707 : if (locked) {
1441 2247 : if (b->thash)
1442 2 : nunique = b->thash->nunique;
1443 2247 : MT_rwlock_wrunlock(&b->thashlock);
1444 2247 : locked = false;
1445 : }
1446 2707 : MT_lock_set(&b->theaplock);
1447 2708 : b->tvheap->dirty = true;
1448 2708 : MT_lock_unset(&b->theaplock);
1449 22845 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
1450 0 : assert(b->thash == NULL);
1451 0 : HASHdestroy(b); /* hash doesn't make sense for msk */
1452 0 : for (BUN i = 0; i < ni.count; i++) {
1453 0 : oid updid;
1454 0 : if (positions) {
1455 0 : updid = autoincr ? pos++ : *positions++;
1456 : } else {
1457 0 : updid = BUNtoid(p, i);
1458 : }
1459 :
1460 0 : if (updid < b->hseqbase ||
1461 0 : (!mayappend && updid >= hseqend)) {
1462 0 : GDKerror("id out of range\n");
1463 0 : bat_iterator_end(&ni);
1464 0 : return GDK_FAIL;
1465 : }
1466 0 : updid -= b->hseqbase;
1467 0 : if (!force && updid < b->batInserted) {
1468 0 : GDKerror("updating committed value\n");
1469 0 : bat_iterator_end(&ni);
1470 0 : return GDK_FAIL;
1471 : }
1472 0 : if (updid >= BATcount(b)) {
1473 0 : assert(mayappend);
1474 0 : if (BATcount(b) < updid &&
1475 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1476 0 : bat_iterator_end(&ni);
1477 0 : return GDK_FAIL;
1478 : }
1479 0 : if (BUNappend(b, BUNtmsk(ni, i), force) != GDK_SUCCEED) {
1480 0 : bat_iterator_end(&ni);
1481 0 : return GDK_FAIL;
1482 : }
1483 0 : continue;
1484 : }
1485 0 : mskSetVal(b, updid, Tmskval(&ni, i));
1486 : }
1487 0 : bi = bat_iterator_nolock(b);
1488 22845 : } else if (autoincr) {
1489 14492 : if (pos < b->hseqbase ||
1490 13673 : (!mayappend && pos + ni.count > hseqend)) {
1491 0 : GDKerror("id out of range\n");
1492 0 : bat_iterator_end(&ni);
1493 0 : return GDK_FAIL;
1494 : }
1495 14492 : pos -= b->hseqbase;
1496 14492 : if (!force && pos < b->batInserted) {
1497 0 : GDKerror("updating committed value\n");
1498 0 : bat_iterator_end(&ni);
1499 0 : return GDK_FAIL;
1500 : }
1501 :
1502 14492 : if (pos >= BATcount(b)) {
1503 442 : assert(mayappend);
1504 442 : bat_iterator_end(&ni);
1505 442 : if (BATcount(b) < pos &&
1506 0 : BUNappendmulti(b, NULL, (BUN) (pos - BATcount(b)), force) != GDK_SUCCEED) {
1507 : return GDK_FAIL;
1508 : }
1509 442 : return BATappend(b, n, NULL, force);
1510 : }
1511 14056 : if (pos + ni.count > BATcount(b) &&
1512 6 : BUNappendmulti(b, NULL, (BUN) (pos + ni.count - BATcount(b)), force) != GDK_SUCCEED) {
1513 0 : bat_iterator_end(&ni);
1514 0 : return GDK_FAIL;
1515 : }
1516 14050 : bi = bat_iterator_nolock(b);
1517 :
1518 : /* we copy all of n, so if there are nils in n we get
1519 : * nils in b (and else we don't know) */
1520 14050 : b->tnil = ni.nil;
1521 : /* we may not copy over all of b, so we only know that
1522 : * there are no nils in b afterward if there weren't
1523 : * any in either b or n to begin with */
1524 14050 : b->tnonil &= ni.nonil;
1525 : /* if there is no hash, we don't start the loop, if
1526 : * there is only a persisted hash, it will get destroyed
1527 : * in the first iteration, after which there is no hash
1528 : * and the loop ends */
1529 14050 : MT_rwlock_wrlock(&b->thashlock);
1530 14050 : locked = true;
1531 14050 : for (BUN i = pos, j = pos + ni.count; i < j && b->thash; i++)
1532 0 : HASHdelete_locked(&bi, i, Tloc(b, i));
1533 14050 : if (ni.type == TYPE_void) {
1534 0 : assert(b->ttype == TYPE_oid);
1535 0 : oid *o = Tloc(b, pos);
1536 0 : if (is_oid_nil(ni.tseq)) {
1537 : /* we may or may not overwrite the old
1538 : * min/max values */
1539 0 : bi.minpos = BUN_NONE;
1540 0 : bi.maxpos = BUN_NONE;
1541 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1542 0 : o[i] = oid_nil;
1543 0 : b->tnil = true;
1544 : } else {
1545 0 : oid v = ni.tseq;
1546 : /* we know min/max of n, so we know
1547 : * the new min/max of b if those of n
1548 : * are smaller/larger than the old */
1549 0 : if (bi.minpos != BUN_NONE) {
1550 0 : if (v <= BUNtoid(b, bi.minpos))
1551 0 : bi.minpos = pos;
1552 0 : else if (pos <= bi.minpos && bi.minpos < pos + ni.count)
1553 0 : bi.minpos = BUN_NONE;
1554 : }
1555 0 : if (complex_cand(n)) {
1556 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1557 0 : o[i] = *(oid *)Tpos(&ni, i);
1558 : /* last value */
1559 0 : v = o[ni.count - 1];
1560 : } else {
1561 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1562 0 : o[i] = v++;
1563 : /* last value added (not one beyond) */
1564 0 : v--;
1565 : }
1566 0 : if (bi.maxpos != BUN_NONE) {
1567 0 : if (v >= BUNtoid(b, bi.maxpos))
1568 0 : bi.maxpos = pos + ni.count - 1;
1569 0 : else if (pos <= bi.maxpos && bi.maxpos < pos + ni.count)
1570 0 : bi.maxpos = BUN_NONE;
1571 : }
1572 : }
1573 : } else {
1574 : /* if the extremes of n are at least as
1575 : * extreme as those of b, we can replace b's
1576 : * min/max, else we don't know what b's new
1577 : * min/max are*/
1578 14319 : if (bi.minpos != BUN_NONE && ni.minpos != BUN_NONE &&
1579 269 : atomcmp(BUNtloc(bi, bi.minpos), BUNtail(ni, ni.minpos)) >= 0) {
1580 159 : bi.minpos = pos + ni.minpos;
1581 : } else {
1582 13891 : bi.minpos = BUN_NONE;
1583 : }
1584 14347 : if (bi.maxpos != BUN_NONE && ni.maxpos != BUN_NONE &&
1585 297 : atomcmp(BUNtloc(bi, bi.maxpos), BUNtail(ni, ni.maxpos)) <= 0) {
1586 216 : bi.maxpos = pos + ni.maxpos;
1587 : } else {
1588 13834 : bi.maxpos = BUN_NONE;
1589 : }
1590 14050 : memcpy(Tloc(b, pos), ni.base,
1591 14050 : ni.count << b->tshift);
1592 : }
1593 : /* either we have a hash that was updated above, or we
1594 : * have no hash; we cannot have the case where there is
1595 : * only a persisted (unloaded) hash since it would have
1596 : * been destroyed above */
1597 14050 : if (b->thash != NULL) {
1598 0 : for (BUN i = pos, j = pos + ni.count; i < j; i++)
1599 0 : HASHinsert_locked(&bi, i, Tloc(b, i));
1600 0 : if (b->thash)
1601 0 : nunique = b->thash->nunique;
1602 : }
1603 14050 : MT_rwlock_wrunlock(&b->thashlock);
1604 14050 : locked = false;
1605 14050 : if (ni.count == BATcount(b)) {
1606 : /* if we replaced all values of b by values
1607 : * from n, we can also copy the min/max
1608 : * properties */
1609 6456 : bi.minpos = ni.minpos;
1610 6456 : bi.maxpos = ni.maxpos;
1611 6456 : if (BATtdensebi(&ni)) {
1612 : /* replaced all of b with a dense sequence */
1613 47 : MT_lock_set(&b->theaplock);
1614 47 : BATtseqbase(b, ni.tseq);
1615 47 : MT_lock_unset(&b->theaplock);
1616 : }
1617 : }
1618 : } else {
1619 158231496 : for (BUN i = 0; i < ni.count; i++) {
1620 158223144 : oid updid;
1621 158223144 : if (positions) {
1622 : /* assert(!autoincr) */
1623 158223144 : updid = *positions++;
1624 : } else {
1625 0 : updid = BUNtoid(p, i);
1626 : }
1627 :
1628 158223144 : if (updid < b->hseqbase ||
1629 158223144 : (!mayappend && updid >= hseqend)) {
1630 0 : GDKerror("id out of range\n");
1631 0 : goto bailout;
1632 : }
1633 158223144 : updid -= b->hseqbase;
1634 158223144 : if (!force && updid < b->batInserted) {
1635 0 : GDKerror("updating committed value\n");
1636 0 : goto bailout;
1637 : }
1638 :
1639 158223144 : const void *new = BUNtloc(ni, i);
1640 :
1641 158223144 : if (updid >= BATcount(b)) {
1642 16059 : assert(mayappend);
1643 16059 : if (locked) {
1644 10 : MT_rwlock_wrunlock(&b->thashlock);
1645 10 : locked = false;
1646 : }
1647 16059 : if (b->tminpos != bi.minpos ||
1648 16057 : b->tmaxpos != bi.maxpos) {
1649 3 : MT_lock_set(&b->theaplock);
1650 3 : b->tminpos = bi.minpos;
1651 3 : b->tmaxpos = bi.maxpos;
1652 3 : MT_lock_unset(&b->theaplock);
1653 : }
1654 16059 : if (BATcount(b) < updid &&
1655 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1656 0 : goto bailout;
1657 : }
1658 16059 : if (BUNappend(b, new, force) != GDK_SUCCEED) {
1659 0 : bat_iterator_end(&ni);
1660 0 : return GDK_FAIL;
1661 : }
1662 16059 : bi = bat_iterator_nolock(b);
1663 16059 : continue;
1664 : }
1665 :
1666 158207085 : const void *old = BUNtloc(bi, updid);
1667 158207085 : bool isnil = atomcmp(new, nil) == 0;
1668 158360591 : anynil |= isnil;
1669 158360591 : if (b->tnil &&
1670 1915 : !anynil &&
1671 1915 : atomcmp(old, nil) == 0) {
1672 : /* if old value is nil and no new
1673 : * value is, we're not sure anymore
1674 : * about the nil property, so we must
1675 : * clear it */
1676 1911 : b->tnil = false;
1677 : }
1678 158360591 : b->tnonil &= !isnil;
1679 158360591 : b->tnil |= isnil;
1680 158360591 : if (bi.maxpos != BUN_NONE) {
1681 8864 : if (!isnil &&
1682 4430 : atomcmp(BUNtloc(bi, bi.maxpos), new) < 0) {
1683 : /* new value is larger than
1684 : * previous largest */
1685 69 : bi.maxpos = updid;
1686 4372 : } else if (atomcmp(BUNtloc(bi, bi.maxpos), old) == 0 &&
1687 7 : atomcmp(new, old) != 0) {
1688 : /* old value is equal to
1689 : * largest and new value is
1690 : * smaller, so we don't know
1691 : * anymore which is the
1692 : * largest */
1693 7 : bi.maxpos = BUN_NONE;
1694 : }
1695 : }
1696 158360591 : if (bi.minpos != BUN_NONE) {
1697 8864 : if (!isnil &&
1698 4430 : atomcmp(BUNtloc(bi, bi.minpos), new) > 0) {
1699 : /* new value is smaller than
1700 : * previous smallest */
1701 6 : bi.minpos = updid;
1702 4434 : } else if (atomcmp(BUNtloc(bi, bi.minpos), old) == 0 &&
1703 6 : atomcmp(new, old) != 0) {
1704 : /* old value is equal to
1705 : * smallest and new value is
1706 : * larger, so we don't know
1707 : * anymore which is the
1708 : * smallest */
1709 6 : bi.minpos = BUN_NONE;
1710 : }
1711 : }
1712 :
1713 158360591 : if (!locked) {
1714 8352 : MT_rwlock_wrlock(&b->thashlock);
1715 8352 : locked = true;
1716 : }
1717 158360591 : HASHdelete_locked(&bi, updid, old);
1718 158683070 : switch (b->twidth) {
1719 30638980 : case 1:
1720 30638980 : ((bte *) b->theap->base)[updid] = * (bte *) new;
1721 30638980 : break;
1722 527207 : case 2:
1723 527207 : ((sht *) b->theap->base)[updid] = * (sht *) new;
1724 527207 : break;
1725 21668878 : case 4:
1726 21668878 : ((int *) b->theap->base)[updid] = * (int *) new;
1727 21668878 : break;
1728 100436845 : case 8:
1729 100436845 : ((lng *) b->theap->base)[updid] = * (lng *) new;
1730 100436845 : break;
1731 5411160 : case 16:
1732 : #ifdef HAVE_HGE
1733 5411160 : ((hge *) b->theap->base)[updid] = * (hge *) new;
1734 : #else
1735 : ((uuid *) b->theap->base)[updid] = * (uuid *) new;
1736 : #endif
1737 5411160 : break;
1738 0 : default:
1739 0 : memcpy(BUNtloc(bi, updid), new, ATOMsize(b->ttype));
1740 0 : break;
1741 : }
1742 158683070 : HASHinsert_locked(&bi, updid, new);
1743 : }
1744 8352 : if (locked) {
1745 8342 : if (b->thash)
1746 0 : nunique = b->thash->nunique;
1747 8342 : MT_rwlock_wrunlock(&b->thashlock);
1748 8342 : locked = false;
1749 : }
1750 : }
1751 25110 : bat_iterator_end(&ni);
1752 25111 : MT_lock_set(&b->theaplock);
1753 25111 : if (nunique != 0)
1754 2 : b->tunique_est = (double) nunique;
1755 25111 : b->tminpos = bi.minpos;
1756 25111 : b->tmaxpos = bi.maxpos;
1757 25111 : b->theap->dirty = true;
1758 25111 : MT_lock_unset(&b->theaplock);
1759 25111 : TRC_DEBUG(ALGO,
1760 : "BATreplace(" ALGOBATFMT "," ALGOOPTBATFMT "," ALGOBATFMT ") " LLFMT " usec\n",
1761 : ALGOBATPAR(b), ALGOOPTBATPAR(p), ALGOBATPAR(n),
1762 : GDKusec() - t0);
1763 : return GDK_SUCCEED;
1764 :
1765 0 : bailout:
1766 0 : bat_iterator_end(&ni);
1767 0 : if (locked) {
1768 0 : Hash *h = b->thash;
1769 0 : b->thash = NULL;
1770 0 : MT_rwlock_wrunlock(&b->thashlock);
1771 0 : doHASHdestroy(b, h);
1772 : }
1773 : return GDK_FAIL;
1774 : }
1775 :
1776 : /* replace values from b at locations specified in p with values in n */
1777 : gdk_return
1778 120825 : BATreplace(BAT *b, BAT *p, BAT *n, bool force)
1779 : {
1780 120825 : return BATappend_or_update(b, p, NULL, n, false, false, force);
1781 : }
1782 :
1783 : /* like BATreplace, but p may specify locations beyond the end of b */
1784 : gdk_return
1785 923 : BATupdate(BAT *b, BAT *p, BAT *n, bool force)
1786 : {
1787 923 : return BATappend_or_update(b, p, NULL, n, true, false, force);
1788 : }
1789 :
#if 0				/* not used */
/* Variant of BATreplace that takes the positions as a plain array of
 * oids (or, with autoincr set, as the first oid of a dense range)
 * instead of as a BAT; appending is disabled. */
gdk_return
BATreplacepos(BAT *b, const oid *positions, BAT *n, bool autoincr, bool force)
{
	const bool mayappend = false;	/* positions beyond b are an error */

	return BATappend_or_update(b, NULL, positions, n, mayappend, autoincr, force);
}
#endif
1798 :
1799 : /* like BATreplace, but the positions are given by an array of oid
1800 : * values, and they may specify locations beyond the end of b */
1801 : gdk_return
1802 176 : BATupdatepos(BAT *b, const oid *positions, BAT *n, bool autoincr, bool force)
1803 : {
1804 176 : return BATappend_or_update(b, NULL, positions, n, true, autoincr, force);
1805 : }
1806 :
1807 : /*
1808 : * BAT Selections
1809 : * The BAT selectors are among the most heavily used operators.
1810 : * Their efficient implementation is therefore mandatory.
1811 : *
1812 : * BAT slice
1813 : * This function returns a horizontal slice from a BAT. It optimizes
1814 : * execution by avoiding to copy when the BAT is memory mapped (in
1815 : * this case, an independent submap is created) or else when it is
1816 : * read-only, then a VIEW bat is created as a result.
1817 : *
1818 : * If a new copy has to be created, this function takes care to
1819 : * preserve void-columns (in this case, the seqbase has to be
1820 : * recomputed in the result).
1821 : *
1822 : * The selected range is excluding the high value.
1823 : */
1824 : BAT *
1825 10128480 : BATslice(BAT *b, BUN l, BUN h)
1826 : {
1827 10128480 : BUN low = l; /* the caller's l before clamping; used for the head seqbase (and dense tail seqbase) of the result */
1828 10128480 : BAT *bn = NULL; /* the result; this is what gets returned at doreturn */
1829 :
1830 10128480 : BATcheck(b, NULL);
1831 10128480 : BATiter bi = bat_iterator(b); /* ended at doreturn, so every exit below goes through that label */
1832 10129207 : if (l > bi.count) /* clamp both bounds to the number of rows in b ... */
1833 : l = bi.count;
1834 10129207 : if (h > bi.count)
1835 : h = bi.count;
1836 10129207 : if (h < l) /* ... and turn an inverted range into an empty one */
1837 : h = l;
1838 :
1839 10129207 : if (complex_cand(b)) {
1840 : /* slicing a candidate list with exceptions: l and h are rebased from b->hseqbase to ci.hseq (clamping at 0) and the slice is produced by canditer_slice */
1841 78 : struct canditer ci;
1842 78 : canditer_init(&ci, NULL, b);
1843 78 : if (b->hseqbase + l >= ci.hseq) {
1844 78 : l = b->hseqbase + l - ci.hseq;
1845 78 : h = b->hseqbase + h - ci.hseq;
1846 : } else {
1847 0 : l = 0;
1848 0 : if (b->hseqbase + h >= ci.hseq)
1849 0 : h = b->hseqbase + h - ci.hseq;
1850 : else
1851 : h = 0;
1852 : }
1853 78 : bn = canditer_slice(&ci, l, h);
1854 78 : goto doreturn;
1855 : }
1856 : /* If the source BAT is readonly, then we can obtain a VIEW
1857 : * that just reuses the memory of the source. */
1858 10129129 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1859 : /* forget about slices for bit masks: we can't deal
1860 : * with difference in alignment, so we'll just make a
1861 : * copy */
1862 0 : bn = COLnew((oid) (b->hseqbase + low), b->ttype, h - l, TRANSIENT);
1863 : /* we use BATappend with a candidate list to easily
1864 : * copy the part of b that we need */
1865 0 : BAT *s = BATdense(0, (oid) (b->hseqbase + low), h - l);
1866 0 : if (bn == NULL ||
1867 0 : s == NULL ||
1868 0 : BATappend(bn, b, s, false) != GDK_SUCCEED) {
1869 0 : BBPreclaim(bn); /* NOTE(review): bn or s may be NULL here; BBPreclaim is assumed to accept NULL -- confirm */
1870 0 : BBPreclaim(s);
1871 0 : bn = NULL;
1872 0 : goto doreturn;
1873 : }
1874 0 : BBPunfix(s->batCacheid); /* the candidate list was only needed for the append */
1875 0 : goto doreturn;
1876 : }
1877 10129129 : restrict_t prestricted; /* access mode of b's parent; only consulted when b is a read-only view */
1878 11251453 : if (bi.restricted == BAT_READ && VIEWtparent(b)) {
1879 1122532 : BAT *pb = BBP_desc(VIEWtparent(b));
1880 1122532 : MT_lock_set(&pb->theaplock); /* the parent's mode is read under the parent's own lock */
1881 1121942 : prestricted = pb->batRestricted;
1882 1121942 : MT_lock_unset(&pb->theaplock);
1883 : } else {
1884 : prestricted = BAT_WRITE; /* just initialize with anything */
1885 : }
1886 10128921 : if (bi.restricted == BAT_READ &&
1887 10087904 : (!VIEWtparent(b) || prestricted == BAT_READ)) { /* b is read-only and, if it is a view, so is its parent: return a view on [l, h) */
1888 10087906 : bn = VIEWcreate(b->hseqbase + low, b, l, h);
1889 10087906 : if (bn == NULL)
1890 : goto doreturn;
1891 : } else {
1892 : /* create a new BAT and put everything into it */
1893 41015 : BUN p = l;
1894 41015 : BUN q = h;
1895 :
1896 41015 : bn = COLnew((oid) (b->hseqbase + low), BATtdensebi(&bi) || (b->ttype == TYPE_oid && h == l) ? TYPE_void : b->ttype, h - l, TRANSIENT); /* void result for a dense tail or for an empty oid slice */
1897 41015 : if (bn == NULL)
1898 0 : goto doreturn;
1899 :
1900 41015 : if (bn->ttype == TYPE_void) { /* nothing to copy: only the count and the tail seqbase are set */
1901 24222 : BATsetcount(bn, h - l);
1902 24222 : BATtseqbase(bn, is_oid_nil(bi.tseq) ? oid_nil : h == l ? 0 : (oid) (bi.tseq + low));
1903 16793 : } else if (bn->tvheap == NULL) { /* no var heap: the values are copied with a single memcpy */
1904 10364 : assert(BATatoms[bn->ttype].atomPut == NULL);
1905 10364 : memcpy(Tloc(bn, 0), (const char *) bi.base + (p << bi.shift),
1906 10364 : (q - p) << bn->tshift);
1907 10364 : bn->theap->dirty = true;
1908 10364 : BATsetcount(bn, h - l);
1909 : } else { /* var heap present: append the values one at a time */
1910 1659951 : for (; p < q; p++) {
1911 1653520 : if (bunfastapp(bn, BUNtail(bi, p)) != GDK_SUCCEED) {
1912 0 : BBPreclaim(bn);
1913 0 : bn = NULL;
1914 0 : goto doreturn;
1915 : }
1916 : }
1917 : }
1918 41017 : bn->theap->dirty = true;
1919 41017 : bn->tsorted = bi.sorted || bn->batCount <= 1; /* properties come from the iterator's copy of b's properties; at most one row is always sorted and key */
1920 41017 : bn->trevsorted = bi.revsorted || bn->batCount <= 1;
1921 41017 : bn->tkey = bi.key || bn->batCount <= 1;
1922 41017 : bn->tnonil = bi.nonil;
1923 41017 : bn->tnil = false; /* we don't know */
1924 41017 : if (bi.nosorted > l && bi.nosorted < h && !bn->tsorted) /* rebase the recorded position when it lies strictly between l and h */
1925 1742 : bn->tnosorted = bi.nosorted - l;
1926 : else
1927 39275 : bn->tnosorted = 0;
1928 41017 : if (bi.norevsorted > l && bi.norevsorted < h && !bn->trevsorted)
1929 4009 : bn->tnorevsorted = bi.norevsorted - l;
1930 : else
1931 37008 : bn->tnorevsorted = 0;
1932 41017 : if (bi.nokey[0] >= l && bi.nokey[0] < h && /* both recorded positions must lie in [l, h) and be distinct */
1933 34299 : bi.nokey[1] >= l && bi.nokey[1] < h &&
1934 408 : bi.nokey[0] != bi.nokey[1] &&
1935 : !bn->tkey) {
1936 408 : bn->tnokey[0] = bi.nokey[0] - l;
1937 408 : bn->tnokey[1] = bi.nokey[1] - l;
1938 : } else {
1939 40609 : bn->tnokey[0] = bn->tnokey[1] = 0;
1940 : }
1941 : }
1942 10128721 : doreturn: /* common exit: release the iterator and log; bn may be NULL here */
1943 10128721 : bat_iterator_end(&bi);
1944 10128900 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",lo=" BUNFMT ",hi=" BUNFMT " -> "
1945 : ALGOOPTBATFMT "\n",
1946 : ALGOBATPAR(b), l, h, ALGOOPTBATPAR(bn));
1947 : return bn;
1948 : }
1949 :
1950 : #define BAT_ORDERED(TPE) /* BATordered scan over a tail of fixed-width type TPE; expects b, bi, t0 and the label doreturn in scope */ \
1951 : do { \
1952 : const TPE *restrict vals = Tloc(b, 0); \
1953 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
1954 : if (vals[p - 1] > vals[p]) { /* descending pair: not sorted, stop scanning */ \
1955 : b->tnosorted = bi.nosorted = p; /* also kept in bi, from which BATordered updates the parent bat */ \
1956 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
1957 : goto doreturn; \
1958 : } else if (vals[p - 1] < vals[p]) { /* ascending pair: evidence against reverse sortedness */ \
1959 : if (!b->trevsorted && b->tnorevsorted == 0) { \
1960 : b->tnorevsorted = bi.norevsorted = p; \
1961 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b)); \
1962 : } \
1963 : } else if (!b->tkey && b->tnokey[1] == 0) { /* equal pair: evidence against keyness, recorded once */ \
1964 : b->tnokey[0] = bi.nokey[0] = p - 1; \
1965 : b->tnokey[1] = bi.nokey[1] = p; \
1966 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b)); \
1967 : } \
1968 : } \
1969 : } while (0)
1970 :
1971 : #define BAT_ORDERED_FP(TPE) /* BATordered scan over a floating-point tail of type TPE; expects b, bi, t0 and the label doreturn in scope */ \
1972 : do { \
1973 : const TPE *restrict vals = Tloc(b, 0); \
1974 : TPE prev = vals[0]; \
1975 : bool prevnil = is_##TPE##_nil(prev); /* nil status of prev, carried over to the next iteration */ \
1976 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
1977 : TPE next = vals[p]; \
1978 : int cmp = prevnil ? -!(prevnil = is_##TPE##_nil(next)) : (prevnil = is_##TPE##_nil(next)) ? 1 : (prev > next) - (prev < next); /* nil equals nil and is smaller than any other value; prevnil is updated as a side effect */ \
1979 : prev = next; \
1980 : if (cmp > 0) { /* descending pair: not sorted, stop scanning */ \
1981 : b->tnosorted = bi.nosorted = p; \
1982 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
1983 : goto doreturn; \
1984 : } else if (cmp < 0) { /* ascending pair: evidence against reverse sortedness */ \
1985 : if (!b->trevsorted && b->tnorevsorted == 0) { \
1986 : b->tnorevsorted = bi.norevsorted = p; \
1987 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b)); \
1988 : } \
1989 : } else if (!b->tkey && b->tnokey[1] == 0) { /* equal pair: evidence against keyness, recorded once */ \
1990 : b->tnokey[0] = bi.nokey[0] = p - 1; \
1991 : b->tnokey[1] = bi.nokey[1] = p; \
1992 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b)); \
1993 : } \
1994 : } \
1995 : } while (0)
1996 :
1997 : /* Return whether the tail of BAT b is sorted in ascending order. If we don't know, invest
1998 : * in a scan (done while holding b->theaplock) and record the results in the bat descriptor. If during
1999 : * the scan we happen to find evidence that the BAT is not reverse sorted or not key, we record the
2000 : * location. If b is a view with the same count, heap and type as its parent, the parent is updated too. */
2001 : bool
2002 2947083 : BATordered(BAT *b)
2003 : {
2004 2947083 : lng t0 = GDKusec(); /* start time, only used in the debug messages */
2005 2947070 : bool sorted;
2006 :
2007 2947070 : MT_lock_set(&b->theaplock);
2008 2947057 : if (b->ttype == TYPE_void || b->tsorted || BATcount(b) == 0) { /* void, already known sorted, or empty */
2009 451298 : MT_lock_unset(&b->theaplock);
2010 451288 : return true;
2011 : }
2012 2495759 : if (b->tnosorted > 0 || !ATOMlinear(b->ttype)) { /* a counterexample is already recorded, or the type is not linear */
2013 2082628 : MT_lock_unset(&b->theaplock);
2014 2082610 : return false;
2015 : }
2016 :
2017 : /* There are a few reasons why we need a lock here. It may be
2018 : * that multiple threads call this functions at the same time
2019 : * (happens a lot with mitosis/mergetable), but we only need to
2020 : * scan the bat in one thread: the others can reap the rewards
2021 : * when that one thread is done. Also, we need the heap to
2022 : * remain accessible (could have used bat_iterator for that),
2023 : * and, and this is the killer argument, we may need to make
2024 : * changes to the bat descriptor. */
2025 413131 : BATiter bi = bat_iterator_nolock(b); /* local copy of b's properties; kept in step with b below and used after unlocking to update the parent */
2026 413131 : if (!b->tsorted && b->tnosorted == 0) {
2027 760482 : switch (ATOMbasetype(b->ttype)) {
2028 62945 : case TYPE_bte:
2029 128464477 : BAT_ORDERED(bte);
2030 : break;
2031 7593 : case TYPE_sht:
2032 1590262 : BAT_ORDERED(sht);
2033 : break;
2034 296363 : case TYPE_int:
2035 119775792 : BAT_ORDERED(int);
2036 : break;
2037 7436 : case TYPE_lng:
2038 63782843 : BAT_ORDERED(lng);
2039 : break;
2040 : #ifdef HAVE_HGE
2041 376 : case TYPE_hge:
2042 8002679 : BAT_ORDERED(hge);
2043 : break;
2044 : #endif
2045 963 : case TYPE_flt:
2046 8007194 : BAT_ORDERED_FP(flt);
2047 : break;
2048 736 : case TYPE_dbl:
2049 8195533 : BAT_ORDERED_FP(dbl);
2050 : break;
2051 : case TYPE_str:
2052 21075285 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2053 21061336 : int c;
2054 21061336 : const char *p1 = BUNtvar(bi, p - 1);
2055 21061336 : const char *p2 = BUNtvar(bi, p);
2056 21061336 : if (p1 == p2) /* same pointer, hence equal without comparing characters */
2057 : c = 0;
2058 2208231 : else if (p1[0] == '\200') { /* a string starting with '\200' compares equal to another such string and smaller than anything else (presumably the nil string -- confirm) */
2059 1739 : if (p2[0] == '\200')
2060 : c = 0;
2061 : else
2062 : c = -1;
2063 2206492 : } else if (p2[0] == '\200')
2064 : c = 1;
2065 : else
2066 2205375 : c = strcmp(p1, p2);
2067 2205375 : if (c > 0) { /* descending pair: not sorted, stop scanning */
2068 22584 : b->tnosorted = bi.nosorted = p;
2069 22584 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2070 22584 : goto doreturn;
2071 21038752 : } else if (c < 0) { /* ascending pair: evidence against reverse sortedness */
2072 2185597 : assert(!b->trevsorted);
2073 2185597 : if (b->tnorevsorted == 0) {
2074 9672 : b->tnorevsorted = bi.norevsorted = p;
2075 9672 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b));
2076 : }
2077 18853155 : } else if (b->tnokey[1] == 0) { /* equal pair: evidence against keyness, recorded once */
2078 3187 : assert(!b->tkey);
2079 3187 : b->tnokey[0] = bi.nokey[0] = p - 1;
2080 3187 : b->tnokey[1] = bi.nokey[1] = p;
2081 21038752 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b));
2082 : }
2083 : }
2084 : break;
2085 186 : default: { /* any other type: use the comparison function of the atom type */
2086 186 : int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
2087 2482 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2088 2437 : int c;
2089 2437 : if ((c = cmpf(BUNtail(bi, p - 1), BUNtail(bi, p))) > 0) {
2090 141 : b->tnosorted = bi.nosorted = p;
2091 141 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2092 141 : goto doreturn;
2093 2296 : } else if (c < 0) {
2094 2246 : if (!b->trevsorted && b->tnorevsorted == 0) {
2095 90 : b->tnorevsorted = bi.norevsorted = p;
2096 90 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b));
2097 : }
2098 50 : } else if (!b->tkey && b->tnokey[1] == 0) {
2099 8 : b->tnokey[0] = bi.nokey[0] = p - 1;
2100 8 : b->tnokey[1] = bi.nokey[1] = p;
2101 2296 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b));
2102 : }
2103 : }
2104 : break;
2105 : }
2106 : }
2107 : /* we only get here if we completed the scan; note that
2108 : * if we didn't record evidence about *reverse*
2109 : * sortedness, we know that the BAT is also reverse
2110 : * sorted; similarly, if we didn't record evidence about
2111 : * keyness, we know the BAT is key */
2112 110108 : b->tsorted = bi.sorted = true;
2113 110108 : TRC_DEBUG(ALGO, "Fixed sorted for " ALGOBATFMT " (" LLFMT " usec)\n", ALGOBATPAR(b), GDKusec() - t0);
2114 110100 : if (!b->trevsorted && b->tnorevsorted == 0) {
2115 45711 : b->trevsorted = bi.revsorted = true;
2116 45711 : TRC_DEBUG(ALGO, "Fixed revsorted for " ALGOBATFMT "\n", ALGOBATPAR(b));
2117 : }
2118 110100 : if (!b->tkey && b->tnokey[1] == 0) {
2119 44276 : b->tkey = bi.key = true;
2120 44276 : TRC_DEBUG(ALGO, "Fixed key for " ALGOBATFMT "\n", ALGOBATPAR(b));
2121 : }
2122 : }
2123 110100 : doreturn:
2124 413123 : sorted = b->tsorted; /* the answer and the parent id are read while b's lock is still held */
2125 413123 : bat pbid = VIEWtparent(b);
2126 413123 : MT_lock_unset(&b->theaplock);
2127 413131 : if (pbid) {
2128 234715 : BAT *pb = BBP_desc(pbid);
2129 234715 : MT_lock_set(&pb->theaplock); /* taken only after b's lock was released, so the two locks are never held together here */
2130 234715 : if (bi.count == BATcount(pb) && /* only when the parent has the same count, heap and type as what was scanned */
2131 196490 : bi.h == pb->theap &&
2132 196490 : bi.type == pb->ttype) {
2133 : /* add to knowledge in parent bat */
2134 196483 : pb->tsorted |= bi.sorted;
2135 196483 : if (pb->tnosorted == 0)
2136 196483 : pb->tnosorted = bi.nosorted;
2137 196483 : pb->trevsorted |= bi.revsorted;
2138 196483 : if (pb->tnorevsorted == 0)
2139 27995 : pb->tnorevsorted = bi.norevsorted;
2140 196483 : pb->tkey |= bi.key;
2141 196483 : if (pb->tnokey[1] == 0) {
2142 173647 : pb->tnokey[0] = bi.nokey[0];
2143 173647 : pb->tnokey[1] = bi.nokey[1];
2144 : }
2145 : }
2146 234715 : MT_lock_unset(&pb->theaplock);
2147 : }
2148 : return sorted;
2149 : }
2150 :
2151 : #define BAT_REVORDERED(TPE) /* BATordered_rev scan over a tail of fixed-width type TPE; expects b, bi, t0 and the label doreturn in scope */ \
2152 : do { \
2153 : const TPE *restrict vals = Tloc(b, 0); \
2154 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
2155 : if (vals[p - 1] < vals[p]) { /* ascending pair: not reverse sorted, stop scanning */ \
2156 : b->tnorevsorted = bi.norevsorted = p; /* also kept in bi, from which BATordered_rev updates the parent bat */ \
2157 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
2158 : goto doreturn; \
2159 : } \
2160 : } \
2161 : } while (0)
2162 :
2163 : #define BAT_REVORDERED_FP(TPE) \
2164 : do { \
2165 : const TPE *restrict vals = Tloc(b, 0); \
2166 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
2167 : TPE prev = vals[p - 1], next = vals[p]; \
2168 : int cmp = is_flt_nil(prev) ? -!is_flt_nil(next) : is_flt_nil(next) ? 1 : (prev > next) - (prev < next); \
2169 : if (cmp < 0) { \
2170 : b->tnorevsorted = bi.norevsorted = p; \
2171 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
2172 : goto doreturn; \
2173 : } \
2174 : } \
2175 : } while (0)
2176 :
2177 : /* Return whether the tail of BAT b is sorted in descending order (false for NULL or a non-linear type).
2178 : * If we don't know, invest in a scan (done while holding b->theaplock) and record the results in the bat
2179 : * descriptor. If b is a view with the same count, heap and type as its parent, the parent is updated too. */
2180 : bool
2181 2738695 : BATordered_rev(BAT *b)
2182 : {
2183 2738695 : lng t0 = GDKusec(); /* start time, only used in the debug messages */
2184 2738718 : bool revsorted;
2185 :
2186 2738718 : if (b == NULL || !ATOMlinear(b->ttype))
2187 : return false;
2188 2738692 : MT_lock_set(&b->theaplock);
2189 2738702 : if (BATcount(b) <= 1 || b->trevsorted) { /* at most one row, or already known */
2190 204852 : MT_lock_unset(&b->theaplock);
2191 204851 : return true;
2192 : }
2193 2533850 : if (b->ttype == TYPE_void) { /* more than one row here: reverse sorted only if the seqbase is nil */
2194 17120 : MT_lock_unset(&b->theaplock);
2195 17119 : return is_oid_nil(b->tseqbase); /* NOTE(review): tseqbase is read after the lock was released */
2196 : }
2197 2516730 : if (BATtdense(b) || b->tnorevsorted > 0) { /* dense with more than one row, or a counterexample is already recorded */
2198 2438131 : MT_lock_unset(&b->theaplock);
2199 2438100 : return false;
2200 : }
2201 78599 : BATiter bi = bat_iterator_nolock(b); /* local copy of b's properties; kept in step with b below and used after unlocking to update the parent */
2202 78599 : if (!b->trevsorted && b->tnorevsorted == 0) {
2203 123205 : switch (ATOMbasetype(b->ttype)) {
2204 32551 : case TYPE_bte:
2205 9704998 : BAT_REVORDERED(bte);
2206 : break;
2207 3686 : case TYPE_sht:
2208 3142452 : BAT_REVORDERED(sht);
2209 : break;
2210 24200 : case TYPE_int:
2211 1600562 : BAT_REVORDERED(int);
2212 : break;
2213 3854 : case TYPE_lng:
2214 2842565 : BAT_REVORDERED(lng);
2215 : break;
2216 : #ifdef HAVE_HGE
2217 135 : case TYPE_hge:
2218 1048 : BAT_REVORDERED(hge);
2219 : break;
2220 : #endif
2221 512 : case TYPE_flt:
2222 1994 : BAT_REVORDERED_FP(flt);
2223 : break;
2224 282 : case TYPE_dbl:
2225 500614 : BAT_REVORDERED_FP(dbl);
2226 : break;
2227 13379 : default: { /* any other type: use the comparison function of the atom type */
2228 13379 : int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
2229 505821 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2230 502589 : if (cmpf(BUNtail(bi, p - 1), BUNtail(bi, p)) < 0) { /* ascending pair: not reverse sorted, stop scanning */
2231 10148 : b->tnorevsorted = bi.norevsorted = p; /* also kept in bi so that the parent update below sees it */
2232 10148 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2233 10148 : goto doreturn;
2234 : }
2235 : }
2236 : break;
2237 : }
2238 : }
2239 18845 : b->trevsorted = bi.revsorted = true; /* only reached when the scan ran to completion */
2240 18845 : TRC_DEBUG(ALGO, "Fixed revsorted for " ALGOBATFMT " (" LLFMT " usec)\n", ALGOBATPAR(b), GDKusec() - t0);
2241 : }
2242 18845 : doreturn:
2243 78600 : revsorted = b->trevsorted; /* the answer and the parent id are read while b's lock is still held */
2244 78600 : bat pbid = VIEWtparent(b);
2245 78600 : MT_lock_unset(&b->theaplock);
2246 78602 : if (pbid) {
2247 19763 : BAT *pb = BBP_desc(pbid);
2248 19763 : MT_lock_set(&pb->theaplock); /* taken only after b's lock was released */
2249 19763 : if (bi.count == BATcount(pb) && /* only when the parent has the same count, heap and type as what was scanned */
2250 4371 : bi.h == pb->theap &&
2251 4371 : bi.type == pb->ttype) {
2252 : /* add to knowledge in parent bat */
2253 4371 : pb->trevsorted |= bi.revsorted;
2254 4371 : if (pb->tnorevsorted == 0)
2255 4371 : pb->tnorevsorted = bi.norevsorted;
2256 : }
2257 19763 : MT_lock_unset(&pb->theaplock);
2258 : }
2259 : return revsorted;
2260 : }
2261 :
2262 : /* figure out which sort function is to be called for the n values at h and call it;
2263 : * stable sort can produce an error (not enough memory available),
2264 : * "quick" sort does not produce errors, so GDK_SUCCEED is returned after it */
2265 : static gdk_return
2266 3953083 : do_sort(void *restrict h, void *restrict t, const void *restrict base, /* BATsort passes: h = the values to sort, t = the order oids or NULL, base = the var heap base or NULL */
2267 : size_t n, int hs, int ts, int tpe, bool reverse, bool nilslast, /* BATsort passes: hs = width of a value in h, ts = sizeof(oid) or 0, tpe = the tail type */
2268 : bool stable)
2269 : {
2270 3953083 : if (n <= 1) /* trivially sorted */
2271 : return GDK_SUCCEED;
2272 2754059 : switch (tpe) {
2273 2706234 : case TYPE_bte:
2274 : case TYPE_sht:
2275 : case TYPE_int:
2276 : case TYPE_lng:
2277 : #ifdef HAVE_HGE
2278 : case TYPE_hge:
2279 : #endif
2280 : case TYPE_date:
2281 : case TYPE_daytime:
2282 : case TYPE_timestamp:
2283 2706234 : assert(base == NULL);
2284 2706234 : if (nilslast == reverse && (stable || n > 100)) /* GDKrsort is used for a stable sort or for more than 100 values, and only when nilslast equals reverse */
2285 20326 : return GDKrsort(h, t, n, hs, ts, reverse, false);
2286 : break;
2287 4 : case TYPE_uuid:
2288 4 : assert(base == NULL);
2289 4 : if (nilslast == reverse && (stable || n > 100))
2290 1 : return GDKrsort(h, t, n, hs, ts, reverse, true); /* NOTE(review): same call as above except for the last argument (true); its meaning is not visible here */
2291 : break;
2292 : default:
2293 : break;
2294 : }
2295 47914 : if (stable) { /* the stable sorts return their own status */
2296 27 : if (reverse)
2297 0 : return GDKssort_rev(h, t, base, n, hs, ts, tpe);
2298 : else
2299 27 : return GDKssort(h, t, base, n, hs, ts, tpe);
2300 : } else {
2301 2733705 : GDKqsort(h, t, base, n, hs, ts, tpe, reverse, nilslast); /* no status to check */
2302 : }
2303 2733705 : return GDK_SUCCEED;
2304 : }
2305 :
2306 : /* Sort the bat b according to both o and g. The stable and reverse
2307 : * parameters indicate whether the sort should be stable or descending
2308 : * respectively. The parameter b is required, o and g are optional
2309 : * (i.e., they may be NULL).
2310 : *
2311 : * A sorted copy is returned through the sorted parameter, the new
2312 : * ordering is returned through the order parameter, group information
2313 : * is returned through the groups parameter. Each of the three output
2314 : * parameters may be NULL, but sorted and order must not both be NULL:
2315 : * in that case this function fails with an error.
2316 : *
2317 : * If o is specified, it is used to first rearrange b according to the
2318 : * order specified in o, after which b is sorted taking g into
2319 : * account.
2320 : *
2321 : * If g is specified, it indicates groups which should be individually
2322 : * ordered. Each run of consecutive equal values in g indicates a
2323 : * group which is sorted according to stable, reverse and nilslast. g is used
2324 : * after the order in b was rearranged according to o.
2325 : *
2326 : * The outputs order and groups can be used in subsequent calls to
2327 : * this function. This can be used if multiple BATs need to be sorted
2328 : * together. The BATs should then be sorted in order of significance,
2329 : * and each following call should use the original unordered BAT plus
2330 : * the order and groups bat from the previous call. In this case, the
2331 : * sorted BATs are not of much use, so the sorted output parameter
2332 : * does not need to be specified.
2333 : * Apart from error checking and maintaining reference counts, sorting
2334 : * three columns (col1, col2, col3) could look like this with the
2335 : * sorted results in (col1s, col2s, col3s):
2336 : * BATsort(&col1s, &ord1, &grp1, col1, NULL, NULL, false, false, false);
2337 : * BATsort(&col2s, &ord2, &grp2, col2, ord1, grp1, false, false, false);
2338 : * BATsort(&col3s, NULL, NULL, col3, ord2, grp2, false, false, false);
2339 : * Note that the "reverse" parameter can be different for each call.
2340 : */
2341 : gdk_return
2342 29649 : BATsort(BAT **sorted, BAT **order, BAT **groups,
2343 : BAT *b, BAT *o, BAT *g, bool reverse, bool nilslast, bool stable)
2344 : {
2345 29649 : BAT *bn = NULL, *on = NULL, *gn = NULL, *pb = NULL;
2346 29649 : BATiter pbi;
2347 29649 : oid *restrict grps, *restrict ords, prev;
2348 29649 : BUN p, q, r;
2349 29649 : lng t0 = GDKusec();
2350 29649 : bool mkorderidx, orderidxlock = false;
2351 29649 : Heap *oidxh = NULL;
2352 :
2353 : /* we haven't implemented NILs as largest value for stable
2354 : * sort, so NILs come first for ascending and last for
2355 : * descending */
2356 29649 : assert(!stable || reverse == nilslast);
2357 :
2358 29649 : if (b == NULL) {
2359 0 : GDKerror("b must exist\n");
2360 0 : return GDK_FAIL;
2361 : }
2362 29649 : if (stable && reverse != nilslast) {
2363 0 : GDKerror("stable sort cannot have reverse != nilslast\n");
2364 0 : return GDK_FAIL;
2365 : }
2366 29649 : if (!ATOMlinear(b->ttype)) {
2367 0 : GDKerror("type %s cannot be sorted\n", ATOMname(b->ttype));
2368 0 : return GDK_FAIL;
2369 : }
2370 29649 : MT_lock_set(&b->theaplock);
2371 29649 : if (b->ttype == TYPE_void) {
2372 115 : b->tsorted = true;
2373 168 : if (b->trevsorted != (is_oid_nil(b->tseqbase) || b->batCount <= 1)) {
2374 0 : b->trevsorted = !b->trevsorted;
2375 : }
2376 230 : if (b->tkey != (!is_oid_nil(b->tseqbase) || b->batCount <= 1)) {
2377 0 : b->tkey = !b->tkey;
2378 : }
2379 29534 : } else if (b->batCount <= 1) {
2380 9610 : if (!b->tsorted || !b->trevsorted) {
2381 23 : b->tsorted = b->trevsorted = true;
2382 : }
2383 : }
2384 29649 : MT_lock_unset(&b->theaplock);
2385 29649 : if (o != NULL &&
2386 16104 : (ATOMtype(o->ttype) != TYPE_oid || /* oid tail */
2387 16104 : BATcount(o) != BATcount(b) || /* same size as b */
2388 6374 : (o->ttype == TYPE_void && /* no nil tail */
2389 2434 : BATcount(o) != 0 &&
2390 2434 : is_oid_nil(o->tseqbase)))) {
2391 0 : GDKerror("o must have type oid and same size as b\n");
2392 0 : return GDK_FAIL;
2393 : }
2394 29649 : if (g != NULL &&
2395 16104 : (ATOMtype(g->ttype) != TYPE_oid || /* oid tail */
2396 16104 : !g->tsorted || /* sorted */
2397 16104 : BATcount(g) != BATcount(b) || /* same size as b */
2398 6637 : (g->ttype == TYPE_void && /* no nil tail */
2399 2697 : BATcount(g) != 0 &&
2400 2697 : is_oid_nil(g->tseqbase)))) {
2401 0 : GDKerror("g must have type oid, sorted on the tail, "
2402 : "and same size as b\n");
2403 0 : return GDK_FAIL;
2404 : }
2405 29649 : if (sorted == NULL && order == NULL) {
2406 : /* no place to put result, so we're done quickly */
2407 0 : GDKerror("no place to put the result.\n");
2408 0 : return GDK_FAIL;
2409 : }
2410 29649 : if (g == NULL && !stable) {
2411 : /* pre-ordering doesn't make sense if we're not
2412 : * subsorting and the sort is not stable */
2413 13263 : o = NULL;
2414 : }
2415 29649 : if (b->tnonil) {
2416 : /* if there are no nils, placement of nils doesn't
2417 : * matter, so set nilslast such that ordered bits can
2418 : * be used */
2419 21653 : nilslast = reverse;
2420 : }
2421 29649 : pbi = bat_iterator(NULL);
2422 30613 : if (BATcount(b) <= 1 ||
2423 19880 : (reverse == nilslast &&
2424 : (reverse ? b->trevsorted : b->tsorted) &&
2425 5053 : o == NULL && g == NULL &&
2426 2324 : (groups == NULL || BATtkey(b) ||
2427 : (reverse ? b->tsorted : b->trevsorted)))) {
2428 : /* trivially (sub)sorted, and either we don't need to
2429 : * return group information, or we can trivially
2430 : * deduce the groups */
2431 11547 : if (sorted) {
2432 10113 : bn = COLcopy(b, b->ttype, false, TRANSIENT);
2433 10113 : if (bn == NULL)
2434 0 : goto error;
2435 10113 : *sorted = bn;
2436 : }
2437 11547 : if (order) {
2438 10656 : on = BATdense(b->hseqbase, b->hseqbase, BATcount(b));
2439 10656 : if (on == NULL)
2440 0 : goto error;
2441 10656 : *order = on;
2442 : }
2443 11547 : if (groups) {
2444 5961 : if (BATtkey(b)) {
2445 : /* singleton groups */
2446 5548 : gn = BATdense(b->hseqbase, 0, BATcount(b));
2447 5548 : if (gn == NULL)
2448 0 : goto error;
2449 : } else {
2450 : /* single group */
2451 413 : const oid *o = 0;
2452 413 : assert(BATcount(b) == 1 ||
2453 : (b->tsorted && b->trevsorted));
2454 413 : gn = BATconstant(b->hseqbase, TYPE_oid, &o, BATcount(b), TRANSIENT);
2455 413 : if (gn == NULL)
2456 0 : goto error;
2457 : }
2458 5961 : *groups = gn;
2459 : }
2460 11547 : bat_iterator_end(&pbi);
2461 11547 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o="
2462 : ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
2463 : ",reverse=%d,nilslast=%d,stable=%d) = ("
2464 : ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2465 : ALGOOPTBATFMT " -- trivial (" LLFMT
2466 : " usec)\n",
2467 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2468 : ALGOOPTBATPAR(g), reverse, nilslast, stable,
2469 : ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
2470 : ALGOOPTBATPAR(on), GDKusec() - t0);
2471 11547 : return GDK_SUCCEED;
2472 : }
2473 18102 : if (VIEWtparent(b)) {
2474 3509 : pb = BATdescriptor(VIEWtparent(b));
2475 3509 : if (pb != NULL &&
2476 3509 : (b->tbaseoff != pb->tbaseoff ||
2477 2790 : BATcount(b) != BATcount(pb) ||
2478 2576 : b->hseqbase != pb->hseqbase ||
2479 2567 : BATatoms[b->ttype].atomCmp != BATatoms[pb->ttype].atomCmp)) {
2480 942 : BBPunfix(pb->batCacheid);
2481 942 : pb = NULL;
2482 : }
2483 : } else {
2484 : pb = b;
2485 : }
2486 18102 : bat_iterator_end(&pbi);
2487 18102 : pbi = bat_iterator(pb);
2488 : /* when we will create an order index if it doesn't already exist */
2489 18102 : mkorderidx = (g == NULL && !reverse && !nilslast && pb != NULL && (order || !pbi.transient));
2490 18102 : if (g == NULL && !reverse && !nilslast && pb != NULL) {
2491 5950 : (void) BATcheckorderidx(pb);
2492 5950 : MT_lock_set(&pb->batIdxLock);
2493 5950 : if (pb->torderidx) {
2494 59 : if (!stable || ((oid *) pb->torderidx->base)[2]) {
2495 : /* we can use the order index */
2496 59 : oidxh = pb->torderidx;
2497 59 : HEAPincref(oidxh);
2498 : }
2499 : mkorderidx = false;
2500 5891 : } else if (b != pb) {
2501 : /* don't build orderidx on parent bat */
2502 : mkorderidx = false;
2503 4985 : } else if (mkorderidx) {
2504 : /* keep lock when going to create */
2505 4372 : orderidxlock = true;
2506 : }
2507 5950 : if (!orderidxlock)
2508 1578 : MT_lock_unset(&pb->batIdxLock);
2509 : }
2510 18102 : if (g == NULL && o == NULL && !reverse && !nilslast && oidxh != NULL) {
2511 : /* there is an order index that we can use */
2512 59 : on = COLnew(pb->hseqbase, TYPE_oid, pbi.count, TRANSIENT);
2513 59 : if (on == NULL)
2514 0 : goto error;
2515 59 : memcpy(Tloc(on, 0), (oid *) oidxh->base + ORDERIDXOFF, pbi.count * sizeof(oid));
2516 59 : BATsetcount(on, BATcount(b));
2517 59 : HEAPdecref(oidxh, false);
2518 59 : oidxh = NULL;
2519 59 : on->tkey = true;
2520 59 : on->tnil = false;
2521 59 : on->tnonil = true;
2522 59 : on->tsorted = on->trevsorted = false;
2523 59 : on->tseqbase = oid_nil;
2524 59 : if (sorted || groups) {
2525 59 : bn = BATproject(on, b);
2526 59 : if (bn == NULL)
2527 0 : goto error;
2528 59 : bn->tsorted = true;
2529 59 : if (groups) {
2530 4 : if (BATgroup_internal(groups, NULL, NULL, bn, NULL, g, NULL, NULL, true) != GDK_SUCCEED)
2531 0 : goto error;
2532 4 : if (sorted &&
2533 4 : (*groups)->tkey &&
2534 : g == NULL) {
2535 : /* if new groups bat is key
2536 : * and since there is no input
2537 : * groups bat, we know the
2538 : * result bat is key */
2539 4 : bn->tkey = true;
2540 : }
2541 : }
2542 59 : if (sorted)
2543 59 : *sorted = bn;
2544 : else {
2545 0 : BBPunfix(bn->batCacheid);
2546 0 : bn = NULL;
2547 : }
2548 : }
2549 59 : if (order)
2550 9 : *order = on;
2551 : else {
2552 50 : BBPunfix(on->batCacheid);
2553 50 : on = NULL;
2554 : }
2555 59 : bat_iterator_end(&pbi);
2556 59 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o="
2557 : ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
2558 : ",reverse=%d,nilslast=%d,stable=%d) = ("
2559 : ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2560 : ALGOOPTBATFMT " -- orderidx (" LLFMT
2561 : " usec)\n",
2562 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2563 : ALGOOPTBATPAR(g), reverse, nilslast, stable,
2564 : ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
2565 : ALGOOPTBATPAR(on), GDKusec() - t0);
2566 59 : if (pb != NULL && pb != b)
2567 53 : BBPunfix(pb->batCacheid);
2568 59 : return GDK_SUCCEED;
2569 11654 : } else if (oidxh) {
2570 0 : HEAPdecref(oidxh, false);
2571 0 : oidxh = NULL;
2572 : }
2573 18043 : if (o) {
2574 11085 : bn = BATproject(o, b);
2575 11085 : if (bn == NULL)
2576 0 : goto error;
2577 11085 : if (bn->ttype == TYPE_void || isVIEW(bn)) {
2578 4840 : BAT *b2 = COLcopy(bn, ATOMtype(bn->ttype), true, TRANSIENT);
2579 2420 : BBPunfix(bn->batCacheid);
2580 2420 : bn = b2;
2581 : }
2582 11085 : if (pb) {
2583 10662 : bat_iterator_end(&pbi);
2584 10662 : if (pb != b)
2585 1270 : BBPunfix(pb->batCacheid);
2586 10662 : pbi = bat_iterator(NULL);
2587 10662 : pb = NULL;
2588 : }
2589 : } else {
2590 6958 : bn = COLcopy(b, b->ttype, true, TRANSIENT);
2591 : }
2592 18043 : if (bn == NULL)
2593 0 : goto error;
2594 18043 : if (order) {
2595 : /* prepare order bat */
2596 15261 : if (o) {
2597 : /* make copy of input so that we can refine it;
2598 : * copy can be read-only if we take the shortcut
2599 : * below in the case g is "key" */
2600 9443 : on = COLcopy(o, TYPE_oid,
2601 9443 : g == NULL ||
2602 9443 : !(g->tkey || g->ttype == TYPE_void),
2603 : TRANSIENT);
2604 9443 : if (on == NULL)
2605 0 : goto error;
2606 9443 : BAThseqbase(on, b->hseqbase);
2607 9443 : on->tminpos = BUN_NONE;
2608 9443 : on->tmaxpos = BUN_NONE;
2609 : } else {
2610 : /* create new order */
2611 5818 : on = COLnew(b->hseqbase, TYPE_oid, BATcount(bn), TRANSIENT);
2612 5818 : if (on == NULL)
2613 0 : goto error;
2614 5818 : ords = (oid *) Tloc(on, 0);
2615 31453833 : for (p = 0, q = BATcount(bn); p < q; p++)
2616 31448015 : ords[p] = p + b->hseqbase;
2617 5818 : BATsetcount(on, BATcount(bn));
2618 5818 : on->tkey = true;
2619 5818 : on->tnil = false;
2620 5818 : on->tnonil = true;
2621 : }
2622 : /* COLcopy above can create TYPE_void */
2623 15261 : if (on->ttype != TYPE_void) {
2624 14513 : on->tsorted = on->trevsorted = false; /* it won't be sorted */
2625 14513 : on->tseqbase = oid_nil; /* and hence not dense */
2626 14513 : on->tnosorted = on->tnorevsorted = 0;
2627 : }
2628 15261 : *order = on;
2629 15261 : ords = (oid *) Tloc(on, 0);
2630 : } else {
2631 : ords = NULL;
2632 : }
2633 18043 : if (g) {
2634 11085 : if (g->tkey || g->ttype == TYPE_void) {
2635 : /* if g is "key", all groups are size 1, so no
2636 : * subsorting needed */
2637 4985 : if (sorted) {
2638 4597 : *sorted = bn;
2639 : } else {
2640 388 : BBPunfix(bn->batCacheid);
2641 388 : bn = NULL;
2642 : }
2643 4985 : if (order) {
2644 3814 : *order = on;
2645 3814 : if (o) {
2646 : /* we can inherit sortedness
2647 : * after all */
2648 3814 : on->tsorted = o->tsorted;
2649 3814 : on->trevsorted = o->trevsorted;
2650 3814 : if (o->tnosorted)
2651 56 : on->tnosorted = o->tnosorted;
2652 3814 : if (o->tnorevsorted)
2653 74 : on->tnorevsorted = o->tnorevsorted;
2654 : } else {
2655 : /* we didn't rearrange, so
2656 : * still sorted */
2657 0 : on->tsorted = true;
2658 0 : on->trevsorted = false;
2659 : }
2660 3814 : if (BATcount(on) <= 1) {
2661 0 : on->tsorted = true;
2662 0 : on->trevsorted = true;
2663 : }
2664 : }
2665 4985 : if (groups) {
2666 2881 : gn = COLcopy(g, g->ttype, false, TRANSIENT);
2667 2881 : if (gn == NULL)
2668 0 : goto error;
2669 2881 : *groups = gn;
2670 : }
2671 4985 : bat_iterator_end(&pbi);
2672 4985 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
2673 : ",o=" ALGOOPTBATFMT ",g=" ALGOBATFMT
2674 : ",reverse=%d,nilslast=%d,stable=%d"
2675 : ") = (" ALGOOPTBATFMT ","
2676 : ALGOOPTBATFMT "," ALGOOPTBATFMT
2677 : " -- key group (" LLFMT " usec)\n",
2678 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2679 : ALGOBATPAR(g), reverse, nilslast,
2680 : stable, ALGOOPTBATPAR(bn),
2681 : ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on),
2682 : GDKusec() - t0);
2683 4985 : if (pb != NULL && pb != b)
2684 0 : BBPunfix(pb->batCacheid);
2685 4985 : return GDK_SUCCEED;
2686 : }
2687 6100 : assert(g->ttype == TYPE_oid);
2688 6100 : grps = (oid *) Tloc(g, 0);
2689 6100 : prev = grps[0];
2690 6100 : if (BATmaterialize(bn, BUN_NONE) != GDK_SUCCEED)
2691 0 : goto error;
2692 45641516 : for (r = 0, p = 1, q = BATcount(g); p < q; p++) {
2693 45635416 : if (grps[p] != prev) {
2694 : /* sub sort [r,p) */
2695 7581036 : if (do_sort(Tloc(bn, r),
2696 3640429 : ords ? ords + r : NULL,
2697 3940607 : bn->tvheap ? bn->tvheap->base : NULL,
2698 3940607 : p - r, bn->twidth, ords ? sizeof(oid) : 0,
2699 3940607 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)
2700 0 : goto error;
2701 3940607 : r = p;
2702 3940607 : prev = grps[p];
2703 : }
2704 : }
2705 : /* sub sort [r,q) */
2706 11729 : if (do_sort(Tloc(bn, r),
2707 5629 : ords ? ords + r : NULL,
2708 6100 : bn->tvheap ? bn->tvheap->base : NULL,
2709 6100 : p - r, bn->twidth, ords ? sizeof(oid) : 0,
2710 6100 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)
2711 0 : goto error;
2712 : /* if single group (r==0) the result is (rev)sorted,
2713 : * otherwise (maybe) not */
2714 6100 : bn->tsorted = r == 0 && !reverse && !nilslast;
2715 12171 : bn->trevsorted = r == 0 && reverse && nilslast;
2716 : } else {
2717 6958 : Heap *m = NULL;
2718 : /* only invest in creating an order index if the BAT
2719 : * is persistent */
2720 6958 : if (mkorderidx) {
2721 4372 : assert(orderidxlock);
2722 4372 : if ((m = createOIDXheap(pb, stable)) != NULL &&
2723 : ords == NULL) {
2724 0 : ords = (oid *) m->base + ORDERIDXOFF;
2725 0 : if (o && o->ttype != TYPE_void)
2726 0 : memcpy(ords, Tloc(o, 0), BATcount(o) * sizeof(oid));
2727 0 : else if (o)
2728 0 : for (p = 0, q = BATcount(o); p < q; p++)
2729 0 : ords[p] = p + o->tseqbase;
2730 : else
2731 0 : for (p = 0, q = BATcount(b); p < q; p++)
2732 0 : ords[p] = p + b->hseqbase;
2733 : }
2734 : }
2735 6958 : if ((reverse != nilslast ||
2736 13241 : (reverse ? !bn->trevsorted : !bn->tsorted)) &&
2737 12752 : (BATmaterialize(bn, BUN_NONE) != GDK_SUCCEED ||
2738 6375 : do_sort(Tloc(bn, 0),
2739 : ords,
2740 6375 : bn->tvheap ? bn->tvheap->base : NULL,
2741 6375 : BATcount(bn), bn->twidth, ords ? sizeof(oid) : 0,
2742 6375 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)) {
2743 0 : if (m != NULL) {
2744 0 : HEAPfree(m, true);
2745 0 : GDKfree(m);
2746 : }
2747 0 : goto error;
2748 : }
2749 6958 : bn->tsorted = !reverse && !nilslast;
2750 6958 : bn->trevsorted = reverse && nilslast;
2751 6958 : if (m != NULL) {
2752 4372 : assert(orderidxlock);
2753 4372 : if (pb->torderidx == NULL) {
2754 4372 : if (ords != (oid *) m->base + ORDERIDXOFF) {
2755 4372 : memcpy((oid *) m->base + ORDERIDXOFF,
2756 : ords,
2757 4372 : pbi.count * sizeof(oid));
2758 : }
2759 4372 : pb->torderidx = m;
2760 4372 : persistOIDX(pb);
2761 : } else {
2762 0 : HEAPfree(m, true);
2763 0 : GDKfree(m);
2764 : }
2765 : }
2766 : }
2767 13058 : if (orderidxlock) {
2768 4372 : MT_lock_unset(&pb->batIdxLock);
2769 4372 : orderidxlock = false;
2770 : }
2771 13058 : bn->theap->dirty = true;
2772 13058 : bn->tnosorted = 0;
2773 13058 : bn->tnorevsorted = 0;
2774 13058 : bn->tnokey[0] = bn->tnokey[1] = 0;
2775 13058 : bn->tminpos = BUN_NONE;
2776 13058 : bn->tmaxpos = BUN_NONE;
2777 13058 : if (groups) {
2778 7933 : if (BATgroup_internal(groups, NULL, NULL, bn, NULL, g, NULL, NULL, true) != GDK_SUCCEED)
2779 0 : goto error;
2780 7932 : if ((*groups)->tkey &&
2781 906 : (g == NULL || (g->tsorted && g->trevsorted))) {
2782 : /* if new groups bat is key and the input
2783 : * group bat has a single value (both sorted
2784 : * and revsorted), we know the result bat is
2785 : * key */
2786 1126 : bn->tkey = true;
2787 : }
2788 : }
2789 :
2790 13057 : bat_iterator_end(&pbi);
2791 13058 : if (sorted)
2792 9869 : *sorted = bn;
2793 : else {
2794 3189 : BBPunfix(bn->batCacheid);
2795 3189 : bn = NULL;
2796 : }
2797 :
2798 13058 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o=" ALGOOPTBATFMT
2799 : ",g=" ALGOOPTBATFMT ",reverse=%d,nilslast=%d,"
2800 : "stable=%d) = (" ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2801 : ALGOOPTBATFMT " -- %ssort (" LLFMT " usec)\n",
2802 : ALGOBATPAR(b), ALGOOPTBATPAR(o), ALGOOPTBATPAR(g),
2803 : reverse, nilslast, stable, ALGOOPTBATPAR(bn),
2804 : ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on),
2805 : g ? "grouped " : "", GDKusec() - t0);
2806 13058 : if (pb && pb != b)
2807 1244 : BBPunfix(pb->batCacheid);
2808 : return GDK_SUCCEED;
2809 :
2810 0 : error:
2811 0 : bat_iterator_end(&pbi);
2812 0 : if (orderidxlock)
2813 0 : MT_lock_unset(&pb->batIdxLock);
2814 0 : if (oidxh)
2815 0 : HEAPdecref(oidxh, false);
2816 0 : BBPreclaim(bn);
2817 0 : if (pb && pb != b)
2818 0 : BBPunfix(pb->batCacheid);
2819 0 : BBPreclaim(on);
2820 0 : if (sorted)
2821 0 : *sorted = NULL;
2822 0 : if (order)
2823 0 : *order = NULL;
2824 0 : if (groups)
2825 0 : *groups = NULL;
2826 : return GDK_FAIL;
2827 : }
2828 :
2829 : /* return a new BAT of length n with seqbase hseq, and the constant v
2830 : * in the tail */
2831 : BAT *
2832 1074560 : BATconstant(oid hseq, int tailtype, const void *v, BUN n, role_t role)
2833 : {
2834 1074560 : BAT *bn;
2835 1074560 : void *restrict p;
2836 1074560 : BUN i;
2837 1074560 : lng t0 = 0;
2838 :
2839 1074560 : TRC_DEBUG_IF(ALGO) t0 = GDKusec();
2840 1074560 : if (v == NULL)
2841 : return NULL;
2842 1074560 : bn = COLnew(hseq, tailtype, n, role);
2843 1074419 : if (bn != NULL && n > 0) {
2844 74590 : p = Tloc(bn, 0);
2845 74590 : switch (ATOMstorage(tailtype)) {
2846 24 : case TYPE_void:
2847 24 : v = &oid_nil;
2848 24 : BATtseqbase(bn, oid_nil);
2849 24 : break;
2850 0 : case TYPE_msk:
2851 0 : if (*(msk*)v) {
2852 0 : memset(p, 0xFF, 4 * ((n + 31) / 32));
2853 0 : if (n & 31) {
2854 0 : uint32_t *m = p;
2855 0 : m[n / 32] &= (1U << (n % 32)) - 1;
2856 : }
2857 : } else
2858 0 : memset(p, 0x00, 4 * ((n + 31) / 32));
2859 : break;
2860 10321 : case TYPE_bte:
2861 10321 : memset(p, *(bte*)v, n);
2862 10321 : break;
2863 : case TYPE_sht:
2864 7158621 : for (i = 0; i < n; i++)
2865 7142848 : ((sht *) p)[i] = *(sht *) v;
2866 : break;
2867 : case TYPE_int:
2868 : case TYPE_flt:
2869 : assert(sizeof(int) == sizeof(flt));
2870 217263555 : for (i = 0; i < n; i++)
2871 217258066 : ((int *) p)[i] = *(int *) v;
2872 : break;
2873 : case TYPE_lng:
2874 : case TYPE_dbl:
2875 : assert(sizeof(lng) == sizeof(dbl));
2876 235004377 : for (i = 0; i < n; i++)
2877 234977055 : ((lng *) p)[i] = *(lng *) v;
2878 : break;
2879 : #ifdef HAVE_HGE
2880 : case TYPE_hge:
2881 25697851 : for (i = 0; i < n; i++)
2882 25696640 : ((hge *) p)[i] = *(hge *) v;
2883 : break;
2884 : #endif
2885 : case TYPE_uuid:
2886 200047 : for (i = 0; i < n; i++)
2887 200038 : ((uuid *) p)[i] = *(uuid *) v;
2888 : break;
2889 14299 : case TYPE_str:
2890 : /* insert the first value, then just copy the
2891 : * offset lots of times */
2892 14299 : if (tfastins_nocheck(bn, 0, v) != GDK_SUCCEED) {
2893 0 : BBPreclaim(bn);
2894 0 : return NULL;
2895 : }
2896 14300 : char val[sizeof(var_t)];
2897 14300 : memcpy(val, Tloc(bn, 0), bn->twidth);
2898 14300 : if (bn->twidth == 1 && n > 1) {
2899 : /* single byte value: we have a
2900 : * function for that */
2901 7441 : memset(Tloc(bn, 1), val[0], n - 1);
2902 : } else {
2903 6859 : char *p = Tloc(bn, 0);
2904 6879 : for (i = 1; i < n; i++) {
2905 20 : p += bn->twidth;
2906 20 : memcpy(p, val, bn->twidth);
2907 : }
2908 : }
2909 : break;
2910 : default:
2911 333705 : for (i = 0; i < n; i++)
2912 333563 : if (tfastins_nocheck(bn, i, v) != GDK_SUCCEED) {
2913 0 : BBPreclaim(bn);
2914 0 : return NULL;
2915 : }
2916 : break;
2917 : }
2918 74591 : bn->theap->dirty = true;
2919 74591 : bn->tnil = n >= 1 && ATOMnilptr(tailtype) && (*ATOMcompare(tailtype))(v, ATOMnilptr(tailtype)) == 0;
2920 74591 : BATsetcount(bn, n);
2921 74586 : bn->tsorted = bn->trevsorted = ATOMlinear(tailtype);
2922 74586 : bn->tnonil = !bn->tnil;
2923 74586 : bn->tkey = BATcount(bn) <= 1;
2924 : }
2925 1074415 : TRC_DEBUG(ALGO, "-> " ALGOOPTBATFMT " " LLFMT "usec\n",
2926 : ALGOOPTBATPAR(bn), GDKusec() - t0);
2927 : return bn;
2928 : }
2929 :
2930 : /*
2931 : * BAT Aggregates
2932 : *
 * We retain the size() and card() aggregate results in the column
 * descriptor.  We would like to have such functionality in an
 * extensible way for many aggregates, but (1) we do not want to
 * change the binary BAT format on disk and (2) aggr and size are the
 * most relevant aggregates.
2938 : *
 * It is all hacked into the aggr[3] records: three adjacent integers
 * that were left over in the column record.  We refer to these as if
 * they were an int aggr[3] array.  The routines below set and retrieve
 * the aggregate values from the tail of the BAT, as many
 * aggregate-manipulating BAT functions work on the tail.
2944 : *
2945 : * The rules are as follows: aggr[0] contains the alignment ID of the
2946 : * column (if set i.e. nonzero). Hence, if this value is nonzero and
2947 : * equal to b->talign, the precomputed aggregate values in
2948 : * aggr[GDK_AGGR_SIZE] and aggr[GDK_AGGR_CARD] hold. However, only one
2949 : * of them may be set at the time. This is encoded by the value
2950 : * int_nil, which cannot occur in these two aggregates.
2951 : *
2952 : * This was now extended to record the property whether we know there
2953 : * is a nil value present by mis-using the highest bits of both
2954 : * GDK_AGGR_SIZE and GDK_AGGR_CARD.
2955 : */
2956 :
2957 : void
2958 39283980 : PROPdestroy_nolock(BAT *b)
2959 : {
2960 39283980 : PROPrec *p = b->tprops;
2961 39283980 : PROPrec *n;
2962 :
2963 39283980 : b->tprops = NULL;
2964 39288001 : while (p) {
2965 4232 : n = p->next;
2966 4232 : assert(p->id != (enum prop_t) 20);
2967 4232 : VALclear(&p->v);
2968 4232 : GDKfree(p);
2969 4232 : p = n;
2970 : }
2971 39283769 : }
2972 :
2973 : void
2974 432 : PROPdestroy(BAT *b)
2975 : {
2976 432 : MT_lock_set(&b->theaplock);
2977 432 : PROPdestroy_nolock(b);
2978 432 : MT_lock_unset(&b->theaplock);
2979 432 : }
2980 :
2981 : ValPtr
2982 195095962 : BATgetprop_nolock(BAT *b, enum prop_t idx)
2983 : {
2984 195095962 : PROPrec *p;
2985 :
2986 195095962 : p = b->tprops;
2987 195102113 : while (p && p->id != idx)
2988 6151 : p = p->next;
2989 195095962 : return p ? &p->v : NULL;
2990 : }
2991 :
2992 : void
2993 410490 : BATrmprop_nolock(BAT *b, enum prop_t idx)
2994 : {
2995 410490 : PROPrec *prop = b->tprops, *prev = NULL;
2996 :
2997 410725 : while (prop) {
2998 410598 : if (prop->id == idx) {
2999 410363 : if (prev)
3000 106 : prev->next = prop->next;
3001 : else
3002 410257 : b->tprops = prop->next;
3003 410363 : VALclear(&prop->v);
3004 410363 : GDKfree(prop);
3005 410363 : return;
3006 : }
3007 235 : prev = prop;
3008 235 : prop = prop->next;
3009 : }
3010 : }
3011 :
3012 : ValPtr
3013 414634 : BATsetprop_nolock(BAT *b, enum prop_t idx, int type, const void *v)
3014 : {
3015 414634 : PROPrec *p;
3016 :
3017 414634 : p = b->tprops;
3018 421571 : while (p && p->id != idx)
3019 6937 : p = p->next;
3020 414634 : if (p == NULL) {
3021 414627 : if ((p = GDKmalloc(sizeof(PROPrec))) == NULL) {
3022 : /* properties are hints, so if we can't create
3023 : * one we ignore the error */
3024 0 : GDKclrerr();
3025 0 : return NULL;
3026 : }
3027 414627 : p->id = idx;
3028 414627 : p->next = b->tprops;
3029 414627 : p->v.vtype = 0;
3030 414627 : b->tprops = p;
3031 : } else {
3032 7 : VALclear(&p->v);
3033 : }
3034 414634 : if (VALinit(&p->v, type, v) == NULL) {
3035 : /* failed to initialize, so remove property */
3036 0 : BATrmprop_nolock(b, idx);
3037 0 : GDKclrerr();
3038 0 : p = NULL;
3039 : }
3040 0 : return p ? &p->v : NULL;
3041 : }
3042 :
3043 : ValPtr
3044 2918906 : BATgetprop(BAT *b, enum prop_t idx)
3045 : {
3046 2918906 : ValPtr p;
3047 :
3048 2918906 : MT_lock_set(&b->theaplock);
3049 2918753 : p = BATgetprop_nolock(b, idx);
3050 2918805 : MT_lock_unset(&b->theaplock);
3051 2918825 : return p;
3052 : }
3053 :
3054 : ValPtr
3055 4042 : BATsetprop(BAT *b, enum prop_t idx, int type, const void *v)
3056 : {
3057 4042 : ValPtr p;
3058 4042 : MT_lock_set(&b->theaplock);
3059 4042 : p = BATsetprop_nolock(b, idx, type, v);
3060 4042 : MT_lock_unset(&b->theaplock);
3061 4042 : return p;
3062 : }
3063 :
3064 : void
3065 2 : BATrmprop(BAT *b, enum prop_t idx)
3066 : {
3067 2 : MT_lock_set(&b->theaplock);
3068 2 : BATrmprop_nolock(b, idx);
3069 2 : MT_lock_unset(&b->theaplock);
3070 2 : }
3071 :
3072 : /*
3073 : * The BATcount_no_nil function counts all BUN in a BAT that have a
3074 : * non-nil tail value.
3075 : * This function does not fail (the callers currently don't check for failure).
3076 : */
3077 : BUN
3078 2203 : BATcount_no_nil(BAT *b, BAT *s)
3079 : {
3080 2203 : BUN cnt = 0;
3081 2203 : const void *restrict p, *restrict nil;
3082 2203 : const char *restrict base;
3083 2203 : int t;
3084 2203 : int (*cmp)(const void *, const void *);
3085 2203 : struct canditer ci;
3086 2203 : oid hseq;
3087 :
3088 2203 : BATcheck(b, 0);
3089 :
3090 2203 : hseq = b->hseqbase;
3091 2203 : canditer_init(&ci, b, s);
3092 2202 : BATiter bi = bat_iterator(b);
3093 2203 : if (bi.nonil) {
3094 1999 : bat_iterator_end(&bi);
3095 1999 : return ci.ncand;
3096 : }
3097 204 : p = bi.base;
3098 204 : t = ATOMbasetype(bi.type);
3099 204 : switch (t) {
3100 0 : case TYPE_void:
3101 0 : cnt = ci.ncand * BATtdensebi(&bi);
3102 0 : break;
3103 0 : case TYPE_msk:
3104 0 : cnt = ci.ncand;
3105 0 : break;
3106 15 : case TYPE_bte:
3107 31 : CAND_LOOP(&ci)
3108 16 : cnt += !is_bte_nil(((const bte *) p)[canditer_next(&ci) - hseq]);
3109 : break;
3110 0 : case TYPE_sht:
3111 0 : CAND_LOOP(&ci)
3112 0 : cnt += !is_sht_nil(((const sht *) p)[canditer_next(&ci) - hseq]);
3113 : break;
3114 79 : case TYPE_int:
3115 3497971 : CAND_LOOP(&ci)
3116 3497892 : cnt += !is_int_nil(((const int *) p)[canditer_next(&ci) - hseq]);
3117 : break;
3118 85 : case TYPE_lng:
3119 156873 : CAND_LOOP(&ci)
3120 156788 : cnt += !is_lng_nil(((const lng *) p)[canditer_next(&ci) - hseq]);
3121 : break;
3122 : #ifdef HAVE_HGE
3123 0 : case TYPE_hge:
3124 0 : CAND_LOOP(&ci)
3125 0 : cnt += !is_hge_nil(((const hge *) p)[canditer_next(&ci) - hseq]);
3126 : break;
3127 : #endif
3128 0 : case TYPE_flt:
3129 0 : CAND_LOOP(&ci)
3130 0 : cnt += !is_flt_nil(((const flt *) p)[canditer_next(&ci) - hseq]);
3131 : break;
3132 0 : case TYPE_dbl:
3133 0 : CAND_LOOP(&ci)
3134 0 : cnt += !is_dbl_nil(((const dbl *) p)[canditer_next(&ci) - hseq]);
3135 : break;
3136 0 : case TYPE_uuid:
3137 0 : CAND_LOOP(&ci)
3138 0 : cnt += !is_uuid_nil(((const uuid *) p)[canditer_next(&ci) - hseq]);
3139 : break;
3140 25 : case TYPE_str:
3141 25 : base = bi.vh->base;
3142 25 : switch (bi.width) {
3143 23 : case 1:
3144 4805 : CAND_LOOP(&ci)
3145 4782 : cnt += base[(var_t) ((const uint8_t *) p)[canditer_next(&ci) - hseq] + GDK_VAROFFSET] != '\200';
3146 : break;
3147 1 : case 2:
3148 144 : CAND_LOOP(&ci)
3149 143 : cnt += base[(var_t) ((const uint16_t *) p)[canditer_next(&ci) - hseq] + GDK_VAROFFSET] != '\200';
3150 : break;
3151 1 : case 4:
3152 168 : CAND_LOOP(&ci)
3153 167 : cnt += base[(var_t) ((const uint32_t *) p)[canditer_next(&ci) - hseq]] != '\200';
3154 : break;
3155 : #if SIZEOF_VAR_T == 8
3156 0 : case 8:
3157 0 : CAND_LOOP(&ci)
3158 0 : cnt += base[(var_t) ((const uint64_t *) p)[canditer_next(&ci) - hseq]] != '\200';
3159 : break;
3160 : #endif
3161 : default:
3162 0 : MT_UNREACHABLE();
3163 : }
3164 : break;
3165 0 : default:
3166 0 : nil = ATOMnilptr(t);
3167 0 : cmp = ATOMcompare(t);
3168 0 : if (nil == NULL) {
3169 0 : cnt = ci.ncand;
3170 0 : } else if (b->tvheap) {
3171 0 : base = b->tvheap->base;
3172 0 : CAND_LOOP(&ci)
3173 0 : cnt += (*cmp)(nil, base + ((const var_t *) p)[canditer_next(&ci) - hseq]) != 0;
3174 : } else {
3175 0 : CAND_LOOP(&ci)
3176 0 : cnt += (*cmp)(BUNtloc(bi, canditer_next(&ci) - hseq), nil) != 0;
3177 : }
3178 : break;
3179 : }
3180 204 : if (cnt == bi.count) {
3181 24 : MT_lock_set(&b->theaplock);
3182 24 : if (cnt == BATcount(b) && bi.h == b->theap) {
3183 : /* we learned something */
3184 24 : b->tnonil = true;
3185 24 : assert(!b->tnil);
3186 24 : b->tnil = false;
3187 : }
3188 24 : bat pbid = VIEWtparent(b);
3189 24 : MT_lock_unset(&b->theaplock);
3190 24 : if (pbid) {
3191 15 : BAT *pb = BATdescriptor(pbid);
3192 15 : if (pb) {
3193 15 : MT_lock_set(&pb->theaplock);
3194 15 : if (cnt == BATcount(pb) &&
3195 0 : bi.h == pb->theap &&
3196 0 : !pb->tnonil) {
3197 0 : pb->tnonil = true;
3198 0 : assert(!pb->tnil);
3199 0 : pb->tnil = false;
3200 : }
3201 15 : MT_lock_unset(&pb->theaplock);
3202 15 : BBPunfix(pb->batCacheid);
3203 : }
3204 : }
3205 : }
3206 204 : bat_iterator_end(&bi);
3207 204 : return cnt;
3208 : }
|