Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024, 2025 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * (c) M. L. Kersten, P. Boncz, S. Manegold, N. Nes, K.S. Mullender
15 : * Common BAT Operations
16 : * We factor out all possible overhead by inlining code. This
17 : * includes the macros BUNhead and BUNtail, which do a test to see
18 : * whether the atom resides in the buns or in a variable storage
19 : * heap.
20 : */
21 : #include "monetdb_config.h"
22 : #include "gdk.h"
23 : #include "gdk_private.h"
24 :
25 : gdk_return
26 55149644 : unshare_varsized_heap(BAT *b)
27 : {
28 55149644 : if (ATOMvarsized(b->ttype) &&
29 22188545 : b->tvheap->parentid != b->batCacheid) {
30 1021 : Heap *h = GDKmalloc(sizeof(Heap));
31 1027 : if (h == NULL)
32 : return GDK_FAIL;
33 1027 : MT_thread_setalgorithm("unshare vheap");
34 2052 : *h = (Heap) {
35 1024 : .parentid = b->batCacheid,
36 1028 : .farmid = BBPselectfarm(b->batRole, TYPE_str, varheap),
37 : .refs = ATOMIC_VAR_INIT(1),
38 : };
39 1024 : strconcat_len(h->filename, sizeof(h->filename),
40 1024 : BBP_physical(b->batCacheid), ".theap", NULL);
41 1026 : if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
42 0 : HEAPfree(h, true);
43 0 : GDKfree(h);
44 0 : return GDK_FAIL;
45 : }
46 1025 : MT_lock_set(&b->theaplock);
47 1024 : Heap *oh = b->tvheap;
48 1024 : b->tvheap = h;
49 1024 : MT_lock_unset(&b->theaplock);
50 1027 : BBPrelease(oh->parentid);
51 1028 : HEAPdecref(oh, false);
52 : }
53 : return GDK_SUCCEED;
54 : }
55 :
56 : /* We try to be clever when appending one string bat to another.
57 : * First of all, we try to actually share the string heap so that we
58 : * don't need an extra copy, and if that can't be done, we see whether
59 : * it makes sense to just quickly copy the whole string heap instead
60 : * of inserting individual strings. See the comments in the code for
61 : * more information. */
62 : static gdk_return
63 85724 : insert_string_bat(BAT *b, BATiter *ni, struct canditer *ci, bool force, bool mayshare, QryCtx *qry_ctx)
64 : {
65 85724 : size_t toff = ~(size_t) 0; /* tail offset */
66 85724 : BUN p, r; /* loop variables */
67 85724 : const void *tp = NULL; /* tail value pointer */
68 85724 : var_t v;
69 85724 : size_t off; /* offset within n's string heap */
70 85724 : BUN cnt = ci->ncand;
71 85724 : BUN oldcnt = BATcount(b);
72 :
73 85724 : assert(b->ttype == TYPE_str);
74 85724 : assert(b->tbaseoff == 0);
75 85724 : assert(b->theap->parentid == b->batCacheid);
76 : /* only transient bats can use some other bat's string heap */
77 85724 : assert(b->batRole == TRANSIENT || b->tvheap->parentid == b->batCacheid);
78 85724 : if (cnt == 0)
79 : return GDK_SUCCEED;
80 :
81 85718 : if (b->tvheap == ni->vh) {
82 : /* vheaps are already shared, continue doing so: we just
83 : * need to append the offsets */
84 20421 : toff = 0;
85 20421 : MT_thread_setalgorithm("shared vheap");
86 65297 : } else if (mayshare && b->batRole == TRANSIENT && oldcnt == 0) {
87 : /* we can share the vheaps, so we then only need to
88 : * append the offsets */
89 51064 : MT_lock_set(&b->theaplock);
90 51220 : bat bid = b->tvheap->parentid;
91 51220 : HEAPdecref(b->tvheap, bid == b->batCacheid);
92 51363 : HEAPincref(ni->vh);
93 51348 : b->tvheap = ni->vh;
94 51348 : b->tascii = ni->ascii;
95 51348 : MT_lock_unset(&b->theaplock);
96 51376 : BBPretain(ni->vh->parentid);
97 51325 : if (bid != b->batCacheid)
98 0 : BBPrelease(bid);
99 51325 : toff = 0;
100 51325 : MT_thread_setalgorithm("share vheap");
101 : } else {
102 : /* no heap sharing, so also make sure the heap isn't
103 : * shared currently (we're not allowed to write in
104 : * another bat's heap) */
105 15260 : if (b->tvheap->parentid != b->batCacheid &&
106 1024 : unshare_varsized_heap(b) != GDK_SUCCEED) {
107 : return GDK_FAIL;
108 : }
109 14236 : if (oldcnt == 0 || (!GDK_ELIMDOUBLES(b->tvheap) &&
110 91 : !GDK_ELIMDOUBLES(ni->vh))) {
111 : /* we'll consider copying the string heap completely
112 : *
113 : * we first estimate how much space the string heap
114 : * should occupy, given the number of rows we need to
115 : * insert, then, if that is way smaller than the actual
116 : * space occupied, we will skip the copy and just insert
117 : * one by one */
118 : size_t len = 0;
119 5855869 : for (int i = 0; i < 1024; i++) {
120 5850090 : p = (BUN) (((double) rand() / RAND_MAX) * (cnt - 1));
121 5850179 : p = canditer_idx(ci, p) - ni->b->hseqbase;
122 5850179 : len += strlen(BUNtvar(*ni, p)) + 1;
123 : }
124 5779 : len = (len + 512) / 1024; /* rounded average length */
125 5779 : r = (GDK_ELIMLIMIT - GDK_STRHASHSIZE) / (len + 12);
126 : /* r is estimate of number of strings in
127 : * double-eliminated area */
128 5779 : BUN ecnt = ci->ncand;
129 5779 : if (ni->b->tunique_est > 0 && ecnt > ni->b->tunique_est)
130 55 : ecnt = (BUN)ni->b->tunique_est;
131 5779 : if (r < ecnt)
132 930 : len = GDK_ELIMLIMIT + (ecnt - r) * len;
133 : else
134 4849 : len = GDK_STRHASHSIZE + ecnt * (len + 12);
135 : /* len is total estimated expected size of vheap */
136 :
137 5779 : if (len > ni->vhfree / 2) {
138 : /* we copy the string heap, perhaps appending */
139 5771 : if (oldcnt == 0) {
140 5735 : toff = 0;
141 5735 : MT_thread_setalgorithm("copy vheap");
142 : } else {
143 36 : toff = (b->tvheap->free + GDK_VARALIGN - 1) & ~(GDK_VARALIGN - 1);
144 36 : MT_thread_setalgorithm("append vheap");
145 : }
146 :
147 5771 : MT_lock_set(&b->theaplock);
148 5770 : if (HEAPgrow(&b->tvheap, toff + ni->vhfree, force) != GDK_SUCCEED) {
149 0 : MT_lock_unset(&b->theaplock);
150 0 : return GDK_FAIL;
151 : }
152 5766 : memcpy(b->tvheap->base + toff, ni->vh->base, ni->vhfree);
153 5766 : b->tvheap->free = toff + ni->vhfree;
154 5766 : b->tvheap->dirty = true;
155 5766 : b->tascii &= ni->ascii;
156 5766 : MT_lock_unset(&b->theaplock);
157 : }
158 : }
159 : }
160 : /* if toff has the initial value of ~0, we insert strings
161 : * individually, otherwise we only copy (insert) offsets */
162 77515 : if (toff == ~(size_t) 0)
163 : v = GDK_VAROFFSET;
164 : else
165 77464 : v = b->tvheap->free - 1;
166 :
167 : /* make sure there is (vertical) space in the offset heap, we
168 : * may also widen thanks to v, set above */
169 86018 : if (GDKupgradevarheap(b, v, oldcnt + cnt < b->batCapacity ? b->batCapacity : oldcnt + cnt, b->batCount) != GDK_SUCCEED) {
170 : return GDK_FAIL;
171 : }
172 :
173 85930 : if (toff == 0 && ni->width == b->twidth && ci->tpe == cand_dense) {
174 : /* we don't need to do any translation of offset
175 : * values, so we can use fast memcpy */
176 77204 : MT_thread_setalgorithm("memcpy offsets");
177 77339 : memcpy(Tloc(b, BATcount(b)), (const char *) ni->base + ((ci->seq - ni->b->hseqbase) << ni->shift), cnt << ni->shift);
178 8726 : } else if (toff != ~(size_t) 0) {
179 : /* we don't need to insert any actual strings since we
180 : * have already made sure that they are all in b's
181 : * string heap at known locations (namely the offset
182 : * in n added to toff), so insert offsets from n after
183 : * adding toff into b */
184 : /* note the use of the "restrict" qualifier here: all
185 : * four pointers below point to the same value, but
186 : * only one of them will actually be used, hence we
187 : * still obey the rule for restrict-qualified
188 : * pointers */
189 183 : const uint8_t *restrict tbp = (const uint8_t *) ni->base;
190 183 : const uint16_t *restrict tsp = (const uint16_t *) ni->base;
191 183 : const uint32_t *restrict tip = (const uint32_t *) ni->base;
192 : #if SIZEOF_VAR_T == 8
193 183 : const uint64_t *restrict tlp = (const uint64_t *) ni->base;
194 : #endif
195 :
196 183 : MT_thread_setalgorithm("copy offset values");
197 183 : r = b->batCount;
198 3527364 : TIMEOUT_LOOP(cnt, qry_ctx) {
199 3526802 : p = canditer_next(ci) - ni->b->hseqbase;
200 3526802 : switch (ni->width) {
201 106 : case 1:
202 106 : v = (var_t) tbp[p] + GDK_VAROFFSET;
203 106 : break;
204 4638 : case 2:
205 4638 : v = (var_t) tsp[p] + GDK_VAROFFSET;
206 4638 : break;
207 3522058 : case 4:
208 3522058 : v = (var_t) tip[p];
209 3522058 : break;
210 : #if SIZEOF_VAR_T == 8
211 0 : case 8:
212 0 : v = (var_t) tlp[p];
213 0 : break;
214 : #endif
215 : default:
216 0 : MT_UNREACHABLE();
217 : }
218 3526802 : v = (var_t) ((size_t) v + toff);
219 3526802 : assert(v >= GDK_VAROFFSET);
220 3526802 : assert((size_t) v < b->tvheap->free);
221 3526802 : switch (b->twidth) {
222 4640 : case 1:
223 4640 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 8));
224 4640 : ((uint8_t *) b->theap->base)[r++] = (uint8_t) (v - GDK_VAROFFSET);
225 4640 : break;
226 761 : case 2:
227 761 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 16));
228 761 : ((uint16_t *) b->theap->base)[r++] = (uint16_t) (v - GDK_VAROFFSET);
229 761 : break;
230 3521401 : case 4:
231 : #if SIZEOF_VAR_T == 8
232 3521401 : assert(v < ((var_t) 1 << 32));
233 : #endif
234 3521401 : ((uint32_t *) b->theap->base)[r++] = (uint32_t) v;
235 3521401 : break;
236 : #if SIZEOF_VAR_T == 8
237 0 : case 8:
238 0 : ((uint64_t *) b->theap->base)[r++] = (uint64_t) v;
239 0 : break;
240 : #endif
241 : default:
242 3526802 : MT_UNREACHABLE();
243 : }
244 : }
245 8543 : } else if (b->tvheap->free < ni->vhfree / 2 ||
246 : GDK_ELIMDOUBLES(b->tvheap)) {
247 : /* if b's string heap is much smaller than n's string
248 : * heap, don't bother checking whether n's string
249 : * values occur in b's string heap; also, if b is
250 : * (still) fully double eliminated, we must continue
251 : * to use the double elimination mechanism */
252 8489 : r = b->batCount;
253 8489 : oid hseq = ni->b->hseqbase;
254 8489 : MT_thread_setalgorithm("insert string values");
255 10760809 : TIMEOUT_LOOP(cnt, qry_ctx) {
256 10743370 : p = canditer_next(ci) - hseq;
257 10648498 : tp = BUNtvar(*ni, p);
258 10648498 : if (tfastins_nocheckVAR(b, r, tp) != GDK_SUCCEED) {
259 : return GDK_FAIL;
260 : }
261 10743421 : r++;
262 : }
263 : } else {
264 : /* Insert values from n individually into b; however,
265 : * we check whether there is a string in b's string
266 : * heap at the same offset as the string is in n's
267 : * string heap (in case b's string heap is a copy of
268 : * n's). If this is the case, we just copy the
269 : * offset, otherwise we insert normally. */
270 54 : r = b->batCount;
271 54 : MT_thread_setalgorithm("insert string values with check");
272 6281 : TIMEOUT_LOOP(cnt, qry_ctx) {
273 6173 : p = canditer_next(ci) - ni->b->hseqbase;
274 6173 : off = BUNtvaroff(*ni, p); /* the offset */
275 6173 : tp = ni->vh->base + off; /* the string */
276 6173 : if (off < b->tvheap->free &&
277 6173 : strcmp(b->tvheap->base + off, tp) == 0) {
278 : /* we found the string at the same
279 : * offset in b's string heap as it was
280 : * in n's string heap, so we don't
281 : * have to insert a new string into b:
282 : * we can just copy the offset */
283 2306 : v = (var_t) off;
284 2306 : switch (b->twidth) {
285 0 : case 1:
286 0 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 8));
287 0 : ((uint8_t *) b->theap->base)[r] = (uint8_t) (v - GDK_VAROFFSET);
288 0 : break;
289 4 : case 2:
290 4 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 16));
291 4 : ((uint16_t *) b->theap->base)[r] = (uint16_t) (v - GDK_VAROFFSET);
292 4 : break;
293 2302 : case 4:
294 : #if SIZEOF_VAR_T == 8
295 2302 : assert(v < ((var_t) 1 << 32));
296 : #endif
297 2302 : ((uint32_t *) b->theap->base)[r] = (uint32_t) v;
298 2302 : break;
299 : #if SIZEOF_VAR_T == 8
300 0 : case 8:
301 0 : ((uint64_t *) b->theap->base)[r] = (uint64_t) v;
302 0 : break;
303 : #endif
304 : default:
305 0 : MT_UNREACHABLE();
306 : }
307 : } else {
308 3867 : if (tfastins_nocheckVAR(b, r, tp) != GDK_SUCCEED) {
309 : return GDK_FAIL;
310 : }
311 : }
312 6173 : r++;
313 : }
314 : }
315 86124 : TIMEOUT_CHECK(qry_ctx, TIMEOUT_HANDLER(GDK_FAIL, qry_ctx));
316 85786 : MT_rwlock_wrlock(&b->thashlock);
317 86184 : MT_lock_set(&b->theaplock);
318 86080 : BATsetcount(b, oldcnt + ci->ncand);
319 86108 : assert(b->batCapacity >= b->batCount);
320 86108 : MT_lock_unset(&b->theaplock);
321 : /* maintain hash */
322 89190 : for (r = oldcnt, cnt = BATcount(b); b->thash && r < cnt; r++) {
323 3022 : HASHappend_locked(b, r, b->tvheap->base + VarHeapVal(Tloc(b, 0), r, b->twidth));
324 : }
325 86168 : BUN nunique = b->thash ? b->thash->nunique : 0;
326 86168 : MT_rwlock_wrunlock(&b->thashlock);
327 86183 : if (nunique != 0) {
328 9 : MT_lock_set(&b->theaplock);
329 9 : b->tunique_est = (double) nunique;
330 9 : MT_lock_unset(&b->theaplock);
331 : }
332 : return GDK_SUCCEED;
333 : }
334 :
335 : static gdk_return
336 514 : append_varsized_bat(BAT *b, BATiter *ni, struct canditer *ci, bool mayshare)
337 : {
338 514 : BUN cnt = ci->ncand, r;
339 514 : oid hseq = ni->b->hseqbase;
340 :
341 : /* only transient bats can use some other bat's vheap */
342 514 : assert(b->batRole == TRANSIENT || b->tvheap->parentid == b->batCacheid);
343 : /* make sure the bats use var_t */
344 514 : assert(b->twidth == ni->width);
345 514 : assert(b->twidth == SIZEOF_VAR_T);
346 514 : if (cnt == 0)
347 : return GDK_SUCCEED;
348 515 : if (cnt > BATcapacity(b) - BATcount(b)) {
349 : /* if needed space exceeds a normal growth extend just
350 : * with what's needed */
351 18 : BUN ncap = BATcount(b) + cnt;
352 18 : BUN grows = BATgrows(b);
353 :
354 18 : if (ncap > grows)
355 : grows = ncap;
356 18 : if (BATextend(b, grows) != GDK_SUCCEED)
357 : return GDK_FAIL;
358 : }
359 515 : if (mayshare) {
360 515 : MT_lock_set(&b->theaplock);
361 517 : if (BATcount(b) == 0 &&
362 233 : b->batRole == TRANSIENT &&
363 148 : ni->restricted == BAT_READ &&
364 148 : b->tvheap != ni->vh) {
365 : /* if b is still empty, in the transient farm,
366 : * and n is read-only, we replace b's vheap with
367 : * a reference to n's */
368 148 : bat bid = b->tvheap->parentid;
369 148 : HEAPdecref(b->tvheap, true);
370 148 : HEAPincref(ni->vh);
371 148 : b->tvheap = ni->vh;
372 148 : BBPretain(ni->vh->parentid);
373 148 : if (bid != b->batCacheid)
374 0 : BBPrelease(bid);
375 : }
376 517 : MT_lock_unset(&b->theaplock);
377 : }
378 517 : if (b->tvheap == ni->vh) {
379 : /* if b and n use the same vheap, we only need to copy
380 : * the offsets from n to b */
381 342 : if (ci->tpe == cand_dense) {
382 : /* fast memcpy since we copy a consecutive
383 : * chunk of memory */
384 342 : memcpy(Tloc(b, BATcount(b)),
385 342 : (const var_t *) ni->base + (ci->seq - hseq),
386 342 : cnt << b->tshift);
387 : } else {
388 0 : var_t *restrict dst = (var_t *) Tloc(b, BATcount(b));
389 0 : const var_t *restrict src = (const var_t *) ni->base;
390 0 : while (cnt > 0) {
391 0 : cnt--;
392 0 : *dst++ = src[canditer_next(ci) - hseq];
393 : }
394 : }
395 342 : MT_rwlock_wrlock(&b->thashlock);
396 342 : MT_lock_set(&b->theaplock);
397 342 : BATsetcount(b, BATcount(b) + ci->ncand);
398 342 : MT_lock_unset(&b->theaplock);
399 : /* maintain hash table */
400 342 : for (BUN i = BATcount(b) - ci->ncand;
401 342 : b->thash && i < BATcount(b);
402 0 : i++) {
403 0 : HASHappend_locked(b, i, b->tvheap->base + *(var_t *) Tloc(b, i));
404 : }
405 342 : BUN nunique = b->thash ? b->thash->nunique : 0;
406 342 : MT_rwlock_wrunlock(&b->thashlock);
407 342 : if (nunique != 0) {
408 0 : MT_lock_set(&b->theaplock);
409 0 : b->tunique_est = (double) nunique;
410 0 : MT_lock_unset(&b->theaplock);
411 : }
412 342 : return GDK_SUCCEED;
413 : }
414 : /* b and n do not share their vheap, so we need to copy data */
415 175 : if (b->tvheap->parentid != b->batCacheid) {
416 : /* if b shares its vheap with some other bat, unshare it */
417 18 : Heap *h = GDKmalloc(sizeof(Heap));
418 18 : if (h == NULL) {
419 : return GDK_FAIL;
420 : }
421 36 : *h = (Heap) {
422 18 : .parentid = b->batCacheid,
423 18 : .farmid = BBPselectfarm(b->batRole, b->ttype, varheap),
424 : .refs = ATOMIC_VAR_INIT(1),
425 : };
426 18 : strconcat_len(h->filename, sizeof(h->filename),
427 18 : BBP_physical(b->batCacheid), ".theap", NULL);
428 18 : if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
429 0 : HEAPfree(h, true);
430 0 : GDKfree(h);
431 0 : return GDK_FAIL;
432 : }
433 18 : MT_lock_set(&b->theaplock);
434 18 : Heap *oh = b->tvheap;
435 18 : b->tvheap = h;
436 18 : MT_lock_unset(&b->theaplock);
437 18 : if (oh->parentid != b->batCacheid)
438 18 : BBPrelease(oh->parentid);
439 18 : HEAPdecref(oh, false);
440 : }
441 175 : if (BATcount(b) == 0 &&
442 83 : ci->tpe == cand_dense && ci->ncand == ni->count) {
443 : /* just copy the heaps */
444 85 : MT_lock_set(&b->theaplock);
445 84 : if (HEAPgrow(&b->tvheap, ni->vhfree, false) != GDK_SUCCEED) {
446 0 : MT_lock_unset(&b->theaplock);
447 0 : return GDK_FAIL;
448 : }
449 83 : memcpy(b->theap->base, ni->base, ni->hfree);
450 83 : memcpy(b->tvheap->base, ni->vh->base, ni->vhfree);
451 83 : b->theap->free = ni->hfree;
452 83 : b->theap->dirty = true;
453 83 : b->tvheap->free = ni->vhfree;
454 83 : b->tvheap->dirty = true;
455 83 : BATsetcount(b, ni->count);
456 83 : b->tnil = ni->nil;
457 83 : b->tnonil = ni->nonil;
458 83 : b->tsorted = ni->sorted;
459 83 : b->tnosorted = ni->nosorted;
460 83 : b->trevsorted = ni->revsorted;
461 83 : b->tnorevsorted = ni->norevsorted;
462 83 : b->tkey = ni->key;
463 83 : b->tnokey[0] = ni->nokey[0];
464 83 : b->tnokey[1] = ni->nokey[1];
465 83 : b->tminpos = ni->minpos;
466 83 : b->tmaxpos = ni->maxpos;
467 83 : b->tunique_est = ni->unique_est;
468 83 : MT_lock_unset(&b->theaplock);
469 85 : return GDK_SUCCEED;
470 : }
471 : /* copy data from n to b */
472 : r = BATcount(b);
473 380595 : for (BUN i = 0; i < cnt; i++) {
474 380505 : BUN p = canditer_next(ci) - hseq;
475 380504 : const void *t = BUNtvar(*ni, p);
476 380504 : if (tfastins_nocheckVAR(b, r, t) != GDK_SUCCEED) {
477 : return GDK_FAIL;
478 : }
479 380505 : r++;
480 : }
481 90 : MT_rwlock_wrlock(&b->thashlock);
482 90 : if (b->thash) {
483 0 : r -= cnt;
484 0 : BATiter bi = bat_iterator_nolock(b);
485 0 : for (BUN i = 0; i < cnt; i++) {
486 0 : const void *t = BUNtvar(bi, r);
487 0 : HASHappend_locked(b, r, t);
488 0 : r++;
489 : }
490 : }
491 90 : BUN nunique = b->thash ? b->thash->nunique : 0;
492 90 : MT_lock_set(&b->theaplock);
493 90 : BATsetcount(b, r);
494 89 : if (nunique != 0)
495 0 : b->tunique_est = (double) nunique;
496 89 : MT_lock_unset(&b->theaplock);
497 89 : MT_rwlock_wrunlock(&b->thashlock);
498 89 : return GDK_SUCCEED;
499 : }
500 :
501 : static gdk_return
502 138 : append_msk_bat(BAT *b, BATiter *ni, struct canditer *ci)
503 : {
504 138 : if (ci->ncand == 0)
505 : return GDK_SUCCEED;
506 138 : if (BATextend(b, BATcount(b) + ci->ncand) != GDK_SUCCEED)
507 : return GDK_FAIL;
508 :
509 138 : MT_lock_set(&b->theaplock);
510 :
511 138 : uint32_t boff = b->batCount % 32;
512 138 : uint32_t *bp = (uint32_t *) b->theap->base + b->batCount / 32;
513 138 : b->batCount += ci->ncand;
514 138 : if (ci->tpe == cand_dense) {
515 138 : const uint32_t *np;
516 138 : uint32_t noff, mask;
517 138 : BUN cnt;
518 138 : noff = (ci->seq - ni->b->hseqbase) % 32;
519 138 : cnt = ci->ncand;
520 138 : np = (const uint32_t *) ni->base + (ci->seq - ni->b->hseqbase) / 32;
521 138 : if (boff == noff) {
522 : /* words of b and n are aligned, so we don't
523 : * need to shift bits around */
524 56 : if (boff + cnt <= 32) {
525 : /* all new bits within one word */
526 50 : if (cnt == 32) {
527 0 : *bp = *np;
528 : } else {
529 50 : mask = ((1U << cnt) - 1) << boff;
530 50 : *bp &= ~mask;
531 50 : *bp |= *np & mask;
532 : }
533 : } else {
534 : /* multiple words of b are affected */
535 6 : if (boff != 0) {
536 : /* first fill up the rest of the first
537 : * word */
538 0 : mask = ~0U << boff;
539 0 : *bp &= ~mask;
540 0 : *bp++ |= *np++ & mask;
541 0 : cnt -= 32 - boff;
542 : }
543 6 : if (cnt >= 32) {
544 : /* copy an integral number of words fast */
545 6 : BUN nw = cnt / 32;
546 6 : memcpy(bp, np, nw*sizeof(int));
547 6 : bp += nw;
548 6 : np += nw;
549 6 : cnt %= 32;
550 : }
551 6 : if (cnt > 0) {
552 : /* do the left over bits */
553 6 : mask = (1U << cnt) - 1;
554 6 : *bp = *np & mask;
555 : }
556 : }
557 82 : } else if (boff > noff) {
558 82 : if (boff + cnt <= 32) {
559 : /* we only need to copy bits from a
560 : * single word of n to a single word
561 : * of b */
562 : /* boff > 0, so cnt < 32, hence the
563 : * shift is ok */
564 74 : mask = (1U << cnt) - 1;
565 74 : *bp &= ~(mask << boff);
566 74 : *bp |= (*np & (mask << noff)) << (boff - noff);
567 : } else {
568 : /* first fill the rest of the last partial
569 : * word of b, so that's 32-boff bits */
570 8 : mask = (1U << (32 - boff)) - 1;
571 8 : *bp &= ~(mask << boff);
572 8 : *bp++ |= (*np & (mask << noff)) << (boff - noff);
573 8 : cnt -= 32 - boff;
574 :
575 : /* set boff and noff to the amount we need to
576 : * shift bits in consecutive words of n around
577 : * to fit into the next word of b; set mask to
578 : * the mask of the bottom bits of n that fit
579 : * in a word of b (and the complement are the
580 : * top bits that go to another word of b) */
581 8 : boff -= noff;
582 8 : noff = 32 - boff;
583 8 : mask = (1U << noff) - 1;
584 140 : while (cnt >= 32) {
585 132 : *bp = (*np++ & ~mask) >> noff;
586 132 : *bp++ |= (*np & mask) << boff;
587 132 : cnt -= 32;
588 : }
589 8 : if (cnt > noff) {
590 : /* the last bits come from two words
591 : * in n */
592 4 : *bp = (*np++ & ~mask) >> noff;
593 4 : cnt -= noff;
594 4 : mask = (1U << cnt) - 1;
595 4 : *bp++ |= (*np & mask) << boff;
596 4 : } else if (cnt > 0) {
597 : /* the last bits come from a single
598 : * word in n */
599 4 : mask = ((1U << cnt) - 1) << noff;
600 4 : *bp = (*np & mask) >> noff;
601 : }
602 : }
603 : } else {
604 : /* boff < noff */
605 0 : if (noff + cnt <= 32) {
606 : /* only need part of the first word of n */
607 0 : assert(cnt < 32); /* noff > 0, so cnt < 32 */
608 0 : mask = (1U << cnt) - 1;
609 0 : *bp &= ~(mask << boff);
610 0 : *bp |= (*np & (mask << noff)) >> (noff - boff);
611 0 : } else if (boff + cnt <= 32) {
612 : /* only need to fill a single word of
613 : * b, but from two of n */
614 0 : if (cnt < 32)
615 0 : *bp &= ~(((1U << cnt) - 1) << boff);
616 : else
617 0 : *bp = 0;
618 0 : mask = ~((1U << noff) - 1);
619 0 : *bp |= (*np++ & mask) >> (noff - boff);
620 0 : cnt -= 32 - noff;
621 0 : mask = (1U << cnt) - 1;
622 0 : *bp |= (*np & mask) << (32 - noff);
623 : } else {
624 0 : if (boff > 0) {
625 : /* fill the rest of the first word of b */
626 0 : cnt -= 32 - boff;
627 0 : *bp &= (1U << boff) - 1;
628 0 : mask = ~((1U << noff) - 1);
629 0 : noff -= boff;
630 0 : boff = 32 - noff;
631 0 : *bp |= (*np++ & mask) >> noff;
632 0 : *bp |= (*np & ((1U << noff) - 1)) << boff;
633 : } else {
634 0 : boff = 32 - noff;
635 : }
636 0 : mask = (1U << noff) - 1;
637 0 : while (cnt >= 32) {
638 0 : *bp = (*np++ & ~mask) >> noff;
639 0 : *bp++ |= (*np & mask) << boff;
640 0 : cnt -= 32;
641 : }
642 0 : if (cnt > 0) {
643 0 : *bp = (*np++ & ~mask) >> noff;
644 0 : if (cnt > noff)
645 0 : *bp++ |= (*np & mask) << boff;
646 : }
647 : }
648 : }
649 : } else {
650 0 : oid o;
651 0 : uint32_t v = boff > 0 ? *bp & ((1U << boff) - 1) : 0;
652 0 : do {
653 0 : for (uint32_t i = boff; i < 32; i++) {
654 0 : o = canditer_next(ci);
655 0 : if (is_oid_nil(o))
656 : break;
657 0 : o -= ni->b->hseqbase;
658 0 : v |= (uint32_t) Tmskval(ni, o - ni->b->hseqbase) << i;
659 : }
660 0 : *bp++ = v;
661 0 : v = 0;
662 0 : boff = 0;
663 0 : } while (!is_oid_nil(o));
664 : }
665 138 : b->theap->dirty = true;
666 138 : b->theap->free = ((b->batCount + 31) / 32) * 4;
667 138 : MT_lock_unset(&b->theaplock);
668 138 : return GDK_SUCCEED;
669 : }
670 :
671 : /* Append the contents of BAT n (subject to the optional candidate
672 : * list s) to BAT b. If b is empty, b will get the seqbase of s if it
673 : * was passed in, and else the seqbase of n. */
674 : static gdk_return
675 2262299 : BATappend2(BAT *b, BAT *n, BAT *s, bool force, bool mayshare)
676 : {
677 2262299 : struct canditer ci;
678 2262299 : BUN r;
679 2262299 : oid hseq = n->hseqbase;
680 2262299 : char buf[64];
681 2262299 : lng t0 = 0;
682 2262299 : const ValRecord *prop = NULL;
683 2262299 : ValRecord minprop, maxprop;
684 2262299 : const void *minbound = NULL, *maxbound = NULL;
685 2262299 : int (*atomcmp) (const void *, const void *) = ATOMcompare(b->ttype);
686 2262299 : bool hlocked = false;
687 :
688 2262299 : if (b == NULL || n == NULL || BATcount(n) == 0) {
689 : return GDK_SUCCEED;
690 : }
691 1317016 : assert(b->theap->parentid == b->batCacheid);
692 :
693 1317016 : TRC_DEBUG_IF(ALGO) {
694 0 : t0 = GDKusec();
695 0 : snprintf(buf, sizeof(buf), ALGOBATFMT, ALGOBATPAR(b));
696 : }
697 :
698 1317016 : ALIGNapp(b, force, GDK_FAIL);
699 :
700 3698389 : if (ATOMstorage(ATOMtype(b->ttype)) != ATOMstorage(ATOMtype(n->ttype))) {
701 0 : GDKerror("Incompatible operands ("ALGOBATFMT" vs. "ALGOBATFMT").\n", ALGOBATPAR(b), ALGOBATPAR(n));
702 0 : return GDK_FAIL;
703 : }
704 :
705 1319967 : if (BATttype(b) != BATttype(n) &&
706 : ATOMtype(b->ttype) != ATOMtype(n->ttype)) {
707 0 : TRC_DEBUG(CHECK, "Interpreting %s as %s.\n",
708 : ATOMname(BATttype(n)), ATOMname(BATttype(b)));
709 : }
710 :
711 1319967 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
712 :
713 1319937 : BATiter ni = bat_iterator(n);
714 :
715 1320686 : canditer_init(&ci, n, s);
716 1319038 : if (ci.ncand == 0) {
717 0 : goto doreturn;
718 : }
719 :
720 1319038 : if (BATcount(b) + ci.ncand > BUN_MAX) {
721 0 : bat_iterator_end(&ni);
722 0 : GDKerror("combined BATs too large\n");
723 0 : return GDK_FAIL;
724 : }
725 :
726 1319038 : if (b->hseqbase + BATcount(b) + ci.ncand >= GDK_oid_max) {
727 0 : bat_iterator_end(&ni);
728 0 : GDKerror("overflow of head value\n");
729 0 : return GDK_FAIL;
730 : }
731 :
732 1319038 : OIDXdestroy(b);
733 1321441 : STRMPdestroy(b); /* TODO: use STRMPappendBitString */
734 1321410 : RTREEdestroy(b);
735 :
736 1319452 : MT_lock_set(&b->theaplock);
737 1320050 : const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
738 1320171 : if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL &&
739 48 : VALcopy(&minprop, prop) != NULL) {
740 48 : minbound = VALptr(&minprop);
741 48 : if (ci.ncand == BATcount(n) &&
742 62 : ni.minpos != BUN_NONE &&
743 14 : atomcmp(BUNtail(ni, ni.minpos), minbound) < 0) {
744 0 : assert(0);
745 : GDKerror("value out of bounds\n");
746 : MT_lock_unset(&b->theaplock);
747 : goto bailout;
748 : }
749 : }
750 1319823 : if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL &&
751 40 : VALcopy(&maxprop, prop) != NULL) {
752 40 : maxbound = VALptr(&maxprop);
753 40 : if (ci.ncand == BATcount(n) &&
754 52 : ni.maxpos != BUN_NONE &&
755 12 : atomcmp(BUNtail(ni, ni.maxpos), maxbound) >= 0) {
756 0 : assert(0);
757 : GDKerror("value out of bounds\n");
758 : MT_lock_unset(&b->theaplock);
759 : goto bailout;
760 : }
761 : }
762 :
763 1318657 : if (BATcount(b) == 0 || b->tmaxpos != BUN_NONE) {
764 400215 : if (ni.maxpos != BUN_NONE) {
765 153646 : BATiter bi = bat_iterator_nolock(b);
766 153646 : if (BATcount(b) == 0 || atomcmp(BUNtail(bi, b->tmaxpos), BUNtail(ni, ni.maxpos)) < 0) {
767 147734 : if (s == NULL) {
768 147594 : b->tmaxpos = BATcount(b) + ni.maxpos;
769 : } else {
770 140 : b->tmaxpos = BUN_NONE;
771 : }
772 : }
773 : } else {
774 246569 : b->tmaxpos = BUN_NONE;
775 : }
776 : }
777 1318667 : if (BATcount(b) == 0 || b->tminpos != BUN_NONE) {
778 399461 : if (ni.minpos != BUN_NONE) {
779 153048 : BATiter bi = bat_iterator_nolock(b);
780 153048 : if (BATcount(b) == 0 || atomcmp(BUNtail(bi, b->tminpos), BUNtail(ni, ni.minpos)) > 0) {
781 146025 : if (s == NULL) {
782 145888 : b->tminpos = BATcount(b) + ni.minpos;
783 : } else {
784 137 : b->tminpos = BUN_NONE;
785 : }
786 : }
787 : } else {
788 246413 : b->tminpos = BUN_NONE;
789 : }
790 : }
791 1318664 : if (ci.ncand > BATcount(b) / gdk_unique_estimate_keep_fraction) {
792 1316876 : b->tunique_est = 0;
793 : }
794 1318664 : MT_lock_unset(&b->theaplock);
795 : /* load hash so that we can maintain it */
796 1321524 : (void) BATcheckhash(b);
797 :
798 1319514 : if (b->ttype == TYPE_void) {
799 : /* b does not have storage, keep it that way if we can */
800 130976 : HASHdestroy(b); /* we're not maintaining the hash here */
801 130960 : MT_lock_set(&b->theaplock);
802 130967 : if (BATtdensebi(&ni) && ci.tpe == cand_dense &&
803 128393 : (BATcount(b) == 0 ||
804 111444 : (BATtdense(b) &&
805 111444 : b->tseqbase + BATcount(b) == n->tseqbase + ci.seq - hseq))) {
806 : /* n is also dense and consecutive with b */
807 128334 : if (BATcount(b) == 0) {
808 16948 : if (minbound && n->tseqbase + ci.seq - hseq < *(const oid *)minbound) {
809 0 : assert(0);
810 : GDKerror("value not within bounds\n");
811 : MT_lock_unset(&b->theaplock);
812 : goto bailout;
813 : }
814 16948 : BATtseqbase(b, n->tseqbase + ci.seq - hseq);
815 : }
816 128311 : if (maxbound && b->tseqbase + BATcount(b) + ci.ncand >= *(const oid *)maxbound) {
817 0 : assert(0);
818 : GDKerror("value not within bounds\n");
819 : MT_lock_unset(&b->theaplock);
820 : goto bailout;
821 : }
822 128311 : BATsetcount(b, BATcount(b) + ci.ncand);
823 128288 : MT_lock_unset(&b->theaplock);
824 128324 : goto doreturn;
825 : }
826 2633 : if ((BATcount(b) == 0 || is_oid_nil(b->tseqbase)) &&
827 20 : ni.type == TYPE_void && is_oid_nil(n->tseqbase)) {
828 : /* both b and n are void/nil */
829 0 : if (notnull) {
830 0 : assert(0);
831 : GDKerror("NULL value not within bounds\n");
832 : MT_lock_unset(&b->theaplock);
833 : goto bailout;
834 : }
835 0 : BATtseqbase(b, oid_nil);
836 0 : BATsetcount(b, BATcount(b) + ci.ncand);
837 0 : MT_lock_unset(&b->theaplock);
838 0 : goto doreturn;
839 : }
840 : /* we need to materialize b; allocate enough capacity */
841 2633 : MT_lock_unset(&b->theaplock);
842 2633 : if (BATmaterialize(b, BATcount(b) + ci.ncand) != GDK_SUCCEED) {
843 0 : goto bailout;
844 : }
845 : }
846 :
847 : /* property setting */
848 1191171 : MT_lock_set(&b->theaplock);
849 1191636 : r = BATcount(b);
850 :
851 1191636 : if (BATcount(b) == 0) {
852 374032 : b->tsorted = ni.sorted;
853 374032 : b->trevsorted = ni.revsorted;
854 374032 : b->tseqbase = oid_nil;
855 374032 : b->tnonil = ni.nonil;
856 374032 : b->tnil = ni.nil && ci.ncand == BATcount(n);
857 374032 : if (ci.tpe == cand_dense) {
858 373880 : b->tnosorted = ci.seq - hseq <= ni.nosorted && ni.nosorted < ci.seq + ci.ncand - hseq ? ni.nosorted + hseq - ci.seq : 0;
859 373880 : b->tnorevsorted = ci.seq - hseq <= ni.norevsorted && ni.norevsorted < ci.seq + ci.ncand - hseq ? ni.norevsorted + hseq - ci.seq : 0;
860 373880 : if (BATtdensebi(&ni)) {
861 2136 : b->tseqbase = n->tseqbase + ci.seq - hseq;
862 : }
863 : } else {
864 152 : b->tnosorted = 0;
865 152 : b->tnorevsorted = 0;
866 : }
867 374032 : b->tkey = ni.key;
868 374032 : if (ci.ncand == BATcount(n)) {
869 372481 : b->tnokey[0] = ni.nokey[0];
870 372481 : b->tnokey[1] = ni.nokey[1];
871 : } else {
872 1551 : b->tnokey[0] = b->tnokey[1] = 0;
873 : }
874 : } else {
875 817604 : BUN last = r - 1;
876 817604 : BATiter bi = bat_iterator_nolock(b);
877 817604 : int xx = ATOMcmp(b->ttype,
878 : BUNtail(ni, ci.seq - hseq),
879 : BUNtail(bi, last));
880 816733 : if (b->tsorted && (!ni.sorted || xx < 0)) {
881 21963 : b->tsorted = false;
882 21963 : b->tnosorted = 0;
883 21963 : b->tseqbase = oid_nil;
884 : }
885 816733 : if (b->trevsorted &&
886 68786 : (!ni.revsorted || xx > 0)) {
887 18601 : b->trevsorted = false;
888 18601 : b->tnorevsorted = 0;
889 : }
890 816733 : if (b->tkey &&
891 75108 : (!(b->tsorted || b->trevsorted) ||
892 60308 : !ni.key || xx == 0)) {
893 20772 : BATkey(b, false);
894 : }
895 816702 : if (b->ttype != TYPE_void && b->tsorted && BATtdense(b) &&
896 2804 : (!BATtdensebi(&ni) ||
897 553 : ci.tpe != cand_dense ||
898 553 : 1 + *(oid *) BUNtloc(bi, last) != BUNtoid(n, ci.seq - hseq))) {
899 2271 : b->tseqbase = oid_nil;
900 : }
901 816702 : b->tnonil &= ni.nonil;
902 1621087 : b->tnil |= ni.nil && ci.ncand == ni.count;
903 : }
904 1190734 : MT_lock_unset(&b->theaplock);
905 1192721 : if (b->ttype == TYPE_str) {
906 86014 : if (insert_string_bat(b, &ni, &ci, force, mayshare, qry_ctx) != GDK_SUCCEED) {
907 0 : goto bailout;
908 : }
909 1106707 : } else if (ATOMvarsized(b->ttype)) {
910 517 : if (append_varsized_bat(b, &ni, &ci, mayshare) != GDK_SUCCEED) {
911 0 : goto bailout;
912 : }
913 1106190 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
914 : /* no bounds and NOT_NULL property on MSK bats */
915 138 : assert(minbound == NULL && maxbound == NULL && !notnull);
916 138 : if (append_msk_bat(b, &ni, &ci) != GDK_SUCCEED) {
917 0 : goto bailout;
918 : }
919 : } else {
920 1106052 : if (ci.ncand > BATcapacity(b) - BATcount(b)) {
921 : /* if needed space exceeds a normal growth
922 : * extend just with what's needed */
923 10655 : BUN ncap = BATcount(b) + ci.ncand;
924 10655 : BUN grows = BATgrows(b);
925 :
926 10643 : if (ncap > grows)
927 : grows = ncap;
928 10643 : if (BATextend(b, grows) != GDK_SUCCEED) {
929 0 : goto bailout;
930 : }
931 : }
932 1106165 : MT_rwlock_wrlock(&b->thashlock);
933 1106750 : hlocked = true;
934 1106750 : if (b->ttype != TYPE_void &&
935 1106471 : ni.type != TYPE_void &&
936 1102462 : ci.tpe == cand_dense) {
937 : /* use fast memcpy if we can */
938 1102392 : memcpy(Tloc(b, BATcount(b)),
939 1102392 : (const char *) ni.base + ((ci.seq - hseq) << ni.shift),
940 1102392 : ci.ncand << ni.shift);
941 1102405 : for (BUN i = 0; b->thash && i < ci.ncand; i++) {
942 4077 : HASHappend_locked(b, r, Tloc(b, r));
943 13 : r++;
944 : }
945 : } else {
946 4358 : const void *atomnil = ATOMnilptr(b->ttype);
947 2603190 : TIMEOUT_LOOP(ci.ncand, qry_ctx) {
948 2594453 : BUN p = canditer_next(&ci) - hseq;
949 2590468 : const void *t = BUNtail(ni, p);
950 2595158 : bool isnil = atomcmp(t, atomnil) == 0;
951 2591321 : if (notnull && isnil) {
952 0 : assert(0);
953 : GDKerror("NULL value not within bounds\n");
954 : goto bailout;
955 2591321 : } else if (minbound &&
956 2591321 : !isnil &&
957 0 : atomcmp(t, minbound) < 0) {
958 0 : assert(0);
959 : GDKerror("value not within bounds\n");
960 : goto bailout;
961 2591321 : } else if (maxbound &&
962 0 : !isnil &&
963 0 : atomcmp(t, maxbound) >= 0) {
964 0 : assert(0);
965 : GDKerror("value not within bounds\n");
966 : goto bailout;
967 2591321 : } else if (tfastins_nocheck(b, r, t) != GDK_SUCCEED) {
968 0 : goto bailout;
969 : }
970 2594349 : if (b->thash)
971 0 : HASHappend_locked(b, r, t);
972 2594455 : r++;
973 : }
974 4357 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
975 : }
976 1102684 : BUN nunique;
977 1102684 : nunique = b->thash ? b->thash->nunique : 0;
978 1102684 : MT_lock_set(&b->theaplock);
979 1105288 : BATsetcount(b, b->batCount + ci.ncand);
980 1105226 : if (nunique != 0)
981 5 : b->tunique_est = (double) nunique;
982 1105226 : MT_lock_unset(&b->theaplock);
983 1106722 : assert(hlocked);
984 1106722 : MT_rwlock_wrunlock(&b->thashlock);
985 1106722 : hlocked = false;
986 : }
987 :
988 1321388 : doreturn:
989 1321388 : bat_iterator_end(&ni);
990 1319097 : if (minbound)
991 48 : VALclear(&minprop);
992 1319499 : if (maxbound)
993 40 : VALclear(&maxprop);
994 1319499 : TRC_DEBUG(ALGO, "b=%s,n=" ALGOBATFMT ",s=" ALGOOPTBATFMT
995 : " -> " ALGOBATFMT " (" LLFMT " usec)\n",
996 : buf, ALGOBATPAR(n), ALGOOPTBATPAR(s), ALGOBATPAR(b),
997 : GDKusec() - t0);
998 :
999 : return GDK_SUCCEED;
1000 : bailout:
1001 0 : if (hlocked)
1002 0 : MT_rwlock_wrunlock(&b->thashlock);
1003 0 : if (minbound)
1004 0 : VALclear(&minprop);
1005 0 : if (maxbound)
1006 0 : VALclear(&maxprop);
1007 0 : bat_iterator_end(&ni);
1008 0 : return GDK_FAIL;
1009 : }
1010 :
1011 : gdk_return
1012 2270461 : BATappend(BAT *b, BAT *n, BAT *s, bool force)
1013 : {
1014 2270461 : return BATappend2(b, n, s, force, true);
1015 : }
1016 :
1017 : gdk_return
1018 4 : BATdel(BAT *b, BAT *d)
1019 : {
1020 4 : void (*atmdel) (Heap *, var_t *) = BATatoms[b->ttype].atomDel;
1021 4 : MT_lock_set(&b->theaplock);
1022 4 : BATiter bi = bat_iterator_nolock(b);
1023 4 : MT_lock_unset(&b->theaplock);
1024 :
1025 4 : assert(ATOMtype(d->ttype) == TYPE_oid);
1026 4 : assert(d->tsorted);
1027 4 : assert(d->tkey);
1028 4 : if (BATcount(d) == 0)
1029 : return GDK_SUCCEED;
1030 4 : OIDXdestroy(b);
1031 4 : HASHdestroy(b);
1032 4 : PROPdestroy(b);
1033 4 : STRMPdestroy(b);
1034 4 : RTREEdestroy(b);
1035 4 : if (BATtdense(d)) {
1036 2 : oid o = d->tseqbase;
1037 2 : BUN c = BATcount(d);
1038 :
1039 2 : if (o + c <= b->hseqbase)
1040 : return GDK_SUCCEED;
1041 2 : if (o < b->hseqbase) {
1042 0 : c -= b->hseqbase - o;
1043 0 : o = b->hseqbase;
1044 : }
1045 2 : if (o - b->hseqbase < b->batInserted) {
1046 0 : GDKerror("cannot delete committed values\n");
1047 0 : return GDK_FAIL;
1048 : }
1049 2 : if (o + c > b->hseqbase + BATcount(b))
1050 0 : c = b->hseqbase + BATcount(b) - o;
1051 2 : if (c == 0)
1052 : return GDK_SUCCEED;
1053 2 : if (atmdel) {
1054 0 : BUN p = o - b->hseqbase;
1055 0 : BUN q = p + c;
1056 0 : while (p < q) {
1057 0 : (*atmdel)(b->tvheap, (var_t *) BUNtloc(bi, p));
1058 0 : p++;
1059 : }
1060 : }
1061 2 : if (BATtdense(b) && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED)
1062 : return GDK_FAIL;
1063 2 : MT_lock_set(&b->theaplock);
1064 2 : if (o + c < b->hseqbase + BATcount(b)) {
1065 0 : o -= b->hseqbase;
1066 0 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1067 0 : BUN n = BATcount(b) - (o + c);
1068 : /* not very efficient, but first see
1069 : * how much this is used */
1070 0 : for (BUN i = 0; i < n; i++)
1071 0 : mskSetVal(b, o + i,
1072 0 : mskGetVal(b, o + c + i));
1073 : } else {
1074 0 : memmove(Tloc(b, o),
1075 0 : Tloc(b, o + c),
1076 0 : b->twidth * (BATcount(b) - (o + c)));
1077 : }
1078 0 : b->theap->dirty = true;
1079 : // o += b->hseqbase; // if this were to be used again
1080 : }
1081 2 : b->batCount -= c;
1082 : } else {
1083 2 : BATiter di = bat_iterator(d);
1084 2 : const oid *o = (const oid *) di.base;
1085 2 : const oid *s;
1086 2 : BUN c = di.count;
1087 2 : BUN nd = 0;
1088 2 : BUN pos;
1089 2 : char *p = NULL;
1090 :
1091 2 : if (o[c - 1] <= b->hseqbase) {
1092 0 : bat_iterator_end(&di);
1093 0 : return GDK_SUCCEED;
1094 : }
1095 2 : while (*o < b->hseqbase) {
1096 0 : o++;
1097 0 : c--;
1098 : }
1099 2 : if (*o - b->hseqbase < b->batInserted) {
1100 0 : bat_iterator_end(&di);
1101 0 : GDKerror("cannot delete committed values\n");
1102 0 : return GDK_FAIL;
1103 : }
1104 2 : if (BATtdense(b) && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED) {
1105 0 : bat_iterator_end(&di);
1106 0 : return GDK_FAIL;
1107 : }
1108 2 : s = o;
1109 2 : pos = *o - b->hseqbase;
1110 2 : if (ATOMstorage(b->ttype) != TYPE_msk)
1111 2 : p = Tloc(b, pos);
1112 6 : while (c > 0 && *o < b->hseqbase + BATcount(b)) {
1113 4 : size_t n;
1114 4 : if (atmdel)
1115 0 : (*atmdel)(b->tvheap, (var_t *) BUNtloc(bi, *o - b->hseqbase));
1116 4 : o++;
1117 4 : c--;
1118 4 : nd++;
1119 4 : if (c == 0 || *o - b->hseqbase >= BATcount(b))
1120 2 : n = b->hseqbase + BATcount(b) - o[-1] - 1;
1121 2 : else if ((oid) (o - s) < *o - *s)
1122 2 : n = o[0] - o[-1] - 1;
1123 : else
1124 : n = 0;
1125 4 : if (n > 0) {
1126 2 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1127 0 : BUN opos = o[-1] + 1 - b->hseqbase;
1128 : /* not very efficient, but
1129 : * first see how much this is
1130 : * used */
1131 0 : for (BUN i = 0; i < n; i++) {
1132 0 : mskSetVal(b, pos + i,
1133 0 : mskGetVal(b, opos + i));
1134 : }
1135 0 : pos += n;
1136 : } else {
1137 2 : n *= b->twidth;
1138 2 : memmove(p,
1139 2 : Tloc(b, o[-1] + 1 - b->hseqbase),
1140 : n);
1141 2 : p += n;
1142 : }
1143 : s = o;
1144 : }
1145 : }
1146 2 : bat_iterator_end(&di);
1147 2 : MT_lock_set(&b->theaplock);
1148 2 : b->theap->dirty = true;
1149 2 : b->batCount -= nd;
1150 : }
1151 4 : if (b->batCount <= 1) {
1152 : /* some trivial properties */
1153 2 : b->tkey = true;
1154 2 : b->tsorted = b->trevsorted = true;
1155 2 : if (b->batCount == 0) {
1156 2 : b->tnil = false;
1157 2 : b->tnonil = true;
1158 : }
1159 : }
1160 : /* not sure about these anymore */
1161 4 : b->tnosorted = b->tnorevsorted = 0;
1162 4 : b->tnokey[0] = b->tnokey[1] = 0;
1163 4 : b->tminpos = BUN_NONE;
1164 4 : b->tmaxpos = BUN_NONE;
1165 4 : b->tunique_est = 0.0;
1166 4 : MT_lock_unset(&b->theaplock);
1167 :
1168 4 : return GDK_SUCCEED;
1169 : }
1170 :
1171 : /*
1172 : * Replace all values in b with values from n whose location is given by
1173 : * the oid in either p or positions.
1174 : * If positions is used, autoincr specifies whether it is the first of a
1175 : * dense range of positions or whether it is a full-blown array of
1176 : * position.
1177 : * If mayappend is set, the position in p/positions may refer to
1178 : * locations beyond the end of b.
1179 : */
1180 : static gdk_return
1181 216463 : BATappend_or_update(BAT *b, BAT *p, const oid *positions, BAT *n,
1182 : bool mayappend, bool autoincr, bool force)
1183 : {
1184 216463 : lng t0 = GDKusec();
1185 216546 : oid pos = oid_nil;
1186 216546 : BUN nunique = 0;
1187 :
1188 216546 : if (b == NULL || b->ttype == TYPE_void || n == NULL) {
1189 : return GDK_SUCCEED;
1190 : }
1191 : /* either p or positions */
1192 216546 : assert((p == NULL) != (positions == NULL));
1193 216546 : if (p != NULL) {
1194 216370 : if (BATcount(p) != BATcount(n)) {
1195 0 : GDKerror("update BATs not the same size\n");
1196 0 : return GDK_FAIL;
1197 : }
1198 216370 : if (ATOMtype(p->ttype) != TYPE_oid) {
1199 0 : GDKerror("positions BAT not type OID\n");
1200 0 : return GDK_FAIL;
1201 : }
1202 216370 : if (BATtdense(p)) {
1203 207483 : pos = p->tseqbase;
1204 207483 : positions = &pos;
1205 207483 : autoincr = true;
1206 207483 : p = NULL;
1207 8887 : } else if (p->ttype != TYPE_void) {
1208 8885 : positions = (const oid *) Tloc(p, 0);
1209 8885 : autoincr = false;
1210 : } else {
1211 : autoincr = false;
1212 : }
1213 176 : } else if (autoincr) {
1214 176 : pos = *positions;
1215 : }
1216 216546 : if (BATcount(n) == 0) {
1217 : return GDK_SUCCEED;
1218 : }
1219 :
1220 29233 : BATiter ni = bat_iterator(n);
1221 :
1222 29242 : OIDXdestroy(b);
1223 29243 : STRMPdestroy(b);
1224 29243 : RTREEdestroy(b);
1225 : /* load hash so that we can maintain it */
1226 29229 : (void) BATcheckhash(b);
1227 :
1228 29234 : MT_lock_set(&b->theaplock);
1229 29233 : if (!force && (b->batRestricted != BAT_WRITE ||
1230 49 : (ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1)) {
1231 0 : MT_lock_unset(&b->theaplock);
1232 0 : bat_iterator_end(&ni);
1233 0 : GDKerror("access denied to %s, aborting.\n", BATgetId(b));
1234 0 : return GDK_FAIL;
1235 : }
1236 29233 : BATiter bi = bat_iterator_nolock(b);
1237 29233 : if (ni.count > BATcount(b) / gdk_unique_estimate_keep_fraction) {
1238 28596 : b->tunique_est = 0;
1239 : }
1240 :
1241 29233 : b->tsorted = b->trevsorted = false;
1242 29233 : b->tnosorted = b->tnorevsorted = 0;
1243 29233 : b->tseqbase = oid_nil;
1244 29233 : b->tkey = false;
1245 29233 : b->tnokey[0] = b->tnokey[1] = 0;
1246 :
1247 29233 : int (*atomcmp)(const void *, const void *) = ATOMcompare(b->ttype);
1248 29233 : const void *nil = ATOMnilptr(b->ttype);
1249 29233 : oid hseqend = b->hseqbase + BATcount(b);
1250 :
1251 29233 : MT_lock_unset(&b->theaplock);
1252 :
1253 32284 : bool anynil = false;
1254 32284 : bool locked = false;
1255 :
1256 32284 : if (b->tvheap) {
1257 1209868 : for (BUN i = 0; i < ni.count; i++) {
1258 1206989 : oid updid;
1259 1206989 : if (positions) {
1260 1205976 : updid = autoincr ? pos++ : *positions++;
1261 : } else {
1262 1013 : updid = BUNtoid(p, i);
1263 : }
1264 :
1265 1206989 : if (updid < b->hseqbase ||
1266 1206989 : (!mayappend && updid >= hseqend)) {
1267 0 : GDKerror("id out of range\n");
1268 0 : goto bailout;
1269 : }
1270 1206989 : updid -= b->hseqbase;
1271 1206989 : if (!force && updid < b->batInserted) {
1272 0 : GDKerror("updating committed value\n");
1273 0 : goto bailout;
1274 : }
1275 :
1276 1206989 : const void *new = BUNtvar(ni, i);
1277 :
1278 1206989 : if (updid >= BATcount(b)) {
1279 23585 : assert(mayappend);
1280 23585 : if (locked) {
1281 4 : MT_rwlock_wrunlock(&b->thashlock);
1282 4 : locked = false;
1283 : }
1284 23585 : if (b->tminpos != bi.minpos ||
1285 23584 : b->tmaxpos != bi.maxpos) {
1286 1 : MT_lock_set(&b->theaplock);
1287 1 : b->tminpos = bi.minpos;
1288 1 : b->tmaxpos = bi.maxpos;
1289 1 : MT_lock_unset(&b->theaplock);
1290 : }
1291 23585 : if (BATcount(b) < updid &&
1292 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1293 0 : bat_iterator_end(&ni);
1294 0 : return GDK_FAIL;
1295 : }
1296 23585 : if (BUNappend(b, new, force) != GDK_SUCCEED) {
1297 0 : bat_iterator_end(&ni);
1298 0 : return GDK_FAIL;
1299 : }
1300 23585 : bi = bat_iterator_nolock(b);
1301 136940 : continue;
1302 : }
1303 :
1304 : /* it is possible that a previous run was killed
1305 : * after an update (with a mmapped tail file)
1306 : * but before that was committed, then the
1307 : * offset may point outside of the vheap */
1308 1183404 : const void *old = BUNtvaroff(bi, updid) < bi.vhfree ? BUNtvar(bi, updid) : NULL;
1309 :
1310 1175151 : if (old && atomcmp(old, new) == 0) {
1311 : /* replacing with the same value:
1312 : * nothing to do */
1313 113355 : continue;
1314 : }
1315 :
1316 1069726 : bool isnil = atomcmp(new, nil) == 0;
1317 1066652 : anynil |= isnil;
1318 1066652 : MT_lock_set(&b->theaplock);
1319 1066920 : if (old == NULL ||
1320 1066920 : (b->tnil &&
1321 778 : !anynil &&
1322 778 : atomcmp(old, nil) == 0)) {
1323 : /* if old value is nil and no new
1324 : * value is, we're not sure anymore
1325 : * about the nil property, so we must
1326 : * clear it */
1327 772 : b->tnil = false;
1328 : }
1329 1066920 : b->tnonil &= !isnil;
1330 1066920 : b->tnil |= isnil;
1331 1066920 : MT_lock_unset(&b->theaplock);
1332 1067107 : if (bi.maxpos != BUN_NONE) {
1333 4236 : if (!isnil &&
1334 2118 : atomcmp(BUNtvar(bi, bi.maxpos), new) < 0) {
1335 : /* new value is larger than
1336 : * previous largest */
1337 24 : bi.maxpos = updid;
1338 4188 : } else if (old == NULL ||
1339 2107 : (atomcmp(BUNtvar(bi, bi.maxpos), old) == 0 &&
1340 13 : atomcmp(new, old) != 0)) {
1341 : /* old value is equal to
1342 : * largest and new value is
1343 : * smaller, so we don't know
1344 : * anymore which is the
1345 : * largest */
1346 13 : bi.maxpos = BUN_NONE;
1347 : }
1348 : }
1349 1067107 : if (bi.minpos != BUN_NONE) {
1350 4226 : if (!isnil &&
1351 2113 : atomcmp(BUNtvar(bi, bi.minpos), new) > 0) {
1352 : /* new value is smaller than
1353 : * previous smallest */
1354 15 : bi.minpos = updid;
1355 4196 : } else if (old == NULL ||
1356 2120 : (atomcmp(BUNtvar(bi, bi.minpos), old) == 0 &&
1357 22 : atomcmp(new, old) != 0)) {
1358 : /* old value is equal to
1359 : * smallest and new value is
1360 : * larger, so we don't know
1361 : * anymore which is the
1362 : * smallest */
1363 22 : bi.minpos = BUN_NONE;
1364 : }
1365 : }
1366 1067107 : if (!locked) {
1367 2381 : MT_rwlock_wrlock(&b->thashlock);
1368 2381 : locked = true;
1369 : }
1370 1067107 : if (old)
1371 1067107 : HASHdelete_locked(&bi, updid, old);
1372 0 : else if (b->thash) {
1373 0 : doHASHdestroy(b, b->thash);
1374 0 : b->thash = NULL;
1375 : }
1376 :
1377 1067084 : var_t d;
1378 1067084 : switch (b->twidth) {
1379 1058556 : case 1:
1380 1058556 : d = (var_t) ((uint8_t *) b->theap->base)[updid] + GDK_VAROFFSET;
1381 1058556 : break;
1382 6496 : case 2:
1383 6496 : d = (var_t) ((uint16_t *) b->theap->base)[updid] + GDK_VAROFFSET;
1384 6496 : break;
1385 1997 : case 4:
1386 1997 : d = (var_t) ((uint32_t *) b->theap->base)[updid];
1387 1997 : break;
1388 : #if SIZEOF_VAR_T == 8
1389 35 : case 8:
1390 35 : d = (var_t) ((uint64_t *) b->theap->base)[updid];
1391 35 : break;
1392 : #endif
1393 : default:
1394 0 : MT_UNREACHABLE();
1395 : }
1396 1067084 : MT_lock_set(&b->theaplock);
1397 1067059 : gdk_return rc = ATOMreplaceVAR(b, &d, new);
1398 1067021 : MT_lock_unset(&b->theaplock);
1399 1067032 : if (rc != GDK_SUCCEED) {
1400 0 : goto bailout;
1401 : }
1402 1067032 : if (b->twidth < SIZEOF_VAR_T &&
1403 1067044 : (b->twidth <= 2 ? d - GDK_VAROFFSET : d) >= ((size_t) 1 << (8 << b->tshift))) {
1404 : /* doesn't fit in current heap, upgrade it */
1405 16 : if (GDKupgradevarheap(b, d, 0, MAX(updid, b->batCount)) != GDK_SUCCEED) {
1406 0 : goto bailout;
1407 : }
1408 : }
1409 : /* in case ATOMreplaceVAR and/or
1410 : * GDKupgradevarheap replaces a heap, we need to
1411 : * reinitialize the iterator */
1412 : {
1413 : /* save and restore minpos/maxpos */
1414 1067032 : BUN minpos = bi.minpos;
1415 1067032 : BUN maxpos = bi.maxpos;
1416 1067032 : bi = bat_iterator_nolock(b);
1417 1067032 : bi.minpos = minpos;
1418 1067032 : bi.maxpos = maxpos;
1419 : }
1420 1067032 : switch (b->twidth) {
1421 1058488 : case 1:
1422 1058488 : ((uint8_t *) b->theap->base)[updid] = (uint8_t) (d - GDK_VAROFFSET);
1423 1058488 : break;
1424 6512 : case 2:
1425 6512 : ((uint16_t *) b->theap->base)[updid] = (uint16_t) (d - GDK_VAROFFSET);
1426 6512 : break;
1427 1997 : case 4:
1428 1997 : ((uint32_t *) b->theap->base)[updid] = (uint32_t) d;
1429 1997 : break;
1430 : #if SIZEOF_VAR_T == 8
1431 35 : case 8:
1432 35 : ((uint64_t *) b->theap->base)[updid] = (uint64_t) d;
1433 35 : break;
1434 : #endif
1435 : default:
1436 0 : MT_UNREACHABLE();
1437 : }
1438 1067032 : HASHinsert_locked(&bi, updid, new);
1439 :
1440 : }
1441 2879 : if (locked) {
1442 2377 : if (b->thash)
1443 2 : nunique = b->thash->nunique;
1444 2377 : MT_rwlock_wrunlock(&b->thashlock);
1445 2377 : locked = false;
1446 : }
1447 2879 : MT_lock_set(&b->theaplock);
1448 2879 : b->tvheap->dirty = true;
1449 2879 : MT_lock_unset(&b->theaplock);
1450 26362 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
1451 0 : assert(b->thash == NULL);
1452 0 : HASHdestroy(b); /* hash doesn't make sense for msk */
1453 0 : for (BUN i = 0; i < ni.count; i++) {
1454 0 : oid updid;
1455 0 : if (positions) {
1456 0 : updid = autoincr ? pos++ : *positions++;
1457 : } else {
1458 0 : updid = BUNtoid(p, i);
1459 : }
1460 :
1461 0 : if (updid < b->hseqbase ||
1462 0 : (!mayappend && updid >= hseqend)) {
1463 0 : GDKerror("id out of range\n");
1464 0 : bat_iterator_end(&ni);
1465 0 : return GDK_FAIL;
1466 : }
1467 0 : updid -= b->hseqbase;
1468 0 : if (!force && updid < b->batInserted) {
1469 0 : GDKerror("updating committed value\n");
1470 0 : bat_iterator_end(&ni);
1471 0 : return GDK_FAIL;
1472 : }
1473 0 : if (updid >= BATcount(b)) {
1474 0 : assert(mayappend);
1475 0 : if (BATcount(b) < updid &&
1476 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1477 0 : bat_iterator_end(&ni);
1478 0 : return GDK_FAIL;
1479 : }
1480 0 : if (BUNappend(b, BUNtmsk(ni, i), force) != GDK_SUCCEED) {
1481 0 : bat_iterator_end(&ni);
1482 0 : return GDK_FAIL;
1483 : }
1484 0 : continue;
1485 : }
1486 0 : mskSetVal(b, updid, Tmskval(&ni, i));
1487 : }
1488 0 : bi = bat_iterator_nolock(b);
1489 26362 : } else if (autoincr) {
1490 17975 : if (pos < b->hseqbase ||
1491 17085 : (!mayappend && pos + ni.count > hseqend)) {
1492 0 : GDKerror("id out of range\n");
1493 0 : bat_iterator_end(&ni);
1494 0 : return GDK_FAIL;
1495 : }
1496 17975 : pos -= b->hseqbase;
1497 17975 : if (!force && pos < b->batInserted) {
1498 0 : GDKerror("updating committed value\n");
1499 0 : bat_iterator_end(&ni);
1500 0 : return GDK_FAIL;
1501 : }
1502 :
1503 17975 : if (pos >= BATcount(b)) {
1504 467 : assert(mayappend);
1505 467 : bat_iterator_end(&ni);
1506 467 : if (BATcount(b) < pos &&
1507 0 : BUNappendmulti(b, NULL, (BUN) (pos - BATcount(b)), force) != GDK_SUCCEED) {
1508 : return GDK_FAIL;
1509 : }
1510 467 : return BATappend(b, n, NULL, force);
1511 : }
1512 17514 : if (pos + ni.count > BATcount(b) &&
1513 6 : BUNappendmulti(b, NULL, (BUN) (pos + ni.count - BATcount(b)), force) != GDK_SUCCEED) {
1514 0 : bat_iterator_end(&ni);
1515 0 : return GDK_FAIL;
1516 : }
1517 17508 : bi = bat_iterator_nolock(b);
1518 :
1519 : /* we copy all of n, so if there are nils in n we get
1520 : * nils in b (and else we don't know) */
1521 17508 : b->tnil = ni.nil;
1522 : /* we may not copy over all of b, so we only know that
1523 : * there are no nils in b afterward if there weren't
1524 : * any in either b or n to begin with */
1525 17508 : b->tnonil &= ni.nonil;
1526 : /* if there is no hash, we don't start the loop, if
1527 : * there is only a persisted hash, it will get destroyed
1528 : * in the first iteration, after which there is no hash
1529 : * and the loop ends */
1530 17508 : MT_rwlock_wrlock(&b->thashlock);
1531 17512 : locked = true;
1532 17512 : for (BUN i = pos, j = pos + ni.count; i < j && b->thash; i++)
1533 16 : HASHdelete_locked(&bi, i, Tloc(b, i));
1534 17496 : if (ni.type == TYPE_void) {
1535 0 : assert(b->ttype == TYPE_oid);
1536 0 : oid *o = Tloc(b, pos);
1537 0 : if (is_oid_nil(ni.tseq)) {
1538 : /* we may or may not overwrite the old
1539 : * min/max values */
1540 0 : bi.minpos = BUN_NONE;
1541 0 : bi.maxpos = BUN_NONE;
1542 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1543 0 : o[i] = oid_nil;
1544 0 : b->tnil = true;
1545 : } else {
1546 0 : oid v = ni.tseq;
1547 : /* we know min/max of n, so we know
1548 : * the new min/max of b if those of n
1549 : * are smaller/larger than the old */
1550 0 : if (bi.minpos != BUN_NONE) {
1551 0 : if (v <= BUNtoid(b, bi.minpos))
1552 0 : bi.minpos = pos;
1553 0 : else if (pos <= bi.minpos && bi.minpos < pos + ni.count)
1554 0 : bi.minpos = BUN_NONE;
1555 : }
1556 0 : if (complex_cand(n)) {
1557 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1558 0 : o[i] = *(oid *)Tpos(&ni, i);
1559 : /* last value */
1560 0 : v = o[ni.count - 1];
1561 : } else {
1562 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1563 0 : o[i] = v++;
1564 : /* last value added (not one beyond) */
1565 0 : v--;
1566 : }
1567 0 : if (bi.maxpos != BUN_NONE) {
1568 0 : if (v >= BUNtoid(b, bi.maxpos))
1569 0 : bi.maxpos = pos + ni.count - 1;
1570 0 : else if (pos <= bi.maxpos && bi.maxpos < pos + ni.count)
1571 0 : bi.maxpos = BUN_NONE;
1572 : }
1573 : }
1574 : } else {
1575 : /* if the extremes of n are at least as
1576 : * extreme as those of b, we can replace b's
1577 : * min/max, else we don't know what b's new
1578 : * min/max are*/
1579 17794 : if (bi.minpos != BUN_NONE && ni.minpos != BUN_NONE &&
1580 298 : atomcmp(BUNtloc(bi, bi.minpos), BUNtail(ni, ni.minpos)) >= 0) {
1581 183 : bi.minpos = pos + ni.minpos;
1582 : } else {
1583 17313 : bi.minpos = BUN_NONE;
1584 : }
1585 17818 : if (bi.maxpos != BUN_NONE && ni.maxpos != BUN_NONE &&
1586 322 : atomcmp(BUNtloc(bi, bi.maxpos), BUNtail(ni, ni.maxpos)) <= 0) {
1587 236 : bi.maxpos = pos + ni.maxpos;
1588 : } else {
1589 17260 : bi.maxpos = BUN_NONE;
1590 : }
1591 17496 : memcpy(Tloc(b, pos), ni.base,
1592 17496 : ni.count << b->tshift);
1593 : }
1594 : /* either we have a hash that was updated above, or we
1595 : * have no hash; we cannot have the case where there is
1596 : * only a persisted (unloaded) hash since it would have
1597 : * been destroyed above */
1598 17496 : if (b->thash != NULL) {
1599 0 : for (BUN i = pos, j = pos + ni.count; i < j; i++)
1600 0 : HASHinsert_locked(&bi, i, Tloc(b, i));
1601 0 : if (b->thash)
1602 0 : nunique = b->thash->nunique;
1603 : }
1604 17501 : MT_rwlock_wrunlock(&b->thashlock);
1605 17509 : locked = false;
1606 17509 : if (ni.count == BATcount(b)) {
1607 : /* if we replaced all values of b by values
1608 : * from n, we can also copy the min/max
1609 : * properties */
1610 8806 : bi.minpos = ni.minpos;
1611 8806 : bi.maxpos = ni.maxpos;
1612 8806 : if (BATtdensebi(&ni)) {
1613 : /* replaced all of b with a dense sequence */
1614 47 : MT_lock_set(&b->theaplock);
1615 47 : BATtseqbase(b, ni.tseq);
1616 47 : MT_lock_unset(&b->theaplock);
1617 : }
1618 : }
1619 : } else {
1620 170090869 : for (BUN i = 0; i < ni.count; i++) {
1621 170082483 : oid updid;
1622 170082483 : if (positions) {
1623 : /* assert(!autoincr) */
1624 170082483 : updid = *positions++;
1625 : } else {
1626 0 : updid = BUNtoid(p, i);
1627 : }
1628 :
1629 170082483 : if (updid < b->hseqbase ||
1630 170082483 : (!mayappend && updid >= hseqend)) {
1631 0 : GDKerror("id out of range\n");
1632 0 : goto bailout;
1633 : }
1634 170082483 : updid -= b->hseqbase;
1635 170082483 : if (!force && updid < b->batInserted) {
1636 0 : GDKerror("updating committed value\n");
1637 0 : goto bailout;
1638 : }
1639 :
1640 170082483 : const void *new = BUNtloc(ni, i);
1641 :
1642 170082483 : if (updid >= BATcount(b)) {
1643 16059 : assert(mayappend);
1644 16059 : if (locked) {
1645 10 : MT_rwlock_wrunlock(&b->thashlock);
1646 10 : locked = false;
1647 : }
1648 16059 : if (b->tminpos != bi.minpos ||
1649 16057 : b->tmaxpos != bi.maxpos) {
1650 3 : MT_lock_set(&b->theaplock);
1651 3 : b->tminpos = bi.minpos;
1652 3 : b->tmaxpos = bi.maxpos;
1653 3 : MT_lock_unset(&b->theaplock);
1654 : }
1655 16059 : if (BATcount(b) < updid &&
1656 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1657 0 : goto bailout;
1658 : }
1659 16059 : if (BUNappend(b, new, force) != GDK_SUCCEED) {
1660 0 : bat_iterator_end(&ni);
1661 0 : return GDK_FAIL;
1662 : }
1663 16059 : bi = bat_iterator_nolock(b);
1664 16059 : continue;
1665 : }
1666 :
1667 170066424 : const void *old = BUNtloc(bi, updid);
1668 170066424 : bool isnil = atomcmp(new, nil) == 0;
1669 167021383 : anynil |= isnil;
1670 167021383 : if (b->tnil &&
1671 1908 : !anynil &&
1672 1908 : atomcmp(old, nil) == 0) {
1673 : /* if old value is nil and no new
1674 : * value is, we're not sure anymore
1675 : * about the nil property, so we must
1676 : * clear it */
1677 1904 : b->tnil = false;
1678 : }
1679 167021383 : b->tnonil &= !isnil;
1680 167021383 : b->tnil |= isnil;
1681 167021383 : if (bi.maxpos != BUN_NONE) {
1682 10672 : if (!isnil &&
1683 5334 : atomcmp(BUNtloc(bi, bi.maxpos), new) < 0) {
1684 : /* new value is larger than
1685 : * previous largest */
1686 137 : bi.maxpos = updid;
1687 5208 : } else if (atomcmp(BUNtloc(bi, bi.maxpos), old) == 0 &&
1688 7 : atomcmp(new, old) != 0) {
1689 : /* old value is equal to
1690 : * largest and new value is
1691 : * smaller, so we don't know
1692 : * anymore which is the
1693 : * largest */
1694 7 : bi.maxpos = BUN_NONE;
1695 : }
1696 : }
1697 167021383 : if (bi.minpos != BUN_NONE) {
1698 10672 : if (!isnil &&
1699 5334 : atomcmp(BUNtloc(bi, bi.minpos), new) > 0) {
1700 : /* new value is smaller than
1701 : * previous smallest */
1702 6 : bi.minpos = updid;
1703 5338 : } else if (atomcmp(BUNtloc(bi, bi.minpos), old) == 0 &&
1704 6 : atomcmp(new, old) != 0) {
1705 : /* old value is equal to
1706 : * smallest and new value is
1707 : * larger, so we don't know
1708 : * anymore which is the
1709 : * smallest */
1710 6 : bi.minpos = BUN_NONE;
1711 : }
1712 : }
1713 :
1714 167021383 : if (!locked) {
1715 8377 : MT_rwlock_wrlock(&b->thashlock);
1716 8377 : locked = true;
1717 : }
1718 167021391 : HASHdelete_locked(&bi, updid, old);
1719 167025246 : switch (b->twidth) {
1720 27225894 : case 1:
1721 27225894 : ((bte *) b->theap->base)[updid] = * (bte *) new;
1722 27225894 : break;
1723 525067 : case 2:
1724 525067 : ((sht *) b->theap->base)[updid] = * (sht *) new;
1725 525067 : break;
1726 35784700 : case 4:
1727 35784700 : ((int *) b->theap->base)[updid] = * (int *) new;
1728 35784700 : break;
1729 98202383 : case 8:
1730 98202383 : ((lng *) b->theap->base)[updid] = * (lng *) new;
1731 98202383 : break;
1732 5287202 : case 16:
1733 : #ifdef HAVE_HGE
1734 5287202 : ((hge *) b->theap->base)[updid] = * (hge *) new;
1735 : #else
1736 : ((uuid *) b->theap->base)[updid] = * (uuid *) new;
1737 : #endif
1738 5287202 : break;
1739 0 : default:
1740 0 : memcpy(BUNtloc(bi, updid), new, ATOMsize(b->ttype));
1741 0 : break;
1742 : }
1743 167025246 : HASHinsert_locked(&bi, updid, new);
1744 : }
1745 8386 : if (locked) {
1746 8376 : if (b->thash)
1747 0 : nunique = b->thash->nunique;
1748 8376 : MT_rwlock_wrunlock(&b->thashlock);
1749 8376 : locked = false;
1750 : }
1751 : }
1752 28773 : bat_iterator_end(&ni);
1753 28759 : MT_lock_set(&b->theaplock);
1754 28769 : if (nunique != 0)
1755 2 : b->tunique_est = (double) nunique;
1756 28769 : b->tminpos = bi.minpos;
1757 28769 : b->tmaxpos = bi.maxpos;
1758 28769 : b->theap->dirty = true;
1759 28769 : MT_lock_unset(&b->theaplock);
1760 28777 : TRC_DEBUG(ALGO,
1761 : "BATreplace(" ALGOBATFMT "," ALGOOPTBATFMT "," ALGOBATFMT ") " LLFMT " usec\n",
1762 : ALGOBATPAR(b), ALGOOPTBATPAR(p), ALGOBATPAR(n),
1763 : GDKusec() - t0);
1764 : return GDK_SUCCEED;
1765 :
1766 0 : bailout:
1767 0 : bat_iterator_end(&ni);
1768 0 : if (locked) {
1769 0 : Hash *h = b->thash;
1770 0 : b->thash = NULL;
1771 0 : MT_rwlock_wrunlock(&b->thashlock);
1772 0 : doHASHdestroy(b, h);
1773 : }
1774 : return GDK_FAIL;
1775 : }
1776 :
1777 : /* replace values from b at locations specified in p with values in n */
1778 : gdk_return
1779 215580 : BATreplace(BAT *b, BAT *p, BAT *n, bool force)
1780 : {
1781 215580 : return BATappend_or_update(b, p, NULL, n, false, false, force);
1782 : }
1783 :
1784 : /* like BATreplace, but p may specify locations beyond the end of b */
1785 : gdk_return
1786 1034 : BATupdate(BAT *b, BAT *p, BAT *n, bool force)
1787 : {
1788 1034 : return BATappend_or_update(b, p, NULL, n, true, false, force);
1789 : }
1790 :
1791 : #if 0 /* not used */
1792 : /* like BATreplace, but the positions are given by an array of oid values */
1793 : gdk_return
1794 : BATreplacepos(BAT *b, const oid *positions, BAT *n, bool autoincr, bool force)
1795 : {
1796 : return BATappend_or_update(b, NULL, positions, n, false, autoincr, force);
1797 : }
1798 : #endif
1799 :
1800 : /* like BATreplace, but the positions are given by an array of oid
1801 : * values, and they may specify locations beyond the end of b */
1802 : gdk_return
1803 176 : BATupdatepos(BAT *b, const oid *positions, BAT *n, bool autoincr, bool force)
1804 : {
1805 176 : return BATappend_or_update(b, NULL, positions, n, true, autoincr, force);
1806 : }
1807 :
1808 : /*
1809 : * BAT Selections
1810 : * The BAT selectors are among the most heavily used operators.
1811 : * Their efficient implementation is therefore mandatory.
1812 : *
1813 : * BAT slice
1814 : * This function returns a horizontal slice from a BAT. It optimizes
1815 : * execution by avoiding to copy when the BAT is memory mapped (in
1816 : * this case, an independent submap is created) or else when it is
1817 : * read-only, then a VIEW bat is created as a result.
1818 : *
1819 : * If a new copy has to be created, this function takes care to
1820 : * preserve void-columns (in this case, the seqbase has to be
1821 : * recomputed in the result).
1822 : *
1823 : * The selected range is excluding the high value.
1824 : */
1825 : BAT *
1826 12182154 : BATslice(BAT *b, BUN l, BUN h)
1827 : {
1828 12182154 : BUN low = l;
1829 12182154 : BAT *bn = NULL;
1830 :
1831 12182154 : BATcheck(b, NULL);
1832 12182154 : BATiter bi = bat_iterator(b);
1833 12196972 : if (l > bi.count)
1834 : l = bi.count;
1835 12196972 : if (h > bi.count)
1836 : h = bi.count;
1837 12196972 : if (h < l)
1838 : h = l;
1839 :
1840 12196972 : if (complex_cand(b)) {
1841 : /* slicing a candidate list with exceptions */
1842 95 : struct canditer ci;
1843 95 : canditer_init(&ci, NULL, b);
1844 95 : if (b->hseqbase + l >= ci.hseq) {
1845 95 : l = b->hseqbase + l - ci.hseq;
1846 95 : h = b->hseqbase + h - ci.hseq;
1847 : } else {
1848 0 : l = 0;
1849 0 : if (b->hseqbase + h >= ci.hseq)
1850 0 : h = b->hseqbase + h - ci.hseq;
1851 : else
1852 : h = 0;
1853 : }
1854 95 : bn = canditer_slice(&ci, l, h);
1855 95 : goto doreturn;
1856 : }
1857 : /* If the source BAT is readonly, then we can obtain a VIEW
1858 : * that just reuses the memory of the source. */
1859 12196877 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1860 : /* forget about slices for bit masks: we can't deal
1861 : * with difference in alignment, so we'll just make a
1862 : * copy */
1863 0 : bn = COLnew((oid) (b->hseqbase + low), b->ttype, h - l, TRANSIENT);
1864 : /* we use BATappend with a candidate list to easily
1865 : * copy the part of b that we need */
1866 0 : BAT *s = BATdense(0, (oid) (b->hseqbase + low), h - l);
1867 0 : if (bn == NULL ||
1868 0 : s == NULL ||
1869 0 : BATappend(bn, b, s, false) != GDK_SUCCEED) {
1870 0 : BBPreclaim(bn);
1871 0 : BBPreclaim(s);
1872 0 : bn = NULL;
1873 0 : goto doreturn;
1874 : }
1875 0 : BBPunfix(s->batCacheid);
1876 0 : goto doreturn;
1877 : }
1878 12196877 : restrict_t prestricted;
1879 14175393 : if (bi.restricted == BAT_READ && VIEWtparent(b)) {
1880 1974350 : BAT *pb = BBP_desc(VIEWtparent(b));
1881 1974350 : MT_lock_set(&pb->theaplock);
1882 1973565 : prestricted = pb->batRestricted;
1883 1973565 : MT_lock_unset(&pb->theaplock);
1884 : } else {
1885 : prestricted = BAT_WRITE; /* just initialize with anything */
1886 : }
1887 12201043 : if (bi.restricted == BAT_READ &&
1888 12146388 : (!VIEWtparent(b) || prestricted == BAT_READ)) {
1889 12146261 : bn = VIEWcreate(b->hseqbase + low, b, l, h);
1890 12146261 : if (bn == NULL)
1891 : goto doreturn;
1892 : } else {
1893 : /* create a new BAT and put everything into it */
1894 54782 : BUN p = l;
1895 54782 : BUN q = h;
1896 :
1897 54782 : bn = COLnew((oid) (b->hseqbase + low), BATtdensebi(&bi) || (b->ttype == TYPE_oid && h == l) ? TYPE_void : b->ttype, h - l, TRANSIENT);
1898 54824 : if (bn == NULL)
1899 0 : goto doreturn;
1900 :
1901 54824 : if (bn->ttype == TYPE_void) {
1902 37122 : BATsetcount(bn, h - l);
1903 37084 : BATtseqbase(bn, is_oid_nil(bi.tseq) ? oid_nil : h == l ? 0 : (oid) (bi.tseq + low));
1904 17702 : } else if (bn->tvheap == NULL) {
1905 11121 : assert(BATatoms[bn->ttype].atomPut == NULL);
1906 11121 : memcpy(Tloc(bn, 0), (const char *) bi.base + (p << bi.shift),
1907 11121 : (q - p) << bn->tshift);
1908 11121 : bn->theap->dirty = true;
1909 11121 : BATsetcount(bn, h - l);
1910 : } else {
1911 1718156 : for (; p < q; p++) {
1912 1711617 : if (bunfastapp(bn, BUNtail(bi, p)) != GDK_SUCCEED) {
1913 0 : BBPreclaim(bn);
1914 0 : bn = NULL;
1915 0 : goto doreturn;
1916 : }
1917 : }
1918 : }
1919 54735 : bn->theap->dirty = true;
1920 54735 : bn->tsorted = bi.sorted || bn->batCount <= 1;
1921 54735 : bn->trevsorted = bi.revsorted || bn->batCount <= 1;
1922 54735 : bn->tkey = bi.key || bn->batCount <= 1;
1923 54735 : bn->tnonil = bi.nonil;
1924 54735 : bn->tnil = false; /* we don't know */
1925 54735 : if (bi.nosorted > l && bi.nosorted < h && !bn->tsorted)
1926 2320 : bn->tnosorted = bi.nosorted - l;
1927 : else
1928 52415 : bn->tnosorted = 0;
1929 54735 : if (bi.norevsorted > l && bi.norevsorted < h && !bn->trevsorted)
1930 4754 : bn->tnorevsorted = bi.norevsorted - l;
1931 : else
1932 49981 : bn->tnorevsorted = 0;
1933 54735 : if (bi.nokey[0] >= l && bi.nokey[0] < h &&
1934 44552 : bi.nokey[1] >= l && bi.nokey[1] < h &&
1935 429 : bi.nokey[0] != bi.nokey[1] &&
1936 : !bn->tkey) {
1937 429 : bn->tnokey[0] = bi.nokey[0] - l;
1938 429 : bn->tnokey[1] = bi.nokey[1] - l;
1939 : } else {
1940 54306 : bn->tnokey[0] = bn->tnokey[1] = 0;
1941 : }
1942 : }
1943 12200043 : doreturn:
1944 12200043 : bat_iterator_end(&bi);
1945 12202657 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",lo=" BUNFMT ",hi=" BUNFMT " -> "
1946 : ALGOOPTBATFMT "\n",
1947 : ALGOBATPAR(b), l, h, ALGOOPTBATPAR(bn));
1948 : return bn;
1949 : }
1950 :
1951 : #define BAT_ORDERED(TPE) \
1952 : do { \
1953 : const TPE *restrict vals = Tloc(b, 0); \
1954 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
1955 : if (vals[p - 1] > vals[p]) { \
1956 : b->tnosorted = p; \
1957 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
1958 : goto doreturn; \
1959 : } else if (vals[p - 1] < vals[p]) { \
1960 : if (!b->trevsorted && b->tnorevsorted == 0) { \
1961 : b->tnorevsorted = p; \
1962 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b)); \
1963 : } \
1964 : } else if (!b->tkey && b->tnokey[1] == 0) { \
1965 : b->tnokey[0] = p - 1; \
1966 : b->tnokey[1] = p; \
1967 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b)); \
1968 : } \
1969 : } \
1970 : } while (0)
1971 :
1972 : #define BAT_ORDERED_FP(TPE) \
1973 : do { \
1974 : const TPE *restrict vals = Tloc(b, 0); \
1975 : TPE prev = vals[0]; \
1976 : bool prevnil = is_##TPE##_nil(prev); \
1977 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
1978 : TPE next = vals[p]; \
1979 : int cmp = prevnil ? -!(prevnil = is_##TPE##_nil(next)) : (prevnil = is_##TPE##_nil(next)) ? 1 : (prev > next) - (prev < next); \
1980 : prev = next; \
1981 : if (cmp > 0) { \
1982 : b->tnosorted = bi.nosorted = p; \
1983 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
1984 : goto doreturn; \
1985 : } else if (cmp < 0) { \
1986 : if (!b->trevsorted && b->tnorevsorted == 0) { \
1987 : b->tnorevsorted = bi.norevsorted = p; \
1988 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b)); \
1989 : } \
1990 : } else if (!b->tkey && b->tnokey[1] == 0) { \
1991 : b->tnokey[0] = bi.nokey[0] = p - 1; \
1992 : b->tnokey[1] = bi.nokey[1] = p; \
1993 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b)); \
1994 : } \
1995 : } \
1996 : } while (0)
1997 :
1998 : /* Return whether the BAT is ordered or not. If we don't know, invest
1999 : * in a scan and record the results in the bat descriptor. If during
2000 : * the scan we happen to find evidence that the BAT is not reverse
2001 : * sorted, we record the location. */
2002 : bool
2003 3148752 : BATordered(BAT *b)
2004 : {
2005 3148752 : lng t0 = GDKusec();
2006 3149495 : bool sorted;
2007 :
2008 3149495 : MT_rwlock_rdlock(&b->thashlock);
2009 3150839 : MT_lock_set(&b->theaplock);
2010 3149870 : if (b->ttype == TYPE_void || b->tsorted || BATcount(b) == 0) {
2011 521003 : MT_lock_unset(&b->theaplock);
2012 521349 : MT_rwlock_rdunlock(&b->thashlock);
2013 521349 : return true;
2014 : }
2015 2628867 : if (b->tnosorted > 0 || !ATOMlinear(b->ttype)) {
2016 2129196 : MT_lock_unset(&b->theaplock);
2017 2129413 : MT_rwlock_rdunlock(&b->thashlock);
2018 2129413 : return false;
2019 : }
2020 :
2021 : /* There are a few reasons why we need a lock here. It may be
2022 : * that multiple threads call this functions at the same time
2023 : * (happens a lot with mitosis/mergetable), but we only need to
2024 : * scan the bat in one thread: the others can reap the rewards
2025 : * when that one thread is done. Also, we need the heap to
2026 : * remain accessible (could have used bat_iterator for that),
2027 : * and, and this is the killer argument, we may need to make
2028 : * changes to the bat descriptor. */
2029 499671 : BATiter bi = bat_iterator_nolock(b);
2030 499671 : if (!b->tsorted && b->tnosorted == 0) {
2031 898485 : switch (ATOMbasetype(b->ttype)) {
2032 97204 : case TYPE_bte:
2033 125944481 : BAT_ORDERED(bte);
2034 : break;
2035 10522 : case TYPE_sht:
2036 1622355 : BAT_ORDERED(sht);
2037 : break;
2038 327922 : case TYPE_int:
2039 119750637 : BAT_ORDERED(int);
2040 : break;
2041 10230 : case TYPE_lng:
2042 62549745 : BAT_ORDERED(lng);
2043 : break;
2044 : #ifdef HAVE_HGE
2045 377 : case TYPE_hge:
2046 8002682 : BAT_ORDERED(hge);
2047 : break;
2048 : #endif
2049 972 : case TYPE_flt:
2050 8007240 : BAT_ORDERED_FP(flt);
2051 : break;
2052 796 : case TYPE_dbl:
2053 8206947 : BAT_ORDERED_FP(dbl);
2054 : break;
2055 : case TYPE_str:
2056 20862650 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2057 20847832 : int c;
2058 20847832 : const char *p1 = BUNtvar(bi, p - 1);
2059 20847832 : const char *p2 = BUNtvar(bi, p);
2060 20847832 : if (p1 == p2)
2061 : c = 0;
2062 2247030 : else if (p1[0] == '\200') {
2063 1781 : if (p2[0] == '\200')
2064 : c = 0;
2065 : else
2066 : c = -1;
2067 2245249 : } else if (p2[0] == '\200')
2068 : c = 1;
2069 : else
2070 2244113 : c = strcmp(p1, p2);
2071 2244113 : if (c > 0) {
2072 36653 : b->tnosorted = bi.nosorted = p;
2073 36653 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2074 36653 : goto doreturn;
2075 20811179 : } else if (c < 0) {
2076 2210339 : assert(!b->trevsorted);
2077 2210339 : if (b->tnorevsorted == 0) {
2078 17011 : b->tnorevsorted = bi.norevsorted = p;
2079 17011 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b));
2080 : }
2081 18600840 : } else if (b->tnokey[1] == 0) {
2082 4809 : assert(!b->tkey);
2083 4809 : b->tnokey[0] = bi.nokey[0] = p - 1;
2084 4809 : b->tnokey[1] = bi.nokey[1] = p;
2085 20811179 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b));
2086 : }
2087 : }
2088 : break;
2089 186 : default: {
2090 186 : int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
2091 2482 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2092 2438 : int c;
2093 2438 : if ((c = cmpf(BUNtail(bi, p - 1), BUNtail(bi, p))) > 0) {
2094 142 : b->tnosorted = bi.nosorted = p;
2095 142 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2096 142 : goto doreturn;
2097 2296 : } else if (c < 0) {
2098 2246 : if (!b->trevsorted && b->tnorevsorted == 0) {
2099 90 : b->tnorevsorted = bi.norevsorted = p;
2100 90 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b));
2101 : }
2102 50 : } else if (!b->tkey && b->tnokey[1] == 0) {
2103 8 : b->tnokey[0] = bi.nokey[0] = p - 1;
2104 8 : b->tnokey[1] = bi.nokey[1] = p;
2105 2296 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b));
2106 : }
2107 : }
2108 : break;
2109 : }
2110 : }
2111 : /* we only get here if we completed the scan; note that
2112 : * if we didn't record evidence about *reverse*
2113 : * sortedness, we know that the BAT is also reverse
2114 : * sorted; similarly, if we didn't record evidence about
2115 : * keyness, we know the BAT is key */
2116 155945 : b->tsorted = bi.sorted = true;
2117 155945 : TRC_DEBUG(ALGO, "Fixed sorted for " ALGOBATFMT " (" LLFMT " usec)\n", ALGOBATPAR(b), GDKusec() - t0);
2118 156356 : if (!b->trevsorted && b->tnorevsorted == 0) {
2119 69137 : b->trevsorted = bi.revsorted = true;
2120 69137 : TRC_DEBUG(ALGO, "Fixed revsorted for " ALGOBATFMT "\n", ALGOBATPAR(b));
2121 : }
2122 156356 : if (!b->tkey && b->tnokey[1] == 0) {
2123 52609 : b->tkey = bi.key = true;
2124 52609 : TRC_DEBUG(ALGO, "Fixed key for " ALGOBATFMT "\n", ALGOBATPAR(b));
2125 : }
2126 : }
2127 156356 : doreturn:
2128 500095 : MT_rwlock_rdunlock(&b->thashlock);
2129 500026 : sorted = b->tsorted;
2130 500026 : bat pbid = VIEWtparent(b);
2131 500026 : MT_lock_unset(&b->theaplock);
2132 500477 : if (pbid) {
2133 281333 : BAT *pb = BBP_desc(pbid);
2134 281333 : MT_lock_set(&pb->theaplock);
2135 281326 : if (bi.count == BATcount(pb) &&
2136 206161 : bi.h == pb->theap &&
2137 206161 : bi.type == pb->ttype) {
2138 : /* add to knowledge in parent bat */
2139 206158 : pb->tsorted |= bi.sorted;
2140 206158 : if (pb->tnosorted == 0)
2141 206158 : pb->tnosorted = bi.nosorted;
2142 206158 : pb->trevsorted |= bi.revsorted;
2143 206158 : if (pb->tnorevsorted == 0)
2144 33550 : pb->tnorevsorted = bi.norevsorted;
2145 206158 : pb->tkey |= bi.key;
2146 206158 : if (pb->tnokey[1] == 0) {
2147 182682 : pb->tnokey[0] = bi.nokey[0];
2148 182682 : pb->tnokey[1] = bi.nokey[1];
2149 : }
2150 : }
2151 281326 : MT_lock_unset(&pb->theaplock);
2152 : }
2153 : return sorted;
2154 : }
2155 :
2156 : #define BAT_REVORDERED(TPE) \
2157 : do { \
2158 : const TPE *restrict vals = Tloc(b, 0); \
2159 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
2160 : if (vals[p - 1] < vals[p]) { \
2161 : b->tnorevsorted = p; \
2162 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
2163 : goto doreturn; \
2164 : } \
2165 : } \
2166 : } while (0)
2167 :
2168 : #define BAT_REVORDERED_FP(TPE) \
2169 : do { \
2170 : const TPE *restrict vals = Tloc(b, 0); \
2171 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
2172 : TPE prev = vals[p - 1], next = vals[p]; \
2173 : int cmp = is_flt_nil(prev) ? -!is_flt_nil(next) : is_flt_nil(next) ? 1 : (prev > next) - (prev < next); \
2174 : if (cmp < 0) { \
2175 : b->tnorevsorted = bi.norevsorted = p; \
2176 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
2177 : goto doreturn; \
2178 : } \
2179 : } \
2180 : } while (0)
2181 :
2182 : /* Return whether the BAT is reverse ordered or not. If we don't
2183 : * know, invest in a scan and record the results in the bat
2184 : * descriptor. */
2185 : bool
2186 2936512 : BATordered_rev(BAT *b)
2187 : {
2188 2936512 : lng t0 = GDKusec();
2189 2937266 : bool revsorted;
2190 :
2191 2937266 : if (b == NULL || !ATOMlinear(b->ttype))
2192 : return false;
2193 2937256 : MT_rwlock_rdlock(&b->thashlock);
2194 2937174 : MT_lock_set(&b->theaplock);
2195 2937157 : if (BATcount(b) <= 1 || b->trevsorted) {
2196 265452 : MT_lock_unset(&b->theaplock);
2197 265551 : MT_rwlock_rdunlock(&b->thashlock);
2198 265551 : return true;
2199 : }
2200 2671705 : if (b->ttype == TYPE_void) {
2201 23199 : MT_lock_unset(&b->theaplock);
2202 23205 : MT_rwlock_rdunlock(&b->thashlock);
2203 23205 : return is_oid_nil(b->tseqbase);
2204 : }
2205 2648506 : if (BATtdense(b) || b->tnorevsorted > 0) {
2206 2546471 : MT_lock_unset(&b->theaplock);
2207 2546782 : MT_rwlock_rdunlock(&b->thashlock);
2208 2546782 : return false;
2209 : }
2210 102035 : BATiter bi = bat_iterator_nolock(b);
2211 102035 : if (!b->trevsorted && b->tnorevsorted == 0) {
2212 166616 : switch (ATOMbasetype(b->ttype)) {
2213 35467 : case TYPE_bte:
2214 11033938 : BAT_REVORDERED(bte);
2215 : break;
2216 4461 : case TYPE_sht:
2217 3161311 : BAT_REVORDERED(sht);
2218 : break;
2219 36123 : case TYPE_int:
2220 1228209 : BAT_REVORDERED(int);
2221 : break;
2222 4784 : case TYPE_lng:
2223 2616799 : BAT_REVORDERED(lng);
2224 : break;
2225 : #ifdef HAVE_HGE
2226 123 : case TYPE_hge:
2227 990 : BAT_REVORDERED(hge);
2228 : break;
2229 : #endif
2230 511 : case TYPE_flt:
2231 1997 : BAT_REVORDERED_FP(flt);
2232 : break;
2233 322 : case TYPE_dbl:
2234 485512 : BAT_REVORDERED_FP(dbl);
2235 : break;
2236 20244 : default: {
2237 20244 : int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
2238 580555 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2239 577200 : if (cmpf(BUNtail(bi, p - 1), BUNtail(bi, p)) < 0) {
2240 16889 : b->tnorevsorted = p;
2241 16889 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2242 16889 : goto doreturn;
2243 : }
2244 : }
2245 : break;
2246 : }
2247 : }
2248 26455 : b->trevsorted = bi.revsorted = true;
2249 26455 : TRC_DEBUG(ALGO, "Fixed revsorted for " ALGOBATFMT " (" LLFMT " usec)\n", ALGOBATPAR(b), GDKusec() - t0);
2250 : }
2251 26455 : doreturn:
2252 102035 : MT_rwlock_rdunlock(&b->thashlock);
2253 102150 : revsorted = b->trevsorted;
2254 102150 : bat pbid = VIEWtparent(b);
2255 102150 : MT_lock_unset(&b->theaplock);
2256 102294 : if (pbid) {
2257 34995 : BAT *pb = BBP_desc(pbid);
2258 34995 : MT_lock_set(&pb->theaplock);
2259 35003 : if (bi.count == BATcount(pb) &&
2260 6464 : bi.h == pb->theap &&
2261 6464 : bi.type == pb->ttype) {
2262 : /* add to knowledge in parent bat */
2263 6464 : pb->trevsorted |= bi.revsorted;
2264 6464 : if (pb->tnorevsorted == 0)
2265 6464 : pb->tnorevsorted = bi.norevsorted;
2266 : }
2267 35003 : MT_lock_unset(&pb->theaplock);
2268 : }
2269 : return revsorted;
2270 : }
2271 :
2272 : /* figure out which sort function is to be called
2273 : * stable sort can produce an error (not enough memory available),
2274 : * "quick" sort does not produce errors */
2275 : static gdk_return
2276 3949878 : do_sort(void *restrict h, void *restrict t, const void *restrict base,
2277 : size_t n, int hs, int ts, int tpe, bool reverse, bool nilslast,
2278 : bool stable)
2279 : {
2280 3949878 : if (n <= 1) /* trivially sorted */
2281 : return GDK_SUCCEED;
2282 2763273 : switch (tpe) {
2283 2715010 : case TYPE_bte:
2284 : case TYPE_sht:
2285 : case TYPE_int:
2286 : case TYPE_lng:
2287 : #ifdef HAVE_HGE
2288 : case TYPE_hge:
2289 : #endif
2290 : case TYPE_date:
2291 : case TYPE_daytime:
2292 : case TYPE_timestamp:
2293 2715010 : assert(base == NULL);
2294 2715010 : if (nilslast == reverse && (stable || n > 100))
2295 20376 : return GDKrsort(h, t, n, hs, ts, reverse, false);
2296 : break;
2297 4 : case TYPE_uuid:
2298 4 : assert(base == NULL);
2299 4 : if (nilslast == reverse && (stable || n > 100))
2300 1 : return GDKrsort(h, t, n, hs, ts, reverse, true);
2301 : break;
2302 : default:
2303 : break;
2304 : }
2305 48371 : if (stable) {
2306 27 : if (reverse)
2307 0 : return GDKssort_rev(h, t, base, n, hs, ts, tpe);
2308 : else
2309 27 : return GDKssort(h, t, base, n, hs, ts, tpe);
2310 : } else {
2311 2742869 : GDKqsort(h, t, base, n, hs, ts, tpe, reverse, nilslast);
2312 : }
2313 2742869 : return GDK_SUCCEED;
2314 : }
2315 :
2316 : /* Sort the bat b according to both o and g. The stable and reverse
2317 : * parameters indicate whether the sort should be stable or descending
2318 : * respectively. The parameter b is required, o and g are optional
2319 : * (i.e., they may be NULL).
2320 : *
2321 : * A sorted copy is returned through the sorted parameter, the new
2322 : * ordering is returned through the order parameter, group information
2323 : * is returned through the groups parameter. All three output
2324 : * parameters may be NULL. If they're all NULL, this function does
2325 : * nothing.
2326 : *
2327 : * If o is specified, it is used to first rearrange b according to the
2328 : * order specified in o, after which b is sorted taking g into
2329 : * account.
2330 : *
2331 : * If g is specified, it indicates groups which should be individually
2332 : * ordered. Each row of consecutive equal values in g indicates a
2333 : * group which is sorted according to stable and reverse. g is used
2334 : * after the order in b was rearranged according to o.
2335 : *
2336 : * The outputs order and groups can be used in subsequent calls to
2337 : * this function. This can be used if multiple BATs need to be sorted
2338 : * together. The BATs should then be sorted in order of significance,
2339 : * and each following call should use the original unordered BAT plus
2340 : * the order and groups bat from the previous call. In this case, the
2341 : * sorted BATs are not of much use, so the sorted output parameter
2342 : * does not need to be specified.
2343 : * Apart from error checking and maintaining reference counts, sorting
2344 : * three columns (col1, col2, col3) could look like this with the
2345 : * sorted results in (col1s, col2s, col3s):
2346 : * BATsort(&col1s, &ord1, &grp1, col1, NULL, NULL, false, false, false);
2347 : * BATsort(&col2s, &ord2, &grp2, col2, ord1, grp1, false, false, false);
2348 : * BATsort(&col3s, NULL, NULL, col3, ord2, grp2, false, false, false);
2349 : * Note that the "reverse" parameter can be different for each call.
2350 : */
2351 : gdk_return
2352 34124 : BATsort(BAT **sorted, BAT **order, BAT **groups,
2353 : BAT *b, BAT *o, BAT *g, bool reverse, bool nilslast, bool stable)
2354 : {
2355 34124 : BAT *bn = NULL, *on = NULL, *gn = NULL, *pb = NULL;
2356 34124 : BATiter pbi;
2357 34124 : oid *restrict grps, *restrict ords, prev;
2358 34124 : BUN p, q, r;
2359 34124 : lng t0 = GDKusec();
2360 34127 : bool mkorderidx, orderidxlock = false;
2361 34127 : Heap *oidxh = NULL;
2362 :
2363 : /* we haven't implemented NILs as largest value for stable
2364 : * sort, so NILs come first for ascending and last for
2365 : * descending */
2366 34127 : assert(!stable || reverse == nilslast);
2367 :
2368 34127 : if (b == NULL) {
2369 0 : GDKerror("b must exist\n");
2370 0 : return GDK_FAIL;
2371 : }
2372 34127 : if (stable && reverse != nilslast) {
2373 0 : GDKerror("stable sort cannot have reverse != nilslast\n");
2374 0 : return GDK_FAIL;
2375 : }
2376 34127 : if (!ATOMlinear(b->ttype)) {
2377 0 : GDKerror("type %s cannot be sorted\n", ATOMname(b->ttype));
2378 0 : return GDK_FAIL;
2379 : }
2380 34127 : MT_lock_set(&b->theaplock);
2381 34133 : if (b->ttype == TYPE_void) {
2382 149 : b->tsorted = true;
2383 223 : if (b->trevsorted != (is_oid_nil(b->tseqbase) || b->batCount <= 1)) {
2384 0 : b->trevsorted = !b->trevsorted;
2385 : }
2386 298 : if (b->tkey != (!is_oid_nil(b->tseqbase) || b->batCount <= 1)) {
2387 0 : b->tkey = !b->tkey;
2388 : }
2389 33984 : } else if (b->batCount <= 1) {
2390 12281 : if (!b->tsorted || !b->trevsorted) {
2391 23 : b->tsorted = b->trevsorted = true;
2392 : }
2393 : }
2394 34133 : MT_lock_unset(&b->theaplock);
2395 34133 : if (o != NULL &&
2396 17274 : (ATOMtype(o->ttype) != TYPE_oid || /* oid tail */
2397 17274 : BATcount(o) != BATcount(b) || /* same size as b */
2398 6768 : (o->ttype == TYPE_void && /* no nil tail */
2399 2728 : BATcount(o) != 0 &&
2400 2728 : is_oid_nil(o->tseqbase)))) {
2401 0 : GDKerror("o must have type oid and same size as b\n");
2402 0 : return GDK_FAIL;
2403 : }
2404 34135 : if (g != NULL &&
2405 17276 : (ATOMtype(g->ttype) != TYPE_oid || /* oid tail */
2406 17276 : !g->tsorted || /* sorted */
2407 17276 : BATcount(g) != BATcount(b) || /* same size as b */
2408 6895 : (g->ttype == TYPE_void && /* no nil tail */
2409 2855 : BATcount(g) != 0 &&
2410 2855 : is_oid_nil(g->tseqbase)))) {
2411 1 : GDKerror("g must have type oid, sorted on the tail, "
2412 : "and same size as b\n");
2413 1 : return GDK_FAIL;
2414 : }
2415 34134 : if (sorted == NULL && order == NULL) {
2416 : /* no place to put result, so we're done quickly */
2417 0 : GDKerror("no place to put the result.\n");
2418 0 : return GDK_FAIL;
2419 : }
2420 34134 : if (g == NULL && !stable) {
2421 : /* pre-ordering doesn't make sense if we're not
2422 : * subsorting and the sort is not stable */
2423 16579 : o = NULL;
2424 : }
2425 34134 : if (b->tnonil) {
2426 : /* if there are no nils, placement of nils doesn't
2427 : * matter, so set nilslast such that ordered bits can
2428 : * be used */
2429 25939 : nilslast = reverse;
2430 : }
2431 34134 : pbi = bat_iterator(NULL);
2432 35469 : if (BATcount(b) <= 1 ||
2433 21649 : (reverse == nilslast &&
2434 : (reverse ? b->trevsorted : b->tsorted) &&
2435 5664 : o == NULL && g == NULL &&
2436 3096 : (groups == NULL || BATtkey(b) ||
2437 : (reverse ? b->tsorted : b->trevsorted)))) {
2438 : /* trivially (sub)sorted, and either we don't need to
2439 : * return group information, or we can trivially
2440 : * deduce the groups */
2441 14498 : if (sorted) {
2442 12712 : bn = COLcopy(b, b->ttype, false, TRANSIENT);
2443 12712 : if (bn == NULL)
2444 0 : goto error;
2445 12712 : *sorted = bn;
2446 : }
2447 14498 : if (order) {
2448 13597 : on = BATdense(b->hseqbase, b->hseqbase, BATcount(b));
2449 13600 : if (on == NULL)
2450 0 : goto error;
2451 13600 : *order = on;
2452 : }
2453 14501 : if (groups) {
2454 6375 : if (BATtkey(b)) {
2455 : /* singleton groups */
2456 5732 : gn = BATdense(b->hseqbase, 0, BATcount(b));
2457 5732 : if (gn == NULL)
2458 0 : goto error;
2459 : } else {
2460 : /* single group */
2461 643 : const oid *o = 0;
2462 643 : assert(BATcount(b) == 1 ||
2463 : (b->tsorted && b->trevsorted));
2464 643 : gn = BATconstant(b->hseqbase, TYPE_oid, &o, BATcount(b), TRANSIENT);
2465 643 : if (gn == NULL)
2466 0 : goto error;
2467 : }
2468 6375 : *groups = gn;
2469 : }
2470 14501 : bat_iterator_end(&pbi);
2471 14501 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o="
2472 : ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
2473 : ",reverse=%d,nilslast=%d,stable=%d) = ("
2474 : ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2475 : ALGOOPTBATFMT " -- trivial (" LLFMT
2476 : " usec)\n",
2477 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2478 : ALGOOPTBATPAR(g), reverse, nilslast, stable,
2479 : ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
2480 : ALGOOPTBATPAR(on), GDKusec() - t0);
2481 14501 : return GDK_SUCCEED;
2482 : }
2483 19626 : if (VIEWtparent(b)) {
2484 4213 : pb = BATdescriptor(VIEWtparent(b));
2485 4219 : if (pb != NULL &&
2486 4219 : (b->tbaseoff != pb->tbaseoff ||
2487 3346 : BATcount(b) != BATcount(pb) ||
2488 3069 : b->hseqbase != pb->hseqbase ||
2489 3058 : BATatoms[b->ttype].atomCmp != BATatoms[pb->ttype].atomCmp)) {
2490 1161 : BBPunfix(pb->batCacheid);
2491 1161 : pb = NULL;
2492 : }
2493 : } else {
2494 : pb = b;
2495 : }
2496 19633 : bat_iterator_end(&pbi);
2497 19632 : pbi = bat_iterator(pb);
2498 : /* when we will create an order index if it doesn't already exist */
2499 19634 : mkorderidx = (g == NULL && !reverse && !nilslast && pb != NULL && (order || !pbi.transient));
2500 19634 : if (g == NULL && !reverse && !nilslast && pb != NULL) {
2501 6401 : (void) BATcheckorderidx(pb);
2502 6401 : MT_lock_set(&pb->batIdxLock);
2503 6400 : if (pb->torderidx) {
2504 66 : if (!stable || ((oid *) pb->torderidx->base)[2]) {
2505 : /* we can use the order index */
2506 66 : oidxh = pb->torderidx;
2507 66 : HEAPincref(oidxh);
2508 : }
2509 : mkorderidx = false;
2510 6334 : } else if (b != pb) {
2511 : /* don't build orderidx on parent bat */
2512 : mkorderidx = false;
2513 5263 : } else if (mkorderidx) {
2514 : /* keep lock when going to create */
2515 4637 : orderidxlock = true;
2516 : }
2517 6400 : if (!orderidxlock)
2518 1763 : MT_lock_unset(&pb->batIdxLock);
2519 : }
2520 19634 : if (g == NULL && o == NULL && !reverse && !nilslast && oidxh != NULL) {
2521 : /* there is an order index that we can use */
2522 66 : on = COLnew(pb->hseqbase, TYPE_oid, pbi.count, TRANSIENT);
2523 66 : if (on == NULL)
2524 0 : goto error;
2525 66 : memcpy(Tloc(on, 0), (oid *) oidxh->base + ORDERIDXOFF, pbi.count * sizeof(oid));
2526 66 : BATsetcount(on, BATcount(b));
2527 66 : HEAPdecref(oidxh, false);
2528 66 : oidxh = NULL;
2529 66 : on->tkey = true;
2530 66 : on->tnil = false;
2531 66 : on->tnonil = true;
2532 66 : on->tsorted = on->trevsorted = false;
2533 66 : on->tseqbase = oid_nil;
2534 66 : if (sorted || groups) {
2535 66 : bn = BATproject(on, b);
2536 66 : if (bn == NULL)
2537 0 : goto error;
2538 66 : bn->tsorted = true;
2539 66 : if (groups) {
2540 11 : if (BATgroup_internal(groups, NULL, NULL, bn, NULL, g, NULL, NULL, true) != GDK_SUCCEED)
2541 0 : goto error;
2542 11 : if (sorted &&
2543 11 : (*groups)->tkey &&
2544 : g == NULL) {
2545 : /* if new groups bat is key
2546 : * and since there is no input
2547 : * groups bat, we know the
2548 : * result bat is key */
2549 4 : bn->tkey = true;
2550 : }
2551 : }
2552 66 : if (sorted)
2553 66 : *sorted = bn;
2554 : else {
2555 0 : BBPunfix(bn->batCacheid);
2556 0 : bn = NULL;
2557 : }
2558 : }
2559 66 : if (order)
2560 16 : *order = on;
2561 : else {
2562 50 : BBPunfix(on->batCacheid);
2563 50 : on = NULL;
2564 : }
2565 66 : bat_iterator_end(&pbi);
2566 66 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o="
2567 : ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
2568 : ",reverse=%d,nilslast=%d,stable=%d) = ("
2569 : ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2570 : ALGOOPTBATFMT " -- orderidx (" LLFMT
2571 : " usec)\n",
2572 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2573 : ALGOOPTBATPAR(g), reverse, nilslast, stable,
2574 : ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
2575 : ALGOOPTBATPAR(on), GDKusec() - t0);
2576 66 : if (pb != NULL && pb != b)
2577 53 : BBPunfix(pb->batCacheid);
2578 66 : return GDK_SUCCEED;
2579 12644 : } else if (oidxh) {
2580 0 : HEAPdecref(oidxh, false);
2581 0 : oidxh = NULL;
2582 : }
2583 19568 : if (o) {
2584 12099 : bn = BATproject(o, b);
2585 12100 : if (bn == NULL)
2586 0 : goto error;
2587 12100 : if (bn->ttype == TYPE_void || isVIEW(bn)) {
2588 5482 : BAT *b2 = COLcopy(bn, ATOMtype(bn->ttype), true, TRANSIENT);
2589 2741 : BBPunfix(bn->batCacheid);
2590 2741 : bn = b2;
2591 : }
2592 12100 : if (pb) {
2593 11548 : bat_iterator_end(&pbi);
2594 11546 : if (pb != b)
2595 1627 : BBPunfix(pb->batCacheid);
2596 11548 : pbi = bat_iterator(NULL);
2597 11548 : pb = NULL;
2598 : }
2599 : } else {
2600 7469 : bn = COLcopy(b, b->ttype, true, TRANSIENT);
2601 : }
2602 19560 : if (bn == NULL)
2603 0 : goto error;
2604 19560 : if (order) {
2605 : /* prepare order bat */
2606 16639 : if (o) {
2607 : /* make copy of input so that we can refine it;
2608 : * copy can be read-only if we take the shortcut
2609 : * below in the case g is "key" */
2610 10338 : on = COLcopy(o, TYPE_oid,
2611 10338 : g == NULL ||
2612 10338 : !(g->tkey || g->ttype == TYPE_void),
2613 : TRANSIENT);
2614 10337 : if (on == NULL)
2615 0 : goto error;
2616 10337 : BAThseqbase(on, b->hseqbase);
2617 10336 : on->tminpos = BUN_NONE;
2618 10336 : on->tmaxpos = BUN_NONE;
2619 : } else {
2620 : /* create new order */
2621 6301 : on = COLnew(b->hseqbase, TYPE_oid, BATcount(bn), TRANSIENT);
2622 6307 : if (on == NULL)
2623 0 : goto error;
2624 6307 : ords = (oid *) Tloc(on, 0);
2625 29874980 : for (p = 0, q = BATcount(bn); p < q; p++)
2626 29868673 : ords[p] = p + b->hseqbase;
2627 6307 : BATsetcount(on, BATcount(bn));
2628 6310 : on->tkey = true;
2629 6310 : on->tnil = false;
2630 6310 : on->tnonil = true;
2631 : }
2632 : /* COLcopy above can create TYPE_void */
2633 16646 : if (on->ttype != TYPE_void) {
2634 15882 : on->tsorted = on->trevsorted = false; /* it won't be sorted */
2635 15882 : on->tseqbase = oid_nil; /* and hence not dense */
2636 15882 : on->tnosorted = on->tnorevsorted = 0;
2637 : }
2638 16646 : *order = on;
2639 16646 : ords = (oid *) Tloc(on, 0);
2640 : } else {
2641 : ords = NULL;
2642 : }
2643 19567 : if (g) {
2644 12098 : if (g->tkey || g->ttype == TYPE_void) {
2645 : /* if g is "key", all groups are size 1, so no
2646 : * subsorting needed */
2647 5089 : if (sorted) {
2648 4604 : *sorted = bn;
2649 : } else {
2650 485 : BBPunfix(bn->batCacheid);
2651 485 : bn = NULL;
2652 : }
2653 5089 : if (order) {
2654 3911 : *order = on;
2655 3911 : if (o) {
2656 : /* we can inherit sortedness
2657 : * after all */
2658 3911 : on->tsorted = o->tsorted;
2659 3911 : on->trevsorted = o->trevsorted;
2660 3911 : if (o->tnosorted)
2661 56 : on->tnosorted = o->tnosorted;
2662 3911 : if (o->tnorevsorted)
2663 74 : on->tnorevsorted = o->tnorevsorted;
2664 : } else {
2665 : /* we didn't rearrange, so
2666 : * still sorted */
2667 0 : on->tsorted = true;
2668 0 : on->trevsorted = false;
2669 : }
2670 3911 : if (BATcount(on) <= 1) {
2671 0 : on->tsorted = true;
2672 0 : on->trevsorted = true;
2673 : }
2674 : }
2675 5089 : if (groups) {
2676 2912 : gn = COLcopy(g, g->ttype, false, TRANSIENT);
2677 2912 : if (gn == NULL)
2678 0 : goto error;
2679 2912 : *groups = gn;
2680 : }
2681 5089 : bat_iterator_end(&pbi);
2682 5089 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
2683 : ",o=" ALGOOPTBATFMT ",g=" ALGOBATFMT
2684 : ",reverse=%d,nilslast=%d,stable=%d"
2685 : ") = (" ALGOOPTBATFMT ","
2686 : ALGOOPTBATFMT "," ALGOOPTBATFMT
2687 : " -- key group (" LLFMT " usec)\n",
2688 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2689 : ALGOBATPAR(g), reverse, nilslast,
2690 : stable, ALGOOPTBATPAR(bn),
2691 : ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on),
2692 : GDKusec() - t0);
2693 5089 : if (pb != NULL && pb != b)
2694 0 : BBPunfix(pb->batCacheid);
2695 5089 : return GDK_SUCCEED;
2696 : }
2697 7009 : assert(g->ttype == TYPE_oid);
2698 7009 : grps = (oid *) Tloc(g, 0);
2699 7009 : prev = grps[0];
2700 7009 : if (BATmaterialize(bn, BUN_NONE) != GDK_SUCCEED)
2701 0 : goto error;
2702 44662341 : for (r = 0, p = 1, q = BATcount(g); p < q; p++) {
2703 44655331 : if (grps[p] != prev) {
2704 : /* sub sort [r,p) */
2705 7602797 : if (do_sort(Tloc(bn, r),
2706 3666666 : ords ? ords + r : NULL,
2707 3936131 : bn->tvheap ? bn->tvheap->base : NULL,
2708 3936131 : p - r, bn->twidth, ords ? sizeof(oid) : 0,
2709 3936131 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)
2710 0 : goto error;
2711 3936131 : r = p;
2712 3936131 : prev = grps[p];
2713 : }
2714 : }
2715 : /* sub sort [r,q) */
2716 13436 : if (do_sort(Tloc(bn, r),
2717 6426 : ords ? ords + r : NULL,
2718 7010 : bn->tvheap ? bn->tvheap->base : NULL,
2719 7010 : p - r, bn->twidth, ords ? sizeof(oid) : 0,
2720 7010 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)
2721 0 : goto error;
2722 : /* if single group (r==0) the result is (rev)sorted,
2723 : * otherwise (maybe) not */
2724 7008 : bn->tsorted = r == 0 && !reverse && !nilslast;
2725 13981 : bn->trevsorted = r == 0 && reverse && nilslast;
2726 : } else {
2727 7469 : Heap *m = NULL;
2728 : /* only invest in creating an order index if the BAT
2729 : * is persistent */
2730 7469 : if (mkorderidx) {
2731 4636 : assert(orderidxlock);
2732 4636 : if ((m = createOIDXheap(pb, stable)) != NULL &&
2733 : ords == NULL) {
2734 0 : ords = (oid *) m->base + ORDERIDXOFF;
2735 0 : if (o && o->ttype != TYPE_void)
2736 0 : memcpy(ords, Tloc(o, 0), BATcount(o) * sizeof(oid));
2737 0 : else if (o)
2738 0 : for (p = 0, q = BATcount(o); p < q; p++)
2739 0 : ords[p] = p + o->tseqbase;
2740 : else
2741 0 : for (p = 0, q = BATcount(b); p < q; p++)
2742 0 : ords[p] = p + b->hseqbase;
2743 : }
2744 : }
2745 7469 : if ((reverse != nilslast ||
2746 14099 : (reverse ? !bn->trevsorted : !bn->tsorted)) &&
2747 13478 : (BATmaterialize(bn, BUN_NONE) != GDK_SUCCEED ||
2748 6736 : do_sort(Tloc(bn, 0),
2749 : ords,
2750 6736 : bn->tvheap ? bn->tvheap->base : NULL,
2751 6736 : BATcount(bn), bn->twidth, ords ? sizeof(oid) : 0,
2752 6736 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)) {
2753 0 : if (m != NULL) {
2754 0 : HEAPfree(m, true);
2755 0 : GDKfree(m);
2756 : }
2757 0 : goto error;
2758 : }
2759 7468 : bn->tsorted = !reverse && !nilslast;
2760 7468 : bn->trevsorted = reverse && nilslast;
2761 7468 : if (m != NULL) {
2762 4635 : assert(orderidxlock);
2763 4635 : if (pb->torderidx == NULL) {
2764 4635 : if (ords != (oid *) m->base + ORDERIDXOFF) {
2765 4635 : memcpy((oid *) m->base + ORDERIDXOFF,
2766 : ords,
2767 4635 : pbi.count * sizeof(oid));
2768 : }
2769 4635 : pb->torderidx = m;
2770 4635 : persistOIDX(pb);
2771 : } else {
2772 0 : HEAPfree(m, true);
2773 0 : GDKfree(m);
2774 : }
2775 : }
2776 : }
2777 14475 : if (orderidxlock) {
2778 4634 : MT_lock_unset(&pb->batIdxLock);
2779 4635 : orderidxlock = false;
2780 : }
2781 14478 : bn->theap->dirty = true;
2782 14478 : bn->tnosorted = 0;
2783 14478 : bn->tnorevsorted = 0;
2784 14478 : bn->tnokey[0] = bn->tnokey[1] = 0;
2785 14478 : bn->tminpos = BUN_NONE;
2786 14478 : bn->tmaxpos = BUN_NONE;
2787 14478 : if (groups) {
2788 8804 : if (BATgroup_internal(groups, NULL, NULL, bn, NULL, g, NULL, NULL, true) != GDK_SUCCEED)
2789 0 : goto error;
2790 8801 : if ((*groups)->tkey &&
2791 966 : (g == NULL || (g->tsorted && g->trevsorted))) {
2792 : /* if new groups bat is key and the input
2793 : * group bat has a single value (both sorted
2794 : * and revsorted), we know the result bat is
2795 : * key */
2796 1172 : bn->tkey = true;
2797 : }
2798 : }
2799 :
2800 14475 : bat_iterator_end(&pbi);
2801 14477 : if (sorted)
2802 10573 : *sorted = bn;
2803 : else {
2804 3904 : BBPunfix(bn->batCacheid);
2805 3904 : bn = NULL;
2806 : }
2807 :
2808 14480 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o=" ALGOOPTBATFMT
2809 : ",g=" ALGOOPTBATFMT ",reverse=%d,nilslast=%d,"
2810 : "stable=%d) = (" ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2811 : ALGOOPTBATFMT " -- %ssort (" LLFMT " usec)\n",
2812 : ALGOBATPAR(b), ALGOOPTBATPAR(o), ALGOOPTBATPAR(g),
2813 : reverse, nilslast, stable, ALGOOPTBATPAR(bn),
2814 : ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on),
2815 : g ? "grouped " : "", GDKusec() - t0);
2816 14480 : if (pb && pb != b)
2817 1376 : BBPunfix(pb->batCacheid);
2818 : return GDK_SUCCEED;
2819 :
2820 0 : error:
2821 0 : bat_iterator_end(&pbi);
2822 0 : if (orderidxlock)
2823 0 : MT_lock_unset(&pb->batIdxLock);
2824 0 : if (oidxh)
2825 0 : HEAPdecref(oidxh, false);
2826 0 : BBPreclaim(bn);
2827 0 : if (pb && pb != b)
2828 0 : BBPunfix(pb->batCacheid);
2829 0 : BBPreclaim(on);
2830 0 : if (sorted)
2831 0 : *sorted = NULL;
2832 0 : if (order)
2833 0 : *order = NULL;
2834 0 : if (groups)
2835 0 : *groups = NULL;
2836 : return GDK_FAIL;
2837 : }
2838 :
2839 : /* return a new BAT of length n with seqbase hseq, and the constant v
2840 : * in the tail */
2841 : BAT *
2842 2122980 : BATconstant(oid hseq, int tailtype, const void *v, BUN n, role_t role)
2843 : {
2844 2122980 : BAT *bn;
2845 2122980 : void *restrict p;
2846 2122980 : BUN i;
2847 2122980 : lng t0 = 0;
2848 :
2849 2122980 : TRC_DEBUG_IF(ALGO) t0 = GDKusec();
2850 2122980 : if (v == NULL)
2851 : return NULL;
2852 2122980 : bn = COLnew(hseq, tailtype, n, role);
2853 2129682 : if (bn != NULL && n > 0) {
2854 91264 : p = Tloc(bn, 0);
2855 91264 : switch (ATOMstorage(tailtype)) {
2856 25 : case TYPE_void:
2857 25 : v = &oid_nil;
2858 25 : BATtseqbase(bn, oid_nil);
2859 25 : break;
2860 0 : case TYPE_msk:
2861 0 : if (*(msk*)v) {
2862 0 : memset(p, 0xFF, 4 * ((n + 31) / 32));
2863 0 : if (n & 31) {
2864 0 : uint32_t *m = p;
2865 0 : m[n / 32] &= (1U << (n % 32)) - 1;
2866 : }
2867 : } else
2868 0 : memset(p, 0x00, 4 * ((n + 31) / 32));
2869 : break;
2870 14820 : case TYPE_bte:
2871 14820 : memset(p, *(bte*)v, n);
2872 14820 : break;
2873 : case TYPE_sht:
2874 7161483 : for (i = 0; i < n; i++)
2875 7142821 : ((sht *) p)[i] = *(sht *) v;
2876 : break;
2877 : case TYPE_int:
2878 : case TYPE_flt:
2879 : assert(sizeof(int) == sizeof(flt));
2880 220460267 : for (i = 0; i < n; i++)
2881 220453919 : ((int *) p)[i] = *(int *) v;
2882 : break;
2883 : case TYPE_lng:
2884 : case TYPE_dbl:
2885 : assert(sizeof(lng) == sizeof(dbl));
2886 232361623 : for (i = 0; i < n; i++)
2887 232327715 : ((lng *) p)[i] = *(lng *) v;
2888 : break;
2889 : #ifdef HAVE_HGE
2890 : case TYPE_hge:
2891 25672821 : for (i = 0; i < n; i++)
2892 25671193 : ((hge *) p)[i] = *(hge *) v;
2893 : break;
2894 : #endif
2895 : case TYPE_uuid:
2896 200047 : for (i = 0; i < n; i++)
2897 200038 : ((uuid *) p)[i] = *(uuid *) v;
2898 : break;
2899 15714 : case TYPE_str:
2900 : /* insert the first value, then just copy the
2901 : * offset lots of times */
2902 15714 : if (tfastins_nocheck(bn, 0, v) != GDK_SUCCEED) {
2903 0 : BBPreclaim(bn);
2904 0 : return NULL;
2905 : }
2906 15723 : char val[sizeof(var_t)];
2907 15723 : memcpy(val, Tloc(bn, 0), bn->twidth);
2908 15723 : if (bn->twidth == 1 && n > 1) {
2909 : /* single byte value: we have a
2910 : * function for that */
2911 8098 : memset(Tloc(bn, 1), val[0], n - 1);
2912 : } else {
2913 7625 : char *p = Tloc(bn, 0);
2914 7645 : for (i = 1; i < n; i++) {
2915 20 : p += bn->twidth;
2916 20 : memcpy(p, val, bn->twidth);
2917 : }
2918 : }
2919 : break;
2920 : default:
2921 333713 : for (i = 0; i < n; i++)
2922 333563 : if (tfastins_nocheck(bn, i, v) != GDK_SUCCEED) {
2923 0 : BBPreclaim(bn);
2924 0 : return NULL;
2925 : }
2926 : break;
2927 : }
2928 91273 : bn->theap->dirty = true;
2929 91273 : bn->tnil = n >= 1 && ATOMnilptr(tailtype) && (*ATOMcompare(tailtype))(v, ATOMnilptr(tailtype)) == 0;
2930 91209 : BATsetcount(bn, n);
2931 91565 : bn->tsorted = bn->trevsorted = ATOMlinear(tailtype);
2932 91565 : bn->tnonil = !bn->tnil;
2933 91565 : bn->tkey = BATcount(bn) <= 1;
2934 : }
2935 2129983 : TRC_DEBUG(ALGO, "-> " ALGOOPTBATFMT " " LLFMT "usec\n",
2936 : ALGOOPTBATPAR(bn), GDKusec() - t0);
2937 : return bn;
2938 : }
2939 :
2940 : /*
2941 : * BAT Aggregates
2942 : *
2943 : * We retain the size() and card() aggregate results in the column
2944 : * descriptor. We would like to have such functionality in an
2945 : * extensible way for many aggregates, for DD (1) we do not want to
2946 : * change the binary BAT format on disk and (2) aggr and size are the
2947 : * most relevant aggregates.
2948 : *
2949 : * It is all hacked into the aggr[3] records; three adjacent integers
2950 : * that were left over in the column record. We refer to these as if
2951 : * it where an int aggr[3] array. The below routines set and retrieve
2952 : * the aggregate values from the tail of the BAT, as many
2953 : * aggregate-manipulating BAT functions work on tail.
2954 : *
2955 : * The rules are as follows: aggr[0] contains the alignment ID of the
2956 : * column (if set i.e. nonzero). Hence, if this value is nonzero and
2957 : * equal to b->talign, the precomputed aggregate values in
2958 : * aggr[GDK_AGGR_SIZE] and aggr[GDK_AGGR_CARD] hold. However, only one
2959 : * of them may be set at the time. This is encoded by the value
2960 : * int_nil, which cannot occur in these two aggregates.
2961 : *
2962 : * This was now extended to record the property whether we know there
2963 : * is a nil value present by mis-using the highest bits of both
2964 : * GDK_AGGR_SIZE and GDK_AGGR_CARD.
2965 : */
2966 :
2967 : void
2968 48969768 : PROPdestroy_nolock(BAT *b)
2969 : {
2970 48969768 : PROPrec *p = b->tprops;
2971 48969768 : PROPrec *n;
2972 :
2973 48969768 : b->tprops = NULL;
2974 49006014 : while (p) {
2975 4967 : n = p->next;
2976 4967 : assert(p->id != (enum prop_t) 20);
2977 4967 : VALclear(&p->v);
2978 4967 : GDKfree(p);
2979 4967 : p = n;
2980 : }
2981 49001047 : }
2982 :
2983 : void
2984 433 : PROPdestroy(BAT *b)
2985 : {
2986 433 : MT_lock_set(&b->theaplock);
2987 433 : PROPdestroy_nolock(b);
2988 433 : MT_lock_unset(&b->theaplock);
2989 433 : }
2990 :
2991 : ValPtr
2992 210425973 : BATgetprop_nolock(BAT *b, enum prop_t idx)
2993 : {
2994 210425973 : PROPrec *p;
2995 :
2996 210425973 : p = b->tprops;
2997 210433223 : while (p && p->id != idx)
2998 7250 : p = p->next;
2999 210425973 : return p ? &p->v : NULL;
3000 : }
3001 :
3002 : void
3003 418342 : BATrmprop_nolock(BAT *b, enum prop_t idx)
3004 : {
3005 418342 : PROPrec *prop = b->tprops, *prev = NULL;
3006 :
3007 418577 : while (prop) {
3008 418450 : if (prop->id == idx) {
3009 418215 : if (prev)
3010 106 : prev->next = prop->next;
3011 : else
3012 418109 : b->tprops = prop->next;
3013 418215 : VALclear(&prop->v);
3014 418215 : GDKfree(prop);
3015 418215 : return;
3016 : }
3017 235 : prev = prop;
3018 235 : prop = prop->next;
3019 : }
3020 : }
3021 :
3022 : ValPtr
3023 423221 : BATsetprop_nolock(BAT *b, enum prop_t idx, int type, const void *v)
3024 : {
3025 423221 : PROPrec *p;
3026 :
3027 423221 : p = b->tprops;
3028 430843 : while (p && p->id != idx)
3029 7622 : p = p->next;
3030 423221 : if (p == NULL) {
3031 423214 : if ((p = GDKmalloc(sizeof(PROPrec))) == NULL) {
3032 : /* properties are hints, so if we can't create
3033 : * one we ignore the error */
3034 0 : GDKclrerr();
3035 0 : return NULL;
3036 : }
3037 423214 : p->id = idx;
3038 423214 : p->next = b->tprops;
3039 423214 : p->v.vtype = 0;
3040 423214 : b->tprops = p;
3041 : } else {
3042 7 : VALclear(&p->v);
3043 : }
3044 423221 : if (VALinit(&p->v, type, v) == NULL) {
3045 : /* failed to initialize, so remove property */
3046 0 : BATrmprop_nolock(b, idx);
3047 0 : GDKclrerr();
3048 0 : p = NULL;
3049 : }
3050 0 : return p ? &p->v : NULL;
3051 : }
3052 :
3053 : ValPtr
3054 3431222 : BATgetprop(BAT *b, enum prop_t idx)
3055 : {
3056 3431222 : ValPtr p;
3057 :
3058 3431222 : MT_lock_set(&b->theaplock);
3059 3434899 : p = BATgetprop_nolock(b, idx);
3060 3434751 : MT_lock_unset(&b->theaplock);
3061 3435924 : return p;
3062 : }
3063 :
3064 : ValPtr
3065 4777 : BATsetprop(BAT *b, enum prop_t idx, int type, const void *v)
3066 : {
3067 4777 : ValPtr p;
3068 4777 : MT_lock_set(&b->theaplock);
3069 4777 : p = BATsetprop_nolock(b, idx, type, v);
3070 4777 : MT_lock_unset(&b->theaplock);
3071 4777 : return p;
3072 : }
3073 :
3074 : void
3075 2 : BATrmprop(BAT *b, enum prop_t idx)
3076 : {
3077 2 : MT_lock_set(&b->theaplock);
3078 2 : BATrmprop_nolock(b, idx);
3079 2 : MT_lock_unset(&b->theaplock);
3080 2 : }
3081 :
3082 : /*
3083 : * The BATcount_no_nil function counts all BUN in a BAT that have a
3084 : * non-nil tail value.
3085 : * This function does not fail (the callers currently don't check for failure).
3086 : */
3087 : BUN
3088 2246 : BATcount_no_nil(BAT *b, BAT *s)
3089 : {
3090 2246 : BUN cnt = 0;
3091 2246 : const void *restrict p, *restrict nil;
3092 2246 : const char *restrict base;
3093 2246 : int t;
3094 2246 : int (*cmp)(const void *, const void *);
3095 2246 : struct canditer ci;
3096 2246 : oid hseq;
3097 :
3098 2246 : BATcheck(b, 0);
3099 :
3100 2246 : hseq = b->hseqbase;
3101 2246 : canditer_init(&ci, b, s);
3102 2248 : BATiter bi = bat_iterator(b);
3103 2248 : if (bi.nonil) {
3104 2011 : bat_iterator_end(&bi);
3105 2012 : return ci.ncand;
3106 : }
3107 237 : p = bi.base;
3108 237 : t = ATOMbasetype(bi.type);
3109 237 : switch (t) {
3110 0 : case TYPE_void:
3111 0 : cnt = ci.ncand * BATtdensebi(&bi);
3112 0 : break;
3113 0 : case TYPE_msk:
3114 0 : cnt = ci.ncand;
3115 0 : break;
3116 16 : case TYPE_bte:
3117 32 : CAND_LOOP(&ci)
3118 16 : cnt += !is_bte_nil(((const bte *) p)[canditer_next(&ci) - hseq]);
3119 : break;
3120 0 : case TYPE_sht:
3121 0 : CAND_LOOP(&ci)
3122 0 : cnt += !is_sht_nil(((const sht *) p)[canditer_next(&ci) - hseq]);
3123 : break;
3124 111 : case TYPE_int:
3125 4303232 : CAND_LOOP(&ci)
3126 4303121 : cnt += !is_int_nil(((const int *) p)[canditer_next(&ci) - hseq]);
3127 : break;
3128 85 : case TYPE_lng:
3129 156873 : CAND_LOOP(&ci)
3130 156788 : cnt += !is_lng_nil(((const lng *) p)[canditer_next(&ci) - hseq]);
3131 : break;
3132 : #ifdef HAVE_HGE
3133 0 : case TYPE_hge:
3134 0 : CAND_LOOP(&ci)
3135 0 : cnt += !is_hge_nil(((const hge *) p)[canditer_next(&ci) - hseq]);
3136 : break;
3137 : #endif
3138 0 : case TYPE_flt:
3139 0 : CAND_LOOP(&ci)
3140 0 : cnt += !is_flt_nil(((const flt *) p)[canditer_next(&ci) - hseq]);
3141 : break;
3142 0 : case TYPE_dbl:
3143 0 : CAND_LOOP(&ci)
3144 0 : cnt += !is_dbl_nil(((const dbl *) p)[canditer_next(&ci) - hseq]);
3145 : break;
3146 0 : case TYPE_uuid:
3147 0 : CAND_LOOP(&ci)
3148 0 : cnt += !is_uuid_nil(((const uuid *) p)[canditer_next(&ci) - hseq]);
3149 : break;
3150 25 : case TYPE_str:
3151 25 : base = bi.vh->base;
3152 25 : switch (bi.width) {
3153 23 : case 1:
3154 4809 : CAND_LOOP(&ci)
3155 4786 : cnt += base[(var_t) ((const uint8_t *) p)[canditer_next(&ci) - hseq] + GDK_VAROFFSET] != '\200';
3156 : break;
3157 2 : case 2:
3158 312 : CAND_LOOP(&ci)
3159 310 : cnt += base[(var_t) ((const uint16_t *) p)[canditer_next(&ci) - hseq] + GDK_VAROFFSET] != '\200';
3160 : break;
3161 0 : case 4:
3162 0 : CAND_LOOP(&ci)
3163 0 : cnt += base[(var_t) ((const uint32_t *) p)[canditer_next(&ci) - hseq]] != '\200';
3164 : break;
3165 : #if SIZEOF_VAR_T == 8
3166 0 : case 8:
3167 0 : CAND_LOOP(&ci)
3168 0 : cnt += base[(var_t) ((const uint64_t *) p)[canditer_next(&ci) - hseq]] != '\200';
3169 : break;
3170 : #endif
3171 : default:
3172 0 : MT_UNREACHABLE();
3173 : }
3174 : break;
3175 0 : default:
3176 0 : nil = ATOMnilptr(t);
3177 0 : cmp = ATOMcompare(t);
3178 0 : if (nil == NULL) {
3179 0 : cnt = ci.ncand;
3180 0 : } else if (b->tvheap) {
3181 0 : base = b->tvheap->base;
3182 0 : CAND_LOOP(&ci)
3183 0 : cnt += (*cmp)(nil, base + ((const var_t *) p)[canditer_next(&ci) - hseq]) != 0;
3184 : } else {
3185 0 : CAND_LOOP(&ci)
3186 0 : cnt += (*cmp)(BUNtloc(bi, canditer_next(&ci) - hseq), nil) != 0;
3187 : }
3188 : break;
3189 : }
3190 237 : if (cnt == bi.count) {
3191 34 : MT_lock_set(&b->theaplock);
3192 35 : if (cnt == BATcount(b) && bi.h == b->theap) {
3193 : /* we learned something */
3194 35 : b->tnonil = true;
3195 35 : assert(!b->tnil);
3196 35 : b->tnil = false;
3197 : }
3198 35 : bat pbid = VIEWtparent(b);
3199 35 : MT_lock_unset(&b->theaplock);
3200 35 : if (pbid) {
3201 21 : BAT *pb = BATdescriptor(pbid);
3202 21 : if (pb) {
3203 21 : MT_lock_set(&pb->theaplock);
3204 21 : if (cnt == BATcount(pb) &&
3205 0 : bi.h == pb->theap &&
3206 0 : !pb->tnonil) {
3207 0 : pb->tnonil = true;
3208 0 : assert(!pb->tnil);
3209 0 : pb->tnil = false;
3210 : }
3211 21 : MT_lock_unset(&pb->theaplock);
3212 21 : BBPunfix(pb->batCacheid);
3213 : }
3214 : }
3215 : }
3216 238 : bat_iterator_end(&bi);
3217 238 : return cnt;
3218 : }
|