Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * (c) M. L. Kersten, P. Boncz, S. Manegold, N. Nes, K.S. Mullender
15 : * Common BAT Operations
16 : * We factor out all possible overhead by inlining code. This
17 : * includes the macros BUNhead and BUNtail, which do a test to see
18 : * whether the atom resides in the buns or in a variable storage
19 : * heap.
20 : */
21 : #include "monetdb_config.h"
22 : #include "gdk.h"
23 : #include "gdk_private.h"
24 :
25 : gdk_return
26 52428937 : unshare_varsized_heap(BAT *b)
27 : {
28 52428937 : if (ATOMvarsized(b->ttype) &&
29 20972389 : b->tvheap->parentid != b->batCacheid) {
30 772 : Heap *h = GDKmalloc(sizeof(Heap));
31 772 : if (h == NULL)
32 : return GDK_FAIL;
33 772 : MT_thread_setalgorithm("unshare vheap");
34 1544 : *h = (Heap) {
35 772 : .parentid = b->batCacheid,
36 772 : .farmid = BBPselectfarm(b->batRole, TYPE_str, varheap),
37 : .refs = ATOMIC_VAR_INIT(1),
38 : };
39 772 : strconcat_len(h->filename, sizeof(h->filename),
40 772 : BBP_physical(b->batCacheid), ".theap", NULL);
41 772 : if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
42 0 : HEAPfree(h, true);
43 0 : GDKfree(h);
44 0 : return GDK_FAIL;
45 : }
46 772 : MT_lock_set(&b->theaplock);
47 772 : Heap *oh = b->tvheap;
48 772 : b->tvheap = h;
49 772 : MT_lock_unset(&b->theaplock);
50 772 : BBPrelease(oh->parentid);
51 772 : HEAPdecref(oh, false);
52 : }
53 : return GDK_SUCCEED;
54 : }
55 :
56 : /* We try to be clever when appending one string bat to another.
57 : * First of all, we try to actually share the string heap so that we
58 : * don't need an extra copy, and if that can't be done, we see whether
59 : * it makes sense to just quickly copy the whole string heap instead
60 : * of inserting individual strings. See the comments in the code for
61 : * more information. */
62 : static gdk_return
63 59147 : insert_string_bat(BAT *b, BATiter *ni, struct canditer *ci, bool force, bool mayshare, QryCtx *qry_ctx)
64 : {
65 59147 : size_t toff = ~(size_t) 0; /* tail offset */
66 59147 : BUN p, r; /* loop variables */
67 59147 : const void *tp = NULL; /* tail value pointer */
68 59147 : var_t v;
69 59147 : size_t off; /* offset within n's string heap */
70 59147 : BUN cnt = ci->ncand;
71 59147 : BUN oldcnt = BATcount(b);
72 :
73 59147 : assert(b->ttype == TYPE_str);
74 59147 : assert(b->tbaseoff == 0);
75 59147 : assert(b->theap->parentid == b->batCacheid);
76 : /* only transient bats can use some other bat's string heap */
77 59147 : assert(b->batRole == TRANSIENT || b->tvheap->parentid == b->batCacheid);
78 59147 : if (cnt == 0)
79 : return GDK_SUCCEED;
80 :
81 59148 : if (b->tvheap == ni->vh) {
82 : /* vheaps are already shared, continue doing so: we just
83 : * need to append the offsets */
84 7883 : toff = 0;
85 7883 : MT_thread_setalgorithm("shared vheap");
86 51265 : } else if (mayshare && b->batRole == TRANSIENT && oldcnt == 0) {
87 : /* we can share the vheaps, so we then only need to
88 : * append the offsets */
89 41454 : MT_lock_set(&b->theaplock);
90 41449 : bat bid = b->tvheap->parentid;
91 41449 : HEAPdecref(b->tvheap, bid == b->batCacheid);
92 41455 : HEAPincref(ni->vh);
93 41454 : b->tvheap = ni->vh;
94 41454 : b->tascii = ni->ascii;
95 41454 : MT_lock_unset(&b->theaplock);
96 41455 : BBPretain(ni->vh->parentid);
97 41455 : if (bid != b->batCacheid)
98 0 : BBPrelease(bid);
99 41455 : toff = 0;
100 41455 : MT_thread_setalgorithm("share vheap");
101 : } else {
102 : /* no heap sharing, so also make sure the heap isn't
103 : * shared currently (we're not allowed to write in
104 : * another bat's heap) */
105 10583 : if (b->tvheap->parentid != b->batCacheid &&
106 772 : unshare_varsized_heap(b) != GDK_SUCCEED) {
107 : return GDK_FAIL;
108 : }
109 9811 : if (oldcnt == 0 || (!GDK_ELIMDOUBLES(b->tvheap) &&
110 56 : !GDK_ELIMDOUBLES(ni->vh))) {
111 : /* we'll consider copying the string heap completely
112 : *
113 : * we first estimate how much space the string heap
114 : * should occupy, given the number of rows we need to
115 : * insert, then, if that is way smaller than the actual
116 : * space occupied, we will skip the copy and just insert
117 : * one by one */
118 : size_t len = 0;
119 5798737 : for (int i = 0; i < 1024; i++) {
120 5793074 : p = (BUN) (((double) rand() / RAND_MAX) * (cnt - 1));
121 5793074 : p = canditer_idx(ci, p) - ni->b->hseqbase;
122 5793074 : len += strlen(BUNtvar(*ni, p)) + 1;
123 : }
124 5663 : len = (len + 512) / 1024; /* rounded average length */
125 5663 : r = (GDK_ELIMLIMIT - GDK_STRHASHSIZE) / (len + 12);
126 : /* r is estimate of number of strings in
127 : * double-eliminated area */
128 5663 : BUN ecnt = ci->ncand;
129 5663 : if (ni->b->tunique_est > 0 && ecnt > ni->b->tunique_est)
130 55 : ecnt = (BUN)ni->b->tunique_est;
131 5663 : if (r < ecnt)
132 910 : len = GDK_ELIMLIMIT + (ecnt - r) * len;
133 : else
134 4753 : len = GDK_STRHASHSIZE + ecnt * (len + 12);
135 : /* len is total estimated expected size of vheap */
136 :
137 5663 : if (len > ni->vhfree / 2) {
138 : /* we copy the string heap, perhaps appending */
139 5657 : if (oldcnt == 0) {
140 5634 : toff = 0;
141 5634 : MT_thread_setalgorithm("copy vheap");
142 : } else {
143 23 : toff = (b->tvheap->free + GDK_VARALIGN - 1) & ~(GDK_VARALIGN - 1);
144 23 : MT_thread_setalgorithm("append vheap");
145 : }
146 :
147 5657 : MT_lock_set(&b->theaplock);
148 5657 : if (HEAPgrow(&b->tvheap, toff + ni->vhfree, force) != GDK_SUCCEED) {
149 0 : MT_lock_unset(&b->theaplock);
150 0 : return GDK_FAIL;
151 : }
152 5657 : memcpy(b->tvheap->base + toff, ni->vh->base, ni->vhfree);
153 5657 : b->tvheap->free = toff + ni->vhfree;
154 5657 : b->tvheap->dirty = true;
155 5657 : b->tascii &= ni->ascii;
156 5657 : MT_lock_unset(&b->theaplock);
157 : }
158 : }
159 : }
160 : /* if toff has the initial value of ~0, we insert strings
161 : * individually, otherwise we only copy (insert) offsets */
162 54995 : if (toff == ~(size_t) 0)
163 : v = GDK_VAROFFSET;
164 : else
165 54994 : v = b->tvheap->free - 1;
166 :
167 : /* make sure there is (vertical) space in the offset heap, we
168 : * may also widen thanks to v, set above */
169 59148 : if (GDKupgradevarheap(b, v, oldcnt + cnt < b->batCapacity ? b->batCapacity : oldcnt + cnt, b->batCount) != GDK_SUCCEED) {
170 : return GDK_FAIL;
171 : }
172 :
173 59150 : if (toff == 0 && ni->width == b->twidth && ci->tpe == cand_dense) {
174 : /* we don't need to do any translation of offset
175 : * values, so we can use fast memcpy */
176 54871 : MT_thread_setalgorithm("memcpy offsets");
177 54867 : memcpy(Tloc(b, BATcount(b)), (const char *) ni->base + ((ci->seq - ni->b->hseqbase) << ni->shift), cnt << ni->shift);
178 4279 : } else if (toff != ~(size_t) 0) {
179 : /* we don't need to insert any actual strings since we
180 : * have already made sure that they are all in b's
181 : * string heap at known locations (namely the offset
182 : * in n added to toff), so insert offsets from n after
183 : * adding toff into b */
184 : /* note the use of the "restrict" qualifier here: all
185 : * four pointers below point to the same value, but
186 : * only one of them will actually be used, hence we
187 : * still obey the rule for restrict-qualified
188 : * pointers */
189 124 : const uint8_t *restrict tbp = (const uint8_t *) ni->base;
190 124 : const uint16_t *restrict tsp = (const uint16_t *) ni->base;
191 124 : const uint32_t *restrict tip = (const uint32_t *) ni->base;
192 : #if SIZEOF_VAR_T == 8
193 124 : const uint64_t *restrict tlp = (const uint64_t *) ni->base;
194 : #endif
195 :
196 124 : MT_thread_setalgorithm("copy offset values");
197 124 : r = b->batCount;
198 3083454 : TIMEOUT_LOOP(cnt, qry_ctx) {
199 3083034 : p = canditer_next(ci) - ni->b->hseqbase;
200 3083034 : switch (ni->width) {
201 94 : case 1:
202 94 : v = (var_t) tbp[p] + GDK_VAROFFSET;
203 94 : break;
204 242 : case 2:
205 242 : v = (var_t) tsp[p] + GDK_VAROFFSET;
206 242 : break;
207 3082698 : case 4:
208 3082698 : v = (var_t) tip[p];
209 3082698 : break;
210 : #if SIZEOF_VAR_T == 8
211 0 : case 8:
212 0 : v = (var_t) tlp[p];
213 0 : break;
214 : #endif
215 : default:
216 0 : MT_UNREACHABLE();
217 : }
218 3083034 : v = (var_t) ((size_t) v + toff);
219 3083034 : assert(v >= GDK_VAROFFSET);
220 3083034 : assert((size_t) v < b->tvheap->free);
221 3083034 : switch (b->twidth) {
222 244 : case 1:
223 244 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 8));
224 244 : ((uint8_t *) b->theap->base)[r++] = (uint8_t) (v - GDK_VAROFFSET);
225 244 : break;
226 749 : case 2:
227 749 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 16));
228 749 : ((uint16_t *) b->theap->base)[r++] = (uint16_t) (v - GDK_VAROFFSET);
229 749 : break;
230 3082041 : case 4:
231 : #if SIZEOF_VAR_T == 8
232 3082041 : assert(v < ((var_t) 1 << 32));
233 : #endif
234 3082041 : ((uint32_t *) b->theap->base)[r++] = (uint32_t) v;
235 3082041 : break;
236 : #if SIZEOF_VAR_T == 8
237 0 : case 8:
238 0 : ((uint64_t *) b->theap->base)[r++] = (uint64_t) v;
239 0 : break;
240 : #endif
241 : default:
242 3083034 : MT_UNREACHABLE();
243 : }
244 : }
245 4155 : } else if (b->tvheap->free < ni->vhfree / 2 ||
246 : GDK_ELIMDOUBLES(b->tvheap)) {
247 : /* if b's string heap is much smaller than n's string
248 : * heap, don't bother checking whether n's string
249 : * values occur in b's string heap; also, if b is
250 : * (still) fully double eliminated, we must continue
251 : * to use the double elimination mechanism */
252 4123 : r = b->batCount;
253 4123 : oid hseq = ni->b->hseqbase;
254 4123 : MT_thread_setalgorithm("insert string values");
255 8092670 : TIMEOUT_LOOP(cnt, qry_ctx) {
256 8084010 : p = canditer_next(ci) - hseq;
257 7979042 : tp = BUNtvar(*ni, p);
258 7979042 : if (tfastins_nocheckVAR(b, r, tp) != GDK_SUCCEED) {
259 : return GDK_FAIL;
260 : }
261 8084010 : r++;
262 : }
263 : } else {
264 : /* Insert values from n individually into b; however,
265 : * we check whether there is a string in b's string
266 : * heap at the same offset as the string is in n's
267 : * string heap (in case b's string heap is a copy of
268 : * n's). If this is the case, we just copy the
269 : * offset, otherwise we insert normally. */
270 32 : r = b->batCount;
271 32 : MT_thread_setalgorithm("insert string values with check");
272 4017 : TIMEOUT_LOOP(cnt, qry_ctx) {
273 3953 : p = canditer_next(ci) - ni->b->hseqbase;
274 3953 : off = BUNtvaroff(*ni, p); /* the offset */
275 3953 : tp = ni->vh->base + off; /* the string */
276 3953 : if (off < b->tvheap->free &&
277 3953 : strcmp(b->tvheap->base + off, tp) == 0) {
278 : /* we found the string at the same
279 : * offset in b's string heap as it was
280 : * in n's string heap, so we don't
281 : * have to insert a new string into b:
282 : * we can just copy the offset */
283 684 : v = (var_t) off;
284 684 : switch (b->twidth) {
285 0 : case 1:
286 0 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 8));
287 0 : ((uint8_t *) b->theap->base)[r] = (uint8_t) (v - GDK_VAROFFSET);
288 0 : break;
289 4 : case 2:
290 4 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 16));
291 4 : ((uint16_t *) b->theap->base)[r] = (uint16_t) (v - GDK_VAROFFSET);
292 4 : break;
293 680 : case 4:
294 : #if SIZEOF_VAR_T == 8
295 680 : assert(v < ((var_t) 1 << 32));
296 : #endif
297 680 : ((uint32_t *) b->theap->base)[r] = (uint32_t) v;
298 680 : break;
299 : #if SIZEOF_VAR_T == 8
300 0 : case 8:
301 0 : ((uint64_t *) b->theap->base)[r] = (uint64_t) v;
302 0 : break;
303 : #endif
304 : default:
305 0 : MT_UNREACHABLE();
306 : }
307 : } else {
308 3269 : if (tfastins_nocheckVAR(b, r, tp) != GDK_SUCCEED) {
309 : return GDK_FAIL;
310 : }
311 : }
312 3953 : r++;
313 : }
314 : }
315 59146 : TIMEOUT_CHECK(qry_ctx, TIMEOUT_HANDLER(GDK_FAIL, qry_ctx));
316 59148 : MT_rwlock_wrlock(&b->thashlock);
317 59152 : MT_lock_set(&b->theaplock);
318 59147 : BATsetcount(b, oldcnt + ci->ncand);
319 59145 : assert(b->batCapacity >= b->batCount);
320 59145 : MT_lock_unset(&b->theaplock);
321 : /* maintain hash */
322 62138 : for (r = oldcnt, cnt = BATcount(b); b->thash && r < cnt; r++) {
323 2989 : HASHappend_locked(b, r, b->tvheap->base + VarHeapVal(Tloc(b, 0), r, b->twidth));
324 : }
325 59149 : BUN nunique = b->thash ? b->thash->nunique : 0;
326 59149 : MT_rwlock_wrunlock(&b->thashlock);
327 59153 : if (nunique != 0) {
328 9 : MT_lock_set(&b->theaplock);
329 9 : b->tunique_est = (double) nunique;
330 9 : MT_lock_unset(&b->theaplock);
331 : }
332 : return GDK_SUCCEED;
333 : }
334 :
335 : static gdk_return
336 448 : append_varsized_bat(BAT *b, BATiter *ni, struct canditer *ci, bool mayshare)
337 : {
338 448 : BUN cnt = ci->ncand, r;
339 448 : oid hseq = ni->b->hseqbase;
340 :
341 : /* only transient bats can use some other bat's vheap */
342 448 : assert(b->batRole == TRANSIENT || b->tvheap->parentid == b->batCacheid);
343 : /* make sure the bats use var_t */
344 448 : assert(b->twidth == ni->width);
345 448 : assert(b->twidth == SIZEOF_VAR_T);
346 448 : if (cnt == 0)
347 : return GDK_SUCCEED;
348 448 : if (cnt > BATcapacity(b) - BATcount(b)) {
349 : /* if needed space exceeds a normal growth extend just
350 : * with what's needed */
351 18 : BUN ncap = BATcount(b) + cnt;
352 18 : BUN grows = BATgrows(b);
353 :
354 18 : if (ncap > grows)
355 : grows = ncap;
356 18 : if (BATextend(b, grows) != GDK_SUCCEED)
357 : return GDK_FAIL;
358 : }
359 448 : if (mayshare &&
360 448 : BATcount(b) == 0 &&
361 233 : b->batRole == TRANSIENT &&
362 148 : ni->restricted == BAT_READ &&
363 148 : b->tvheap != ni->vh) {
364 : /* if b is still empty, in the transient farm, and n
365 : * is read-only, we replace b's vheap with a reference
366 : * to n's */
367 148 : MT_lock_set(&b->theaplock);
368 148 : bat bid = b->tvheap->parentid;
369 148 : HEAPdecref(b->tvheap, true);
370 148 : HEAPincref(ni->vh);
371 148 : b->tvheap = ni->vh;
372 148 : MT_lock_unset(&b->theaplock);
373 148 : BBPretain(ni->vh->parentid);
374 148 : if (bid != b->batCacheid)
375 0 : BBPrelease(bid);
376 : }
377 448 : if (b->tvheap == ni->vh) {
378 : /* if b and n use the same vheap, we only need to copy
379 : * the offsets from n to b */
380 286 : if (ci->tpe == cand_dense) {
381 : /* fast memcpy since we copy a consecutive
382 : * chunk of memory */
383 286 : memcpy(Tloc(b, BATcount(b)),
384 286 : (const var_t *) ni->base + (ci->seq - hseq),
385 286 : cnt << b->tshift);
386 : } else {
387 0 : var_t *restrict dst = (var_t *) Tloc(b, BATcount(b));
388 0 : const var_t *restrict src = (const var_t *) ni->base;
389 0 : while (cnt > 0) {
390 0 : cnt--;
391 0 : *dst++ = src[canditer_next(ci) - hseq];
392 : }
393 : }
394 286 : MT_rwlock_wrlock(&b->thashlock);
395 286 : MT_lock_set(&b->theaplock);
396 286 : BATsetcount(b, BATcount(b) + ci->ncand);
397 286 : MT_lock_unset(&b->theaplock);
398 : /* maintain hash table */
399 286 : for (BUN i = BATcount(b) - ci->ncand;
400 286 : b->thash && i < BATcount(b);
401 0 : i++) {
402 0 : HASHappend_locked(b, i, b->tvheap->base + *(var_t *) Tloc(b, i));
403 : }
404 286 : BUN nunique = b->thash ? b->thash->nunique : 0;
405 286 : MT_rwlock_wrunlock(&b->thashlock);
406 286 : if (nunique != 0) {
407 0 : MT_lock_set(&b->theaplock);
408 0 : b->tunique_est = (double) nunique;
409 0 : MT_lock_unset(&b->theaplock);
410 : }
411 286 : return GDK_SUCCEED;
412 : }
413 : /* b and n do not share their vheap, so we need to copy data */
414 162 : if (b->tvheap->parentid != b->batCacheid) {
415 : /* if b shares its vheap with some other bat, unshare it */
416 20 : Heap *h = GDKmalloc(sizeof(Heap));
417 20 : if (h == NULL) {
418 : return GDK_FAIL;
419 : }
420 40 : *h = (Heap) {
421 20 : .parentid = b->batCacheid,
422 20 : .farmid = BBPselectfarm(b->batRole, b->ttype, varheap),
423 : .refs = ATOMIC_VAR_INIT(1),
424 : };
425 20 : strconcat_len(h->filename, sizeof(h->filename),
426 20 : BBP_physical(b->batCacheid), ".theap", NULL);
427 20 : if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
428 0 : HEAPfree(h, true);
429 0 : GDKfree(h);
430 0 : return GDK_FAIL;
431 : }
432 20 : MT_lock_set(&b->theaplock);
433 20 : Heap *oh = b->tvheap;
434 20 : b->tvheap = h;
435 20 : MT_lock_unset(&b->theaplock);
436 20 : if (oh->parentid != b->batCacheid)
437 20 : BBPrelease(oh->parentid);
438 20 : HEAPdecref(oh, false);
439 : }
440 162 : if (BATcount(b) == 0 &&
441 85 : ci->tpe == cand_dense && ci->ncand == ni->count) {
442 : /* just copy the heaps */
443 85 : MT_lock_set(&b->theaplock);
444 85 : if (HEAPgrow(&b->tvheap, ni->vhfree, false) != GDK_SUCCEED) {
445 0 : MT_lock_unset(&b->theaplock);
446 0 : return GDK_FAIL;
447 : }
448 84 : memcpy(b->theap->base, ni->base, ni->hfree);
449 84 : memcpy(b->tvheap->base, ni->vh->base, ni->vhfree);
450 84 : b->theap->free = ni->hfree;
451 84 : b->theap->dirty = true;
452 84 : b->tvheap->free = ni->vhfree;
453 84 : b->tvheap->dirty = true;
454 84 : BATsetcount(b, ni->count);
455 85 : b->tnil = ni->nil;
456 85 : b->tnonil = ni->nonil;
457 85 : b->tsorted = ni->sorted;
458 85 : b->tnosorted = ni->nosorted;
459 85 : b->trevsorted = ni->revsorted;
460 85 : b->tnorevsorted = ni->norevsorted;
461 85 : b->tkey = ni->key;
462 85 : b->tnokey[0] = ni->nokey[0];
463 85 : b->tnokey[1] = ni->nokey[1];
464 85 : b->tminpos = ni->minpos;
465 85 : b->tmaxpos = ni->maxpos;
466 85 : b->tunique_est = ni->unique_est;
467 85 : MT_lock_unset(&b->theaplock);
468 85 : return GDK_SUCCEED;
469 : }
470 : /* copy data from n to b */
471 : r = BATcount(b);
472 380587 : for (BUN i = 0; i < cnt; i++) {
473 380510 : BUN p = canditer_next(ci) - hseq;
474 380510 : const void *t = BUNtvar(*ni, p);
475 380510 : if (tfastins_nocheckVAR(b, r, t) != GDK_SUCCEED) {
476 : return GDK_FAIL;
477 : }
478 380510 : r++;
479 : }
480 77 : MT_rwlock_wrlock(&b->thashlock);
481 77 : if (b->thash) {
482 0 : r -= cnt;
483 0 : BATiter bi = bat_iterator_nolock(b);
484 0 : for (BUN i = 0; i < cnt; i++) {
485 0 : const void *t = BUNtvar(bi, r);
486 0 : HASHappend_locked(b, r, t);
487 0 : r++;
488 : }
489 : }
490 77 : BUN nunique = b->thash ? b->thash->nunique : 0;
491 77 : MT_lock_set(&b->theaplock);
492 77 : BATsetcount(b, r);
493 77 : if (nunique != 0)
494 0 : b->tunique_est = (double) nunique;
495 77 : MT_lock_unset(&b->theaplock);
496 77 : MT_rwlock_wrunlock(&b->thashlock);
497 77 : return GDK_SUCCEED;
498 : }
499 :
500 : static gdk_return
501 127 : append_msk_bat(BAT *b, BATiter *ni, struct canditer *ci)
502 : {
503 127 : if (ci->ncand == 0)
504 : return GDK_SUCCEED;
505 127 : if (BATextend(b, BATcount(b) + ci->ncand) != GDK_SUCCEED)
506 : return GDK_FAIL;
507 :
508 127 : MT_lock_set(&b->theaplock);
509 :
510 127 : uint32_t boff = b->batCount % 32;
511 127 : uint32_t *bp = (uint32_t *) b->theap->base + b->batCount / 32;
512 127 : b->batCount += ci->ncand;
513 127 : b->theap->dirty = true;
514 127 : b->theap->free = ((b->batCount + 31) / 32) * 4;
515 127 : if (ci->tpe == cand_dense) {
516 127 : const uint32_t *np;
517 127 : uint32_t noff, mask;
518 127 : BUN cnt;
519 127 : noff = (ci->seq - ni->b->hseqbase) % 32;
520 127 : cnt = ci->ncand;
521 127 : np = (const uint32_t *) ni->base + (ci->seq - ni->b->hseqbase) / 32;
522 127 : if (boff == noff) {
523 : /* words of b and n are aligned, so we don't
524 : * need to shift bits around */
525 47 : if (boff + cnt <= 32) {
526 : /* all new bits within one word */
527 41 : if (cnt == 32) {
528 0 : *bp = *np;
529 : } else {
530 41 : mask = ((1U << cnt) - 1) << boff;
531 41 : *bp &= ~mask;
532 41 : *bp |= *np & mask;
533 : }
534 : } else {
535 : /* multiple words of b are affected */
536 6 : if (boff != 0) {
537 : /* first fill up the rest of the first
538 : * word */
539 0 : mask = ~0U << boff;
540 0 : *bp &= ~mask;
541 0 : *bp++ |= *np++ & mask;
542 0 : cnt -= 32 - boff;
543 : }
544 6 : if (cnt >= 32) {
545 : /* copy an integral number of words fast */
546 6 : BUN nw = cnt / 32;
547 6 : memcpy(bp, np, nw*sizeof(int));
548 6 : bp += nw;
549 6 : np += nw;
550 6 : cnt %= 32;
551 : }
552 6 : if (cnt > 0) {
553 : /* do the left over bits */
554 6 : mask = (1U << cnt) - 1;
555 6 : *bp = *np & mask;
556 : }
557 : }
558 80 : } else if (boff > noff) {
559 80 : if (boff + cnt <= 32) {
560 : /* we only need to copy bits from a
561 : * single word of n to a single word
562 : * of b */
563 : /* boff > 0, so cnt < 32, hence the
564 : * shift is ok */
565 72 : mask = (1U << cnt) - 1;
566 72 : *bp &= ~(mask << boff);
567 72 : *bp |= (*np & (mask << noff)) << (boff - noff);
568 : } else {
569 : /* first fill the rest of the last partial
570 : * word of b, so that's 32-boff bits */
571 8 : mask = (1U << (32 - boff)) - 1;
572 8 : *bp &= ~(mask << boff);
573 8 : *bp++ |= (*np & (mask << noff)) << (boff - noff);
574 8 : cnt -= 32 - boff;
575 :
576 : /* set boff and noff to the amount we need to
577 : * shift bits in consecutive words of n around
578 : * to fit into the next word of b; set mask to
579 : * the mask of the bottom bits of n that fit
580 : * in a word of b (and the complement are the
581 : * top bits that go to another word of b) */
582 8 : boff -= noff;
583 8 : noff = 32 - boff;
584 8 : mask = (1U << noff) - 1;
585 140 : while (cnt >= 32) {
586 132 : *bp = (*np++ & ~mask) >> noff;
587 132 : *bp++ |= (*np & mask) << boff;
588 132 : cnt -= 32;
589 : }
590 8 : if (cnt > noff) {
591 : /* the last bits come from two words
592 : * in n */
593 4 : *bp = (*np++ & ~mask) >> noff;
594 4 : cnt -= noff;
595 4 : mask = (1U << cnt) - 1;
596 4 : *bp++ |= (*np & mask) << boff;
597 4 : } else if (cnt > 0) {
598 : /* the last bits come from a single
599 : * word in n */
600 4 : mask = ((1U << cnt) - 1) << noff;
601 4 : *bp = (*np & mask) >> noff;
602 : }
603 : }
604 : } else {
605 : /* boff < noff */
606 0 : if (noff + cnt <= 32) {
607 : /* only need part of the first word of n */
608 0 : assert(cnt < 32); /* noff > 0, so cnt < 32 */
609 0 : mask = (1U << cnt) - 1;
610 0 : *bp &= ~(mask << boff);
611 0 : *bp |= (*np & (mask << noff)) >> (noff - boff);
612 0 : } else if (boff + cnt <= 32) {
613 : /* only need to fill a single word of
614 : * b, but from two of n */
615 0 : if (cnt < 32)
616 0 : *bp &= ~(((1U << cnt) - 1) << boff);
617 : else
618 0 : *bp = 0;
619 0 : mask = ~((1U << noff) - 1);
620 0 : *bp |= (*np++ & mask) >> (noff - boff);
621 0 : cnt -= 32 - noff;
622 0 : mask = (1U << cnt) - 1;
623 0 : *bp |= (*np & mask) << (32 - noff);
624 : } else {
625 0 : if (boff > 0) {
626 : /* fill the rest of the first word of b */
627 0 : cnt -= 32 - boff;
628 0 : *bp &= (1U << boff) - 1;
629 0 : mask = ~((1U << noff) - 1);
630 0 : noff -= boff;
631 0 : boff = 32 - noff;
632 0 : *bp |= (*np++ & mask) >> noff;
633 0 : *bp |= (*np & ((1U << noff) - 1)) << boff;
634 : } else {
635 0 : boff = 32 - noff;
636 : }
637 0 : mask = (1U << noff) - 1;
638 0 : while (cnt >= 32) {
639 0 : *bp = (*np++ & ~mask) >> noff;
640 0 : *bp++ |= (*np & mask) << boff;
641 0 : cnt -= 32;
642 : }
643 0 : if (cnt > 0) {
644 0 : *bp = (*np++ & ~mask) >> noff;
645 0 : if (cnt > noff)
646 0 : *bp++ |= (*np & mask) << boff;
647 : }
648 : }
649 : }
650 : } else {
651 0 : oid o;
652 0 : uint32_t v = boff > 0 ? *bp & ((1U << boff) - 1) : 0;
653 0 : do {
654 0 : for (uint32_t i = boff; i < 32; i++) {
655 0 : o = canditer_next(ci);
656 0 : if (is_oid_nil(o))
657 : break;
658 0 : o -= ni->b->hseqbase;
659 0 : v |= (uint32_t) Tmskval(ni, o - ni->b->hseqbase) << i;
660 : }
661 0 : *bp++ = v;
662 0 : v = 0;
663 0 : boff = 0;
664 0 : } while (!is_oid_nil(o));
665 : }
666 127 : MT_lock_unset(&b->theaplock);
667 127 : return GDK_SUCCEED;
668 : }
669 :
670 : /* Append the contents of BAT n (subject to the optional candidate
671 : * list s) to BAT b. If b is empty, b will get the seqbase of s if it
672 : * was passed in, and else the seqbase of n. */
673 : static gdk_return
674 1183949 : BATappend2(BAT *b, BAT *n, BAT *s, bool force, bool mayshare)
675 : {
676 1183949 : struct canditer ci;
677 1183949 : BUN r;
678 1183949 : oid hseq = n->hseqbase;
679 1183949 : char buf[64];
680 1183949 : lng t0 = 0;
681 1183949 : const ValRecord *prop = NULL;
682 1183949 : ValRecord minprop, maxprop;
683 1183949 : const void *minbound = NULL, *maxbound = NULL;
684 1183949 : int (*atomcmp) (const void *, const void *) = ATOMcompare(b->ttype);
685 1183949 : bool hlocked = false;
686 :
687 1183949 : if (b == NULL || n == NULL || BATcount(n) == 0) {
688 : return GDK_SUCCEED;
689 : }
690 787085 : assert(b->theap->parentid == b->batCacheid);
691 :
692 787085 : TRC_DEBUG_IF(ALGO) {
693 0 : t0 = GDKusec();
694 0 : snprintf(buf, sizeof(buf), ALGOBATFMT, ALGOBATPAR(b));
695 : }
696 :
697 787085 : ALIGNapp(b, force, GDK_FAIL);
698 :
699 2226672 : if (ATOMstorage(ATOMtype(b->ttype)) != ATOMstorage(ATOMtype(n->ttype))) {
700 0 : GDKerror("Incompatible operands ("ALGOBATFMT" vs. "ALGOBATFMT").\n", ALGOBATPAR(b), ALGOBATPAR(n));
701 0 : return GDK_FAIL;
702 : }
703 :
704 787161 : if (BATttype(b) != BATttype(n) &&
705 : ATOMtype(b->ttype) != ATOMtype(n->ttype)) {
706 0 : TRC_DEBUG(CHECK_, "Interpreting %s as %s.\n",
707 : ATOMname(BATttype(n)), ATOMname(BATttype(b)));
708 : }
709 :
710 787161 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
711 :
712 787139 : BATiter ni = bat_iterator(n);
713 :
714 787047 : canditer_init(&ci, n, s);
715 786880 : if (ci.ncand == 0) {
716 0 : goto doreturn;
717 : }
718 :
719 786880 : if (BATcount(b) + ci.ncand > BUN_MAX) {
720 0 : bat_iterator_end(&ni);
721 0 : GDKerror("combined BATs too large\n");
722 0 : return GDK_FAIL;
723 : }
724 :
725 786880 : if (b->hseqbase + BATcount(b) + ci.ncand >= GDK_oid_max) {
726 0 : bat_iterator_end(&ni);
727 0 : GDKerror("overflow of head value\n");
728 0 : return GDK_FAIL;
729 : }
730 :
731 786880 : OIDXdestroy(b);
732 787268 : STRMPdestroy(b); /* TODO: use STRMPappendBitString */
733 787194 : RTREEdestroy(b);
734 :
735 787175 : MT_lock_set(&b->theaplock);
736 786622 : const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
737 786639 : if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL &&
738 48 : VALcopy(&minprop, prop) != NULL) {
739 48 : minbound = VALptr(&minprop);
740 48 : if (ci.ncand == BATcount(n) &&
741 62 : ni.minpos != BUN_NONE &&
742 14 : atomcmp(BUNtail(ni, ni.minpos), minbound) < 0) {
743 0 : assert(0);
744 : GDKerror("value out of bounds\n");
745 : MT_lock_unset(&b->theaplock);
746 : goto bailout;
747 : }
748 : }
749 786597 : if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL &&
750 40 : VALcopy(&maxprop, prop) != NULL) {
751 40 : maxbound = VALptr(&maxprop);
752 40 : if (ci.ncand == BATcount(n) &&
753 52 : ni.maxpos != BUN_NONE &&
754 12 : atomcmp(BUNtail(ni, ni.maxpos), maxbound) >= 0) {
755 0 : assert(0);
756 : GDKerror("value out of bounds\n");
757 : MT_lock_unset(&b->theaplock);
758 : goto bailout;
759 : }
760 : }
761 :
762 786808 : if (BATcount(b) == 0 || b->tmaxpos != BUN_NONE) {
763 375416 : if (ni.maxpos != BUN_NONE) {
764 149058 : BATiter bi = bat_iterator_nolock(b);
765 149058 : if (BATcount(b) == 0 || atomcmp(BUNtail(bi, b->tmaxpos), BUNtail(ni, ni.maxpos)) < 0) {
766 145536 : if (s == NULL) {
767 145400 : b->tmaxpos = BATcount(b) + ni.maxpos;
768 : } else {
769 136 : b->tmaxpos = BUN_NONE;
770 : }
771 : }
772 : } else {
773 226358 : b->tmaxpos = BUN_NONE;
774 : }
775 : }
776 786808 : if (BATcount(b) == 0 || b->tminpos != BUN_NONE) {
777 375409 : if (ni.minpos != BUN_NONE) {
778 149015 : BATiter bi = bat_iterator_nolock(b);
779 149015 : if (BATcount(b) == 0 || atomcmp(BUNtail(bi, b->tminpos), BUNtail(ni, ni.minpos)) > 0) {
780 144877 : if (s == NULL) {
781 144743 : b->tminpos = BATcount(b) + ni.minpos;
782 : } else {
783 134 : b->tminpos = BUN_NONE;
784 : }
785 : }
786 : } else {
787 226394 : b->tminpos = BUN_NONE;
788 : }
789 : }
790 786808 : if (ci.ncand > BATcount(b) / gdk_unique_estimate_keep_fraction) {
791 786309 : b->tunique_est = 0;
792 : }
793 786808 : MT_lock_unset(&b->theaplock);
794 : /* load hash so that we can maintain it */
795 786906 : (void) BATcheckhash(b);
796 :
797 787176 : if (b->ttype == TYPE_void) {
798 : /* b does not have storage, keep it that way if we can */
799 66810 : HASHdestroy(b); /* we're not maintaining the hash here */
800 66798 : MT_lock_set(&b->theaplock);
801 66810 : if (BATtdensebi(&ni) && ci.tpe == cand_dense &&
802 61093 : (BATcount(b) == 0 ||
803 44037 : (BATtdense(b) &&
804 44037 : b->tseqbase + BATcount(b) == n->tseqbase + ci.seq - hseq))) {
805 : /* n is also dense and consecutive with b */
806 61033 : if (BATcount(b) == 0) {
807 17056 : if (minbound && n->tseqbase + ci.seq - hseq < *(const oid *)minbound) {
808 0 : assert(0);
809 : GDKerror("value not within bounds\n");
810 : MT_lock_unset(&b->theaplock);
811 : goto bailout;
812 : }
813 17056 : BATtseqbase(b, n->tseqbase + ci.seq - hseq);
814 : }
815 61025 : if (maxbound && b->tseqbase + BATcount(b) + ci.ncand >= *(const oid *)maxbound) {
816 0 : assert(0);
817 : GDKerror("value not within bounds\n");
818 : MT_lock_unset(&b->theaplock);
819 : goto bailout;
820 : }
821 61025 : BATsetcount(b, BATcount(b) + ci.ncand);
822 61022 : MT_lock_unset(&b->theaplock);
823 61023 : goto doreturn;
824 : }
825 5777 : if ((BATcount(b) == 0 || is_oid_nil(b->tseqbase)) &&
826 20 : ni.type == TYPE_void && is_oid_nil(n->tseqbase)) {
827 : /* both b and n are void/nil */
828 0 : if (notnull) {
829 0 : assert(0);
830 : GDKerror("NULL value not within bounds\n");
831 : MT_lock_unset(&b->theaplock);
832 : goto bailout;
833 : }
834 0 : BATtseqbase(b, oid_nil);
835 0 : BATsetcount(b, BATcount(b) + ci.ncand);
836 0 : MT_lock_unset(&b->theaplock);
837 0 : goto doreturn;
838 : }
839 : /* we need to materialize b; allocate enough capacity */
840 5777 : MT_lock_unset(&b->theaplock);
841 5777 : if (BATmaterialize(b, BATcount(b) + ci.ncand) != GDK_SUCCEED) {
842 0 : goto bailout;
843 : }
844 : }
845 :
846 : /* property setting */
847 726143 : MT_lock_set(&b->theaplock);
848 726124 : r = BATcount(b);
849 :
850 726124 : if (BATcount(b) == 0) {
851 352028 : b->tsorted = ni.sorted;
852 352028 : b->trevsorted = ni.revsorted;
853 352028 : b->tseqbase = oid_nil;
854 352028 : b->tnonil = ni.nonil;
855 352028 : b->tnil = ni.nil && ci.ncand == BATcount(n);
856 352028 : if (ci.tpe == cand_dense) {
857 351876 : b->tnosorted = ci.seq - hseq <= ni.nosorted && ni.nosorted < ci.seq + ci.ncand - hseq ? ni.nosorted + hseq - ci.seq : 0;
858 351876 : b->tnorevsorted = ci.seq - hseq <= ni.norevsorted && ni.norevsorted < ci.seq + ci.ncand - hseq ? ni.norevsorted + hseq - ci.seq : 0;
859 351876 : if (BATtdensebi(&ni)) {
860 1931 : b->tseqbase = n->tseqbase + ci.seq - hseq;
861 : }
862 : } else {
863 152 : b->tnosorted = 0;
864 152 : b->tnorevsorted = 0;
865 : }
866 352028 : b->tkey = ni.key;
867 352028 : if (ci.ncand == BATcount(n)) {
868 351304 : b->tnokey[0] = ni.nokey[0];
869 351304 : b->tnokey[1] = ni.nokey[1];
870 : } else {
871 724 : b->tnokey[0] = b->tnokey[1] = 0;
872 : }
873 : } else {
874 374096 : BUN last = r - 1;
875 374096 : BATiter bi = bat_iterator_nolock(b);
876 374096 : int xx = ATOMcmp(b->ttype,
877 : BUNtail(ni, ci.seq - hseq),
878 : BUNtail(bi, last));
879 374064 : if (b->tsorted && (!ni.sorted || xx < 0)) {
880 14613 : b->tsorted = false;
881 14613 : b->tnosorted = 0;
882 14613 : b->tseqbase = oid_nil;
883 : }
884 374064 : if (b->trevsorted &&
885 36628 : (!ni.revsorted || xx > 0)) {
886 10950 : b->trevsorted = false;
887 10950 : b->tnorevsorted = 0;
888 : }
889 374064 : if (b->tkey &&
890 42362 : (!(b->tsorted || b->trevsorted) ||
891 33475 : !ni.key || xx == 0)) {
892 13200 : BATkey(b, false);
893 : }
894 374063 : if (b->ttype != TYPE_void && b->tsorted && BATtdense(b) &&
895 5814 : (!BATtdensebi(&ni) ||
896 421 : ci.tpe != cand_dense ||
897 421 : 1 + *(oid *) BUNtloc(bi, last) != BUNtoid(n, ci.seq - hseq))) {
898 5410 : b->tseqbase = oid_nil;
899 : }
900 374063 : b->tnonil &= ni.nonil;
901 740295 : b->tnil |= ni.nil && ci.ncand == ni.count;
902 : }
903 726091 : MT_lock_unset(&b->theaplock);
904 726157 : if (b->ttype == TYPE_str) {
905 59147 : if (insert_string_bat(b, &ni, &ci, force, mayshare, qry_ctx) != GDK_SUCCEED) {
906 0 : goto bailout;
907 : }
908 667010 : } else if (ATOMvarsized(b->ttype)) {
909 448 : if (append_varsized_bat(b, &ni, &ci, mayshare) != GDK_SUCCEED) {
910 0 : goto bailout;
911 : }
912 666562 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
913 : /* no bounds and NOT_NULL property on MSK bats */
914 127 : assert(minbound == NULL && maxbound == NULL && !notnull);
915 127 : if (append_msk_bat(b, &ni, &ci) != GDK_SUCCEED) {
916 0 : goto bailout;
917 : }
918 : } else {
919 666435 : if (ci.ncand > BATcapacity(b) - BATcount(b)) {
920 : /* if needed space exceeds a normal growth
921 : * extend just with what's needed */
922 4681 : BUN ncap = BATcount(b) + ci.ncand;
923 4681 : BUN grows = BATgrows(b);
924 :
925 4688 : if (ncap > grows)
926 : grows = ncap;
927 4688 : if (BATextend(b, grows) != GDK_SUCCEED) {
928 0 : goto bailout;
929 : }
930 : }
931 666445 : MT_rwlock_wrlock(&b->thashlock);
932 666434 : hlocked = true;
933 666434 : if (b->ttype != TYPE_void &&
934 666340 : ni.type != TYPE_void &&
935 659170 : ci.tpe == cand_dense) {
936 : /* use fast memcpy if we can */
937 659101 : memcpy(Tloc(b, BATcount(b)),
938 659101 : (const char *) ni.base + ((ci.seq - hseq) << ni.shift),
939 659101 : ci.ncand << ni.shift);
940 659114 : for (BUN i = 0; b->thash && i < ci.ncand; i++) {
941 280 : HASHappend_locked(b, r, Tloc(b, r));
942 13 : r++;
943 : }
944 : } else {
945 7333 : const void *atomnil = ATOMnilptr(b->ttype);
946 11179290 : TIMEOUT_LOOP(ci.ncand, qry_ctx) {
947 11164602 : BUN p = canditer_next(&ci) - hseq;
948 11164858 : const void *t = BUNtail(ni, p);
949 11164885 : bool isnil = atomcmp(t, atomnil) == 0;
950 11164722 : if (notnull && isnil) {
951 0 : assert(0);
952 : GDKerror("NULL value not within bounds\n");
953 : goto bailout;
954 11164722 : } else if (minbound &&
955 11164722 : !isnil &&
956 0 : atomcmp(t, minbound) < 0) {
957 0 : assert(0);
958 : GDKerror("value not within bounds\n");
959 : goto bailout;
960 11164722 : } else if (maxbound &&
961 0 : !isnil &&
962 0 : atomcmp(t, maxbound) >= 0) {
963 0 : assert(0);
964 : GDKerror("value not within bounds\n");
965 : goto bailout;
966 11164722 : } else if (tfastins_nocheck(b, r, t) != GDK_SUCCEED) {
967 0 : goto bailout;
968 : }
969 11164573 : if (b->thash)
970 0 : HASHappend_locked(b, r, t);
971 11164602 : r++;
972 : }
973 7333 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
974 : }
975 666167 : BUN nunique;
976 666167 : nunique = b->thash ? b->thash->nunique : 0;
977 666167 : MT_lock_set(&b->theaplock);
978 666210 : BATsetcount(b, b->batCount + ci.ncand);
979 666229 : if (nunique != 0)
980 5 : b->tunique_est = (double) nunique;
981 666229 : MT_lock_unset(&b->theaplock);
982 666250 : assert(hlocked);
983 666250 : MT_rwlock_wrunlock(&b->thashlock);
984 666250 : hlocked = false;
985 : }
986 :
987 787291 : doreturn:
988 787291 : bat_iterator_end(&ni);
989 787109 : if (minbound)
990 48 : VALclear(&minprop);
991 787115 : if (maxbound)
992 40 : VALclear(&maxprop);
993 787115 : TRC_DEBUG(ALGO, "b=%s,n=" ALGOBATFMT ",s=" ALGOOPTBATFMT
994 : " -> " ALGOBATFMT " (" LLFMT " usec)\n",
995 : buf, ALGOBATPAR(n), ALGOOPTBATPAR(s), ALGOBATPAR(b),
996 : GDKusec() - t0);
997 :
998 : return GDK_SUCCEED;
999 : bailout:
1000 0 : if (hlocked)
1001 0 : MT_rwlock_wrunlock(&b->thashlock);
1002 0 : if (minbound)
1003 0 : VALclear(&minprop);
1004 0 : if (maxbound)
1005 0 : VALclear(&maxprop);
1006 0 : bat_iterator_end(&ni);
1007 0 : return GDK_FAIL;
1008 : }
1009 :
1010 : gdk_return
1011 1184109 : BATappend(BAT *b, BAT *n, BAT *s, bool force)
1012 : {
1013 1184109 : return BATappend2(b, n, s, force, true);
1014 : }
1015 :
1016 : gdk_return
1017 4 : BATdel(BAT *b, BAT *d)
1018 : {
1019 4 : void (*atmdel) (Heap *, var_t *) = BATatoms[b->ttype].atomDel;
1020 4 : MT_lock_set(&b->theaplock);
1021 4 : BATiter bi = bat_iterator_nolock(b);
1022 4 : MT_lock_unset(&b->theaplock);
1023 :
1024 4 : assert(ATOMtype(d->ttype) == TYPE_oid);
1025 4 : assert(d->tsorted);
1026 4 : assert(d->tkey);
1027 4 : if (BATcount(d) == 0)
1028 : return GDK_SUCCEED;
1029 4 : OIDXdestroy(b);
1030 4 : HASHdestroy(b);
1031 4 : PROPdestroy(b);
1032 4 : STRMPdestroy(b);
1033 4 : RTREEdestroy(b);
1034 4 : if (BATtdense(d)) {
1035 2 : oid o = d->tseqbase;
1036 2 : BUN c = BATcount(d);
1037 :
1038 2 : if (o + c <= b->hseqbase)
1039 : return GDK_SUCCEED;
1040 2 : if (o < b->hseqbase) {
1041 0 : c -= b->hseqbase - o;
1042 0 : o = b->hseqbase;
1043 : }
1044 2 : if (o - b->hseqbase < b->batInserted) {
1045 0 : GDKerror("cannot delete committed values\n");
1046 0 : return GDK_FAIL;
1047 : }
1048 2 : if (o + c > b->hseqbase + BATcount(b))
1049 0 : c = b->hseqbase + BATcount(b) - o;
1050 2 : if (c == 0)
1051 : return GDK_SUCCEED;
1052 2 : if (atmdel) {
1053 0 : BUN p = o - b->hseqbase;
1054 0 : BUN q = p + c;
1055 0 : while (p < q) {
1056 0 : (*atmdel)(b->tvheap, (var_t *) BUNtloc(bi, p));
1057 0 : p++;
1058 : }
1059 : }
1060 2 : if (BATtdense(b) && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED)
1061 : return GDK_FAIL;
1062 2 : MT_lock_set(&b->theaplock);
1063 2 : if (o + c < b->hseqbase + BATcount(b)) {
1064 0 : o -= b->hseqbase;
1065 0 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1066 0 : BUN n = BATcount(b) - (o + c);
1067 : /* not very efficient, but first see
1068 : * how much this is used */
1069 0 : for (BUN i = 0; i < n; i++)
1070 0 : mskSetVal(b, o + i,
1071 0 : mskGetVal(b, o + c + i));
1072 : } else {
1073 0 : memmove(Tloc(b, o),
1074 0 : Tloc(b, o + c),
1075 0 : b->twidth * (BATcount(b) - (o + c)));
1076 : }
1077 0 : b->theap->dirty = true;
1078 : // o += b->hseqbase; // if this were to be used again
1079 : }
1080 2 : b->batCount -= c;
1081 : } else {
1082 2 : BATiter di = bat_iterator(d);
1083 2 : const oid *o = (const oid *) di.base;
1084 2 : const oid *s;
1085 2 : BUN c = di.count;
1086 2 : BUN nd = 0;
1087 2 : BUN pos;
1088 2 : char *p = NULL;
1089 :
1090 2 : if (o[c - 1] <= b->hseqbase) {
1091 0 : bat_iterator_end(&di);
1092 0 : return GDK_SUCCEED;
1093 : }
1094 2 : while (*o < b->hseqbase) {
1095 0 : o++;
1096 0 : c--;
1097 : }
1098 2 : if (*o - b->hseqbase < b->batInserted) {
1099 0 : bat_iterator_end(&di);
1100 0 : GDKerror("cannot delete committed values\n");
1101 0 : return GDK_FAIL;
1102 : }
1103 2 : if (BATtdense(b) && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED) {
1104 0 : bat_iterator_end(&di);
1105 0 : return GDK_FAIL;
1106 : }
1107 2 : s = o;
1108 2 : pos = *o - b->hseqbase;
1109 2 : if (ATOMstorage(b->ttype) != TYPE_msk)
1110 2 : p = Tloc(b, pos);
1111 6 : while (c > 0 && *o < b->hseqbase + BATcount(b)) {
1112 4 : size_t n;
1113 4 : if (atmdel)
1114 0 : (*atmdel)(b->tvheap, (var_t *) BUNtloc(bi, *o - b->hseqbase));
1115 4 : o++;
1116 4 : c--;
1117 4 : nd++;
1118 4 : if (c == 0 || *o - b->hseqbase >= BATcount(b))
1119 2 : n = b->hseqbase + BATcount(b) - o[-1] - 1;
1120 2 : else if ((oid) (o - s) < *o - *s)
1121 2 : n = o[0] - o[-1] - 1;
1122 : else
1123 : n = 0;
1124 4 : if (n > 0) {
1125 2 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1126 0 : BUN opos = o[-1] + 1 - b->hseqbase;
1127 : /* not very efficient, but
1128 : * first see how much this is
1129 : * used */
1130 0 : for (BUN i = 0; i < n; i++) {
1131 0 : mskSetVal(b, pos + i,
1132 0 : mskGetVal(b, opos + i));
1133 : }
1134 0 : pos += n;
1135 : } else {
1136 2 : n *= b->twidth;
1137 2 : memmove(p,
1138 2 : Tloc(b, o[-1] + 1 - b->hseqbase),
1139 : n);
1140 2 : p += n;
1141 : }
1142 : s = o;
1143 : }
1144 : }
1145 2 : bat_iterator_end(&di);
1146 2 : MT_lock_set(&b->theaplock);
1147 2 : b->theap->dirty = true;
1148 2 : b->batCount -= nd;
1149 : }
1150 4 : if (b->batCount <= 1) {
1151 : /* some trivial properties */
1152 2 : b->tkey = true;
1153 2 : b->tsorted = b->trevsorted = true;
1154 2 : if (b->batCount == 0) {
1155 2 : b->tnil = false;
1156 2 : b->tnonil = true;
1157 : }
1158 : }
1159 : /* not sure about these anymore */
1160 4 : b->tnosorted = b->tnorevsorted = 0;
1161 4 : b->tnokey[0] = b->tnokey[1] = 0;
1162 4 : b->tminpos = BUN_NONE;
1163 4 : b->tmaxpos = BUN_NONE;
1164 4 : b->tunique_est = 0.0;
1165 4 : MT_lock_unset(&b->theaplock);
1166 :
1167 4 : return GDK_SUCCEED;
1168 : }
1169 :
1170 : /*
1171 : * Replace all values in b with values from n whose location is given by
1172 : * the oid in either p or positions.
1173 : * If positions is used, autoincr specifies whether it is the first of a
1174 : * dense range of positions or whether it is a full-blown array of
1175 : * position.
1176 : * If mayappend is set, the position in p/positions may refer to
1177 : * locations beyond the end of b.
1178 : */
1179 : static gdk_return
1180 117566 : BATappend_or_update(BAT *b, BAT *p, const oid *positions, BAT *n,
1181 : bool mayappend, bool autoincr, bool force)
1182 : {
1183 117566 : lng t0 = GDKusec();
1184 117560 : oid pos = oid_nil;
1185 117560 : BUN nunique = 0;
1186 :
1187 117560 : if (b == NULL || b->ttype == TYPE_void || n == NULL) {
1188 : return GDK_SUCCEED;
1189 : }
1190 : /* either p or positions */
1191 117560 : assert((p == NULL) != (positions == NULL));
1192 117560 : if (p != NULL) {
1193 117384 : if (BATcount(p) != BATcount(n)) {
1194 0 : GDKerror("update BATs not the same size\n");
1195 0 : return GDK_FAIL;
1196 : }
1197 117384 : if (ATOMtype(p->ttype) != TYPE_oid) {
1198 0 : GDKerror("positions BAT not type OID\n");
1199 0 : return GDK_FAIL;
1200 : }
1201 117384 : if (BATtdense(p)) {
1202 108568 : pos = p->tseqbase;
1203 108568 : positions = &pos;
1204 108568 : autoincr = true;
1205 108568 : p = NULL;
1206 8816 : } else if (p->ttype != TYPE_void) {
1207 8814 : positions = (const oid *) Tloc(p, 0);
1208 8814 : autoincr = false;
1209 : } else {
1210 : autoincr = false;
1211 : }
1212 176 : } else if (autoincr) {
1213 176 : pos = *positions;
1214 : }
1215 117560 : if (BATcount(n) == 0) {
1216 : return GDK_SUCCEED;
1217 : }
1218 :
1219 25060 : BATiter ni = bat_iterator(n);
1220 :
1221 25060 : OIDXdestroy(b);
1222 25059 : STRMPdestroy(b);
1223 25060 : RTREEdestroy(b);
1224 : /* load hash so that we can maintain it */
1225 25060 : (void) BATcheckhash(b);
1226 :
1227 25060 : MT_lock_set(&b->theaplock);
1228 25060 : if (!force && (b->batRestricted != BAT_WRITE ||
1229 17 : (ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1)) {
1230 0 : MT_lock_unset(&b->theaplock);
1231 0 : bat_iterator_end(&ni);
1232 0 : GDKerror("access denied to %s, aborting.\n", BATgetId(b));
1233 0 : return GDK_FAIL;
1234 : }
1235 25060 : BATiter bi = bat_iterator_nolock(b);
1236 25060 : if (ni.count > BATcount(b) / gdk_unique_estimate_keep_fraction) {
1237 24497 : b->tunique_est = 0;
1238 : }
1239 :
1240 25060 : b->tsorted = b->trevsorted = false;
1241 25060 : b->tnosorted = b->tnorevsorted = 0;
1242 25060 : b->tseqbase = oid_nil;
1243 25060 : b->tkey = false;
1244 25060 : b->tnokey[0] = b->tnokey[1] = 0;
1245 :
1246 25060 : int (*atomcmp)(const void *, const void *) = ATOMcompare(b->ttype);
1247 25060 : const void *nil = ATOMnilptr(b->ttype);
1248 25060 : oid hseqend = b->hseqbase + BATcount(b);
1249 :
1250 25060 : MT_lock_unset(&b->theaplock);
1251 :
1252 25558 : bool anynil = false;
1253 25558 : bool locked = false;
1254 :
1255 25558 : if (b->tvheap) {
1256 1067718 : for (BUN i = 0; i < ni.count; i++) {
1257 1065458 : oid updid;
1258 1065458 : if (positions) {
1259 1064445 : updid = autoincr ? pos++ : *positions++;
1260 : } else {
1261 1013 : updid = BUNtoid(p, i);
1262 : }
1263 :
1264 1065458 : if (updid < b->hseqbase ||
1265 1065458 : (!mayappend && updid >= hseqend)) {
1266 0 : GDKerror("id out of range\n");
1267 0 : goto bailout;
1268 : }
1269 1065458 : updid -= b->hseqbase;
1270 1065458 : if (!force && updid < b->batInserted) {
1271 0 : GDKerror("updating committed value\n");
1272 0 : goto bailout;
1273 : }
1274 :
1275 1065458 : const void *new = BUNtvar(ni, i);
1276 :
1277 1065458 : if (updid >= BATcount(b)) {
1278 23534 : assert(mayappend);
1279 23534 : if (locked) {
1280 4 : MT_rwlock_wrunlock(&b->thashlock);
1281 4 : locked = false;
1282 : }
1283 23534 : if (b->tminpos != bi.minpos ||
1284 23533 : b->tmaxpos != bi.maxpos) {
1285 1 : MT_lock_set(&b->theaplock);
1286 1 : b->tminpos = bi.minpos;
1287 1 : b->tmaxpos = bi.maxpos;
1288 1 : MT_lock_unset(&b->theaplock);
1289 : }
1290 23534 : if (BATcount(b) < updid &&
1291 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1292 0 : bat_iterator_end(&ni);
1293 0 : return GDK_FAIL;
1294 : }
1295 23534 : if (BUNappend(b, new, force) != GDK_SUCCEED) {
1296 0 : bat_iterator_end(&ni);
1297 0 : return GDK_FAIL;
1298 : }
1299 23534 : bi = bat_iterator_nolock(b);
1300 41031 : continue;
1301 : }
1302 :
1303 : /* it is possible that a previous run was killed
1304 : * after an update (with a mmapped tail file)
1305 : * but before that was committed, then the
1306 : * offset may point outside of the vheap */
1307 1041924 : const void *old = BUNtvaroff(bi, updid) < bi.vhfree ? BUNtvar(bi, updid) : NULL;
1308 :
1309 1041416 : if (old && atomcmp(old, new) == 0) {
1310 : /* replacing with the same value:
1311 : * nothing to do */
1312 17497 : continue;
1313 : }
1314 :
1315 1024414 : bool isnil = atomcmp(new, nil) == 0;
1316 1023911 : anynil |= isnil;
1317 1023911 : MT_lock_set(&b->theaplock);
1318 1023918 : if (old == NULL ||
1319 1023918 : (b->tnil &&
1320 543 : !anynil &&
1321 543 : atomcmp(old, nil) == 0)) {
1322 : /* if old value is nil and no new
1323 : * value is, we're not sure anymore
1324 : * about the nil property, so we must
1325 : * clear it */
1326 541 : b->tnil = false;
1327 : }
1328 1023918 : b->tnonil &= !isnil;
1329 1023918 : b->tnil |= isnil;
1330 1023918 : MT_lock_unset(&b->theaplock);
1331 1023925 : if (bi.maxpos != BUN_NONE) {
1332 218 : if (!isnil &&
1333 109 : atomcmp(BUNtvar(bi, bi.maxpos), new) < 0) {
1334 : /* new value is larger than
1335 : * previous largest */
1336 22 : bi.maxpos = updid;
1337 174 : } else if (old == NULL ||
1338 100 : (atomcmp(BUNtvar(bi, bi.maxpos), old) == 0 &&
1339 13 : atomcmp(new, old) != 0)) {
1340 : /* old value is equal to
1341 : * largest and new value is
1342 : * smaller, so we don't know
1343 : * anymore which is the
1344 : * largest */
1345 13 : bi.maxpos = BUN_NONE;
1346 : }
1347 : }
1348 1023925 : if (bi.minpos != BUN_NONE) {
1349 212 : if (!isnil &&
1350 106 : atomcmp(BUNtvar(bi, bi.minpos), new) > 0) {
1351 : /* new value is smaller than
1352 : * previous smallest */
1353 16 : bi.minpos = updid;
1354 180 : } else if (old == NULL ||
1355 112 : (atomcmp(BUNtvar(bi, bi.minpos), old) == 0 &&
1356 22 : atomcmp(new, old) != 0)) {
1357 : /* old value is equal to
1358 : * smallest and new value is
1359 : * larger, so we don't know
1360 : * anymore which is the
1361 : * smallest */
1362 22 : bi.minpos = BUN_NONE;
1363 : }
1364 : }
1365 1023925 : if (!locked) {
1366 1941 : MT_rwlock_wrlock(&b->thashlock);
1367 1941 : locked = true;
1368 : }
1369 1023925 : if (old)
1370 1023925 : HASHdelete_locked(&bi, updid, old);
1371 0 : else if (b->thash) {
1372 0 : doHASHdestroy(b, b->thash);
1373 0 : b->thash = NULL;
1374 : }
1375 :
1376 1023922 : var_t d;
1377 1023922 : switch (b->twidth) {
1378 1020473 : case 1:
1379 1020473 : d = (var_t) ((uint8_t *) b->theap->base)[updid] + GDK_VAROFFSET;
1380 1020473 : break;
1381 3413 : case 2:
1382 3413 : d = (var_t) ((uint16_t *) b->theap->base)[updid] + GDK_VAROFFSET;
1383 3413 : break;
1384 1 : case 4:
1385 1 : d = (var_t) ((uint32_t *) b->theap->base)[updid];
1386 1 : break;
1387 : #if SIZEOF_VAR_T == 8
1388 35 : case 8:
1389 35 : d = (var_t) ((uint64_t *) b->theap->base)[updid];
1390 35 : break;
1391 : #endif
1392 : default:
1393 0 : MT_UNREACHABLE();
1394 : }
1395 1023922 : MT_lock_set(&b->theaplock);
1396 1023912 : gdk_return rc = ATOMreplaceVAR(b, &d, new);
1397 1023912 : MT_lock_unset(&b->theaplock);
1398 1023929 : if (rc != GDK_SUCCEED) {
1399 0 : goto bailout;
1400 : }
1401 1023929 : if (b->twidth < SIZEOF_VAR_T &&
1402 1023884 : (b->twidth <= 2 ? d - GDK_VAROFFSET : d) >= ((size_t) 1 << (8 << b->tshift))) {
1403 : /* doesn't fit in current heap, upgrade it */
1404 16 : if (GDKupgradevarheap(b, d, 0, MAX(updid, b->batCount)) != GDK_SUCCEED) {
1405 0 : goto bailout;
1406 : }
1407 : }
1408 : /* in case ATOMreplaceVAR and/or
1409 : * GDKupgradevarheap replaces a heap, we need to
1410 : * reinitialize the iterator */
1411 : {
1412 : /* save and restore minpos/maxpos */
1413 1023929 : BUN minpos = bi.minpos;
1414 1023929 : BUN maxpos = bi.maxpos;
1415 1023929 : bi = bat_iterator_nolock(b);
1416 1023929 : bi.minpos = minpos;
1417 1023929 : bi.maxpos = maxpos;
1418 : }
1419 1023929 : switch (b->twidth) {
1420 1020464 : case 1:
1421 1020464 : ((uint8_t *) b->theap->base)[updid] = (uint8_t) (d - GDK_VAROFFSET);
1422 1020464 : break;
1423 3429 : case 2:
1424 3429 : ((uint16_t *) b->theap->base)[updid] = (uint16_t) (d - GDK_VAROFFSET);
1425 3429 : break;
1426 1 : case 4:
1427 1 : ((uint32_t *) b->theap->base)[updid] = (uint32_t) d;
1428 1 : break;
1429 : #if SIZEOF_VAR_T == 8
1430 35 : case 8:
1431 35 : ((uint64_t *) b->theap->base)[updid] = (uint64_t) d;
1432 35 : break;
1433 : #endif
1434 : default:
1435 0 : MT_UNREACHABLE();
1436 : }
1437 1023929 : HASHinsert_locked(&bi, updid, new);
1438 :
1439 : }
1440 2260 : if (locked) {
1441 1937 : if (b->thash)
1442 2 : nunique = b->thash->nunique;
1443 1937 : MT_rwlock_wrunlock(&b->thashlock);
1444 1937 : locked = false;
1445 : }
1446 2260 : MT_lock_set(&b->theaplock);
1447 2259 : b->tvheap->dirty = true;
1448 2259 : MT_lock_unset(&b->theaplock);
1449 22798 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
1450 0 : assert(b->thash == NULL);
1451 0 : HASHdestroy(b); /* hash doesn't make sense for msk */
1452 0 : for (BUN i = 0; i < ni.count; i++) {
1453 0 : oid updid;
1454 0 : if (positions) {
1455 0 : updid = autoincr ? pos++ : *positions++;
1456 : } else {
1457 0 : updid = BUNtoid(p, i);
1458 : }
1459 :
1460 0 : if (updid < b->hseqbase ||
1461 0 : (!mayappend && updid >= hseqend)) {
1462 0 : GDKerror("id out of range\n");
1463 0 : bat_iterator_end(&ni);
1464 0 : return GDK_FAIL;
1465 : }
1466 0 : updid -= b->hseqbase;
1467 0 : if (!force && updid < b->batInserted) {
1468 0 : GDKerror("updating committed value\n");
1469 0 : bat_iterator_end(&ni);
1470 0 : return GDK_FAIL;
1471 : }
1472 0 : if (updid >= BATcount(b)) {
1473 0 : assert(mayappend);
1474 0 : if (BATcount(b) < updid &&
1475 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1476 0 : bat_iterator_end(&ni);
1477 0 : return GDK_FAIL;
1478 : }
1479 0 : if (BUNappend(b, BUNtmsk(ni, i), force) != GDK_SUCCEED) {
1480 0 : bat_iterator_end(&ni);
1481 0 : return GDK_FAIL;
1482 : }
1483 0 : continue;
1484 : }
1485 0 : mskSetVal(b, updid, Tmskval(&ni, i));
1486 : }
1487 0 : bi = bat_iterator_nolock(b);
1488 22798 : } else if (autoincr) {
1489 14120 : if (pos < b->hseqbase ||
1490 13298 : (!mayappend && pos + ni.count > hseqend)) {
1491 0 : GDKerror("id out of range\n");
1492 0 : bat_iterator_end(&ni);
1493 0 : return GDK_FAIL;
1494 : }
1495 14120 : pos -= b->hseqbase;
1496 14120 : if (!force && pos < b->batInserted) {
1497 0 : GDKerror("updating committed value\n");
1498 0 : bat_iterator_end(&ni);
1499 0 : return GDK_FAIL;
1500 : }
1501 :
1502 14120 : if (pos >= BATcount(b)) {
1503 444 : assert(mayappend);
1504 444 : bat_iterator_end(&ni);
1505 444 : if (BATcount(b) < pos &&
1506 0 : BUNappendmulti(b, NULL, (BUN) (pos - BATcount(b)), force) != GDK_SUCCEED) {
1507 : return GDK_FAIL;
1508 : }
1509 444 : return BATappend(b, n, NULL, force);
1510 : }
1511 13682 : if (pos + ni.count > BATcount(b) &&
1512 6 : BUNappendmulti(b, NULL, (BUN) (pos + ni.count - BATcount(b)), force) != GDK_SUCCEED) {
1513 0 : bat_iterator_end(&ni);
1514 0 : return GDK_FAIL;
1515 : }
1516 13676 : bi = bat_iterator_nolock(b);
1517 :
1518 : /* we copy all of n, so if there are nils in n we get
1519 : * nils in b (and else we don't know) */
1520 13676 : b->tnil = ni.nil;
1521 : /* we may not copy over all of b, so we only know that
1522 : * there are no nils in b afterward if there weren't
1523 : * any in either b or n to begin with */
1524 13676 : b->tnonil &= ni.nonil;
1525 : /* if there is no hash, we don't start the loop, if
1526 : * there is only a persisted hash, it will get destroyed
1527 : * in the first iteration, after which there is no hash
1528 : * and the loop ends */
1529 13676 : MT_rwlock_wrlock(&b->thashlock);
1530 13676 : locked = true;
1531 13676 : for (BUN i = pos, j = pos + ni.count; i < j && b->thash; i++)
1532 1 : HASHdelete_locked(&bi, i, Tloc(b, i));
1533 13675 : if (ni.type == TYPE_void) {
1534 0 : assert(b->ttype == TYPE_oid);
1535 0 : oid *o = Tloc(b, pos);
1536 0 : if (is_oid_nil(ni.tseq)) {
1537 : /* we may or may not overwrite the old
1538 : * min/max values */
1539 0 : bi.minpos = BUN_NONE;
1540 0 : bi.maxpos = BUN_NONE;
1541 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1542 0 : o[i] = oid_nil;
1543 0 : b->tnil = true;
1544 : } else {
1545 0 : oid v = ni.tseq;
1546 : /* we know min/max of n, so we know
1547 : * the new min/max of b if those of n
1548 : * are smaller/larger than the old */
1549 0 : if (bi.minpos != BUN_NONE) {
1550 0 : if (v <= BUNtoid(b, bi.minpos))
1551 0 : bi.minpos = pos;
1552 0 : else if (pos <= bi.minpos && bi.minpos < pos + ni.count)
1553 0 : bi.minpos = BUN_NONE;
1554 : }
1555 0 : if (complex_cand(n)) {
1556 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1557 0 : o[i] = *(oid *)Tpos(&ni, i);
1558 : /* last value */
1559 0 : v = o[ni.count - 1];
1560 : } else {
1561 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1562 0 : o[i] = v++;
1563 : /* last value added (not one beyond) */
1564 0 : v--;
1565 : }
1566 0 : if (bi.maxpos != BUN_NONE) {
1567 0 : if (v >= BUNtoid(b, bi.maxpos))
1568 0 : bi.maxpos = pos + ni.count - 1;
1569 0 : else if (pos <= bi.maxpos && bi.maxpos < pos + ni.count)
1570 0 : bi.maxpos = BUN_NONE;
1571 : }
1572 : }
1573 : } else {
1574 : /* if the extremes of n are at least as
1575 : * extreme as those of b, we can replace b's
1576 : * min/max, else we don't know what b's new
1577 : * min/max are*/
1578 13944 : if (bi.minpos != BUN_NONE && ni.minpos != BUN_NONE &&
1579 269 : atomcmp(BUNtloc(bi, bi.minpos), BUNtail(ni, ni.minpos)) >= 0) {
1580 159 : bi.minpos = pos + ni.minpos;
1581 : } else {
1582 13516 : bi.minpos = BUN_NONE;
1583 : }
1584 13972 : if (bi.maxpos != BUN_NONE && ni.maxpos != BUN_NONE &&
1585 297 : atomcmp(BUNtloc(bi, bi.maxpos), BUNtail(ni, ni.maxpos)) <= 0) {
1586 216 : bi.maxpos = pos + ni.maxpos;
1587 : } else {
1588 13459 : bi.maxpos = BUN_NONE;
1589 : }
1590 13675 : memcpy(Tloc(b, pos), ni.base,
1591 13675 : ni.count << b->tshift);
1592 : }
1593 : /* either we have a hash that was updated above, or we
1594 : * have no hash; we cannot have the case where there is
1595 : * only a persisted (unloaded) hash since it would have
1596 : * been destroyed above */
1597 13675 : if (b->thash != NULL) {
1598 1 : for (BUN i = pos, j = pos + ni.count; i < j; i++)
1599 1 : HASHinsert_locked(&bi, i, Tloc(b, i));
1600 0 : if (b->thash)
1601 0 : nunique = b->thash->nunique;
1602 : }
1603 13674 : MT_rwlock_wrunlock(&b->thashlock);
1604 13675 : locked = false;
1605 13675 : if (ni.count == BATcount(b)) {
1606 : /* if we replaced all values of b by values
1607 : * from n, we can also copy the min/max
1608 : * properties */
1609 6113 : bi.minpos = ni.minpos;
1610 6113 : bi.maxpos = ni.maxpos;
1611 6113 : if (BATtdensebi(&ni)) {
1612 : /* replaced all of b with a dense sequence */
1613 47 : MT_lock_set(&b->theaplock);
1614 47 : BATtseqbase(b, ni.tseq);
1615 47 : MT_lock_unset(&b->theaplock);
1616 : }
1617 : }
1618 : } else {
1619 157611277 : for (BUN i = 0; i < ni.count; i++) {
1620 157602597 : oid updid;
1621 157602597 : if (positions) {
1622 : /* assert(!autoincr) */
1623 157602597 : updid = *positions++;
1624 : } else {
1625 0 : updid = BUNtoid(p, i);
1626 : }
1627 :
1628 157602597 : if (updid < b->hseqbase ||
1629 157602597 : (!mayappend && updid >= hseqend)) {
1630 0 : GDKerror("id out of range\n");
1631 0 : goto bailout;
1632 : }
1633 157602597 : updid -= b->hseqbase;
1634 157602597 : if (!force && updid < b->batInserted) {
1635 0 : GDKerror("updating committed value\n");
1636 0 : goto bailout;
1637 : }
1638 :
1639 157602597 : const void *new = BUNtloc(ni, i);
1640 :
1641 157602597 : if (updid >= BATcount(b)) {
1642 16059 : assert(mayappend);
1643 16059 : if (locked) {
1644 10 : MT_rwlock_wrunlock(&b->thashlock);
1645 10 : locked = false;
1646 : }
1647 16059 : if (b->tminpos != bi.minpos ||
1648 16057 : b->tmaxpos != bi.maxpos) {
1649 3 : MT_lock_set(&b->theaplock);
1650 3 : b->tminpos = bi.minpos;
1651 3 : b->tmaxpos = bi.maxpos;
1652 3 : MT_lock_unset(&b->theaplock);
1653 : }
1654 16059 : if (BATcount(b) < updid &&
1655 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1656 0 : goto bailout;
1657 : }
1658 16059 : if (BUNappend(b, new, force) != GDK_SUCCEED) {
1659 0 : bat_iterator_end(&ni);
1660 0 : return GDK_FAIL;
1661 : }
1662 16059 : bi = bat_iterator_nolock(b);
1663 16059 : continue;
1664 : }
1665 :
1666 157586538 : const void *old = BUNtloc(bi, updid);
1667 157586538 : bool isnil = atomcmp(new, nil) == 0;
1668 157714482 : anynil |= isnil;
1669 157714482 : if (b->tnil &&
1670 1915 : !anynil &&
1671 1915 : atomcmp(old, nil) == 0) {
1672 : /* if old value is nil and no new
1673 : * value is, we're not sure anymore
1674 : * about the nil property, so we must
1675 : * clear it */
1676 1911 : b->tnil = false;
1677 : }
1678 157714482 : b->tnonil &= !isnil;
1679 157714482 : b->tnil |= isnil;
1680 157714482 : if (bi.maxpos != BUN_NONE) {
1681 3664 : if (!isnil &&
1682 1830 : atomcmp(BUNtloc(bi, bi.maxpos), new) < 0) {
1683 : /* new value is larger than
1684 : * previous largest */
1685 137 : bi.maxpos = updid;
1686 1704 : } else if (atomcmp(BUNtloc(bi, bi.maxpos), old) == 0 &&
1687 7 : atomcmp(new, old) != 0) {
1688 : /* old value is equal to
1689 : * largest and new value is
1690 : * smaller, so we don't know
1691 : * anymore which is the
1692 : * largest */
1693 7 : bi.maxpos = BUN_NONE;
1694 : }
1695 : }
1696 157714482 : if (bi.minpos != BUN_NONE) {
1697 3664 : if (!isnil &&
1698 1830 : atomcmp(BUNtloc(bi, bi.minpos), new) > 0) {
1699 : /* new value is smaller than
1700 : * previous smallest */
1701 6 : bi.minpos = updid;
1702 1834 : } else if (atomcmp(BUNtloc(bi, bi.minpos), old) == 0 &&
1703 6 : atomcmp(new, old) != 0) {
1704 : /* old value is equal to
1705 : * smallest and new value is
1706 : * larger, so we don't know
1707 : * anymore which is the
1708 : * smallest */
1709 6 : bi.minpos = BUN_NONE;
1710 : }
1711 : }
1712 :
1713 157714482 : if (!locked) {
1714 8679 : MT_rwlock_wrlock(&b->thashlock);
1715 8679 : locked = true;
1716 : }
1717 157714482 : HASHdelete_locked(&bi, updid, old);
1718 157897273 : switch (b->twidth) {
1719 30165726 : case 1:
1720 30165726 : ((bte *) b->theap->base)[updid] = * (bte *) new;
1721 30165726 : break;
1722 527203 : case 2:
1723 527203 : ((sht *) b->theap->base)[updid] = * (sht *) new;
1724 527203 : break;
1725 21785262 : case 4:
1726 21785262 : ((int *) b->theap->base)[updid] = * (int *) new;
1727 21785262 : break;
1728 100023753 : case 8:
1729 100023753 : ((lng *) b->theap->base)[updid] = * (lng *) new;
1730 100023753 : break;
1731 5395329 : case 16:
1732 : #ifdef HAVE_HGE
1733 5395329 : ((hge *) b->theap->base)[updid] = * (hge *) new;
1734 : #else
1735 : ((uuid *) b->theap->base)[updid] = * (uuid *) new;
1736 : #endif
1737 5395329 : break;
1738 0 : default:
1739 0 : memcpy(BUNtloc(bi, updid), new, ATOMsize(b->ttype));
1740 0 : break;
1741 : }
1742 157897273 : HASHinsert_locked(&bi, updid, new);
1743 : }
1744 8680 : if (locked) {
1745 8670 : if (b->thash)
1746 0 : nunique = b->thash->nunique;
1747 8670 : MT_rwlock_wrunlock(&b->thashlock);
1748 8670 : locked = false;
1749 : }
1750 : }
1751 24615 : bat_iterator_end(&ni);
1752 24614 : MT_lock_set(&b->theaplock);
1753 24615 : if (nunique != 0)
1754 2 : b->tunique_est = (double) nunique;
1755 24615 : b->tminpos = bi.minpos;
1756 24615 : b->tmaxpos = bi.maxpos;
1757 24615 : b->theap->dirty = true;
1758 24615 : MT_lock_unset(&b->theaplock);
1759 24616 : TRC_DEBUG(ALGO,
1760 : "BATreplace(" ALGOBATFMT "," ALGOOPTBATFMT "," ALGOBATFMT ") " LLFMT " usec\n",
1761 : ALGOBATPAR(b), ALGOOPTBATPAR(p), ALGOBATPAR(n),
1762 : GDKusec() - t0);
1763 : return GDK_SUCCEED;
1764 :
1765 0 : bailout:
1766 0 : bat_iterator_end(&ni);
1767 0 : if (locked) {
1768 0 : Hash *h = b->thash;
1769 0 : b->thash = NULL;
1770 0 : MT_rwlock_wrunlock(&b->thashlock);
1771 0 : doHASHdestroy(b, h);
1772 : }
1773 : return GDK_FAIL;
1774 : }
1775 :
1776 : /* replace values from b at locations specified in p with values in n */
1777 : gdk_return
1778 116466 : BATreplace(BAT *b, BAT *p, BAT *n, bool force)
1779 : {
1780 116466 : return BATappend_or_update(b, p, NULL, n, false, false, force);
1781 : }
1782 :
1783 : /* like BATreplace, but p may specify locations beyond the end of b */
1784 : gdk_return
1785 925 : BATupdate(BAT *b, BAT *p, BAT *n, bool force)
1786 : {
1787 925 : return BATappend_or_update(b, p, NULL, n, true, false, force);
1788 : }
1789 :
1790 : #if 0 /* not used */
1791 : /* like BATreplace, but the positions are given by an array of oid values */
1792 : gdk_return
1793 : BATreplacepos(BAT *b, const oid *positions, BAT *n, bool autoincr, bool force)
1794 : {
1795 : return BATappend_or_update(b, NULL, positions, n, false, autoincr, force);
1796 : }
1797 : #endif
1798 :
1799 : /* like BATreplace, but the positions are given by an array of oid
1800 : * values, and they may specify locations beyond the end of b */
1801 : gdk_return
1802 176 : BATupdatepos(BAT *b, const oid *positions, BAT *n, bool autoincr, bool force)
1803 : {
1804 176 : return BATappend_or_update(b, NULL, positions, n, true, autoincr, force);
1805 : }
1806 :
1807 : /*
1808 : * BAT Selections
1809 : * The BAT selectors are among the most heavily used operators.
1810 : * Their efficient implementation is therefore mandatory.
1811 : *
1812 : * BAT slice
1813 : * This function returns a horizontal slice from a BAT. It optimizes
1814 : * execution by avoiding to copy when the BAT is memory mapped (in
1815 : * this case, an independent submap is created) or else when it is
1816 : * read-only, then a VIEW bat is created as a result.
1817 : *
1818 : * If a new copy has to be created, this function takes care to
1819 : * preserve void-columns (in this case, the seqbase has to be
1820 : * recomputed in the result).
1821 : *
1822 : * The selected range is excluding the high value.
1823 : */
1824 : BAT *
1825 9163261 : BATslice(BAT *b, BUN l, BUN h)
1826 : {
1827 9163261 : BUN low = l;
1828 9163261 : BAT *bn = NULL;
1829 :
1830 9163261 : BATcheck(b, NULL);
1831 9163261 : BATiter bi = bat_iterator(b);
1832 9163597 : if (l > bi.count)
1833 : l = bi.count;
1834 9163597 : if (h > bi.count)
1835 : h = bi.count;
1836 9163597 : if (h < l)
1837 : h = l;
1838 :
1839 9163597 : if (complex_cand(b)) {
1840 : /* slicing a candidate list with exceptions */
1841 78 : struct canditer ci;
1842 78 : canditer_init(&ci, NULL, b);
1843 78 : if (b->hseqbase + l >= ci.hseq) {
1844 78 : l = b->hseqbase + l - ci.hseq;
1845 78 : h = b->hseqbase + h - ci.hseq;
1846 : } else {
1847 0 : l = 0;
1848 0 : if (b->hseqbase + h >= ci.hseq)
1849 0 : h = b->hseqbase + h - ci.hseq;
1850 : else
1851 : h = 0;
1852 : }
1853 78 : bn = canditer_slice(&ci, l, h);
1854 78 : goto doreturn;
1855 : }
1856 : /* If the source BAT is readonly, then we can obtain a VIEW
1857 : * that just reuses the memory of the source. */
1858 9163519 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1859 : /* forget about slices for bit masks: we can't deal
1860 : * with difference in alignment, so we'll just make a
1861 : * copy */
1862 0 : bn = COLnew((oid) (b->hseqbase + low), b->ttype, h - l, TRANSIENT);
1863 : /* we use BATappend with a candidate list to easily
1864 : * copy the part of b that we need */
1865 0 : BAT *s = BATdense(0, (oid) (b->hseqbase + low), h - l);
1866 0 : if (bn == NULL ||
1867 0 : s == NULL ||
1868 0 : BATappend(bn, b, s, false) != GDK_SUCCEED) {
1869 0 : BBPreclaim(bn);
1870 0 : BBPreclaim(s);
1871 0 : bn = NULL;
1872 0 : goto doreturn;
1873 : }
1874 0 : BBPunfix(s->batCacheid);
1875 0 : goto doreturn;
1876 : }
1877 9163519 : restrict_t prestricted;
1878 10203712 : if (bi.restricted == BAT_READ && VIEWtparent(b)) {
1879 1040381 : BAT *pb = BBP_desc(VIEWtparent(b));
1880 1040381 : MT_lock_set(&pb->theaplock);
1881 1039933 : prestricted = pb->batRestricted;
1882 1039933 : MT_lock_unset(&pb->theaplock);
1883 : } else {
1884 : prestricted = BAT_WRITE; /* just initialize with anything */
1885 : }
1886 9163331 : if (bi.restricted == BAT_READ &&
1887 9126156 : (!VIEWtparent(b) || prestricted == BAT_READ)) {
1888 9126158 : bn = VIEWcreate(b->hseqbase + low, b, l, h);
1889 9126158 : if (bn == NULL)
1890 : goto doreturn;
1891 : } else {
1892 : /* create a new BAT and put everything into it */
1893 37173 : BUN p = l;
1894 37173 : BUN q = h;
1895 :
1896 37173 : bn = COLnew((oid) (b->hseqbase + low), BATtdensebi(&bi) || (b->ttype == TYPE_oid && h == l) ? TYPE_void : b->ttype, h - l, TRANSIENT);
1897 37169 : if (bn == NULL)
1898 0 : goto doreturn;
1899 :
1900 37169 : if (bn->ttype == TYPE_void) {
1901 23707 : BATsetcount(bn, h - l);
1902 23707 : BATtseqbase(bn, is_oid_nil(bi.tseq) ? oid_nil : h == l ? 0 : (oid) (bi.tseq + low));
1903 13462 : } else if (bn->tvheap == NULL) {
1904 9033 : assert(BATatoms[bn->ttype].atomPut == NULL);
1905 9033 : memcpy(Tloc(bn, 0), (const char *) bi.base + (p << bi.shift),
1906 9033 : (q - p) << bn->tshift);
1907 9033 : bn->theap->dirty = true;
1908 9033 : BATsetcount(bn, h - l);
1909 : } else {
1910 1208491 : for (; p < q; p++) {
1911 1204058 : if (bunfastapp(bn, BUNtail(bi, p)) != GDK_SUCCEED) {
1912 0 : BBPreclaim(bn);
1913 0 : bn = NULL;
1914 0 : goto doreturn;
1915 : }
1916 : }
1917 : }
1918 37174 : bn->theap->dirty = true;
1919 37174 : bn->tsorted = bi.sorted || bn->batCount <= 1;
1920 37174 : bn->trevsorted = bi.revsorted || bn->batCount <= 1;
1921 37174 : bn->tkey = bi.key || bn->batCount <= 1;
1922 37174 : bn->tnonil = bi.nonil;
1923 37174 : bn->tnil = false; /* we don't know */
1924 37174 : if (bi.nosorted > l && bi.nosorted < h && !bn->tsorted)
1925 2271 : bn->tnosorted = bi.nosorted - l;
1926 : else
1927 34903 : bn->tnosorted = 0;
1928 37174 : if (bi.norevsorted > l && bi.norevsorted < h && !bn->trevsorted)
1929 4516 : bn->tnorevsorted = bi.norevsorted - l;
1930 : else
1931 32658 : bn->tnorevsorted = 0;
1932 37174 : if (bi.nokey[0] >= l && bi.nokey[0] < h &&
1933 31322 : bi.nokey[1] >= l && bi.nokey[1] < h &&
1934 252 : bi.nokey[0] != bi.nokey[1] &&
1935 : !bn->tkey) {
1936 252 : bn->tnokey[0] = bi.nokey[0] - l;
1937 252 : bn->tnokey[1] = bi.nokey[1] - l;
1938 : } else {
1939 36922 : bn->tnokey[0] = bn->tnokey[1] = 0;
1940 : }
1941 : }
1942 9163163 : doreturn:
1943 9163163 : bat_iterator_end(&bi);
1944 9163284 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",lo=" BUNFMT ",hi=" BUNFMT " -> "
1945 : ALGOOPTBATFMT "\n",
1946 : ALGOBATPAR(b), l, h, ALGOOPTBATPAR(bn));
1947 : return bn;
1948 : }
1949 :
1950 : #define BAT_ORDERED(TPE) \
1951 : do { \
1952 : const TPE *restrict vals = Tloc(b, 0); \
1953 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
1954 : if (vals[p - 1] > vals[p]) { \
1955 : b->tnosorted = p; \
1956 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
1957 : goto doreturn; \
1958 : } else if (vals[p - 1] < vals[p]) { \
1959 : if (!b->trevsorted && b->tnorevsorted == 0) { \
1960 : b->tnorevsorted = p; \
1961 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b)); \
1962 : } \
1963 : } else if (!b->tkey && b->tnokey[1] == 0) { \
1964 : b->tnokey[0] = p - 1; \
1965 : b->tnokey[1] = p; \
1966 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b)); \
1967 : } \
1968 : } \
1969 : } while (0)
1970 :
1971 : #define BAT_ORDERED_FP(TPE) \
1972 : do { \
1973 : const TPE *restrict vals = Tloc(b, 0); \
1974 : TPE prev = vals[0]; \
1975 : bool prevnil = is_##TPE##_nil(prev); \
1976 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
1977 : TPE next = vals[p]; \
1978 : int cmp = prevnil ? -!(prevnil = is_##TPE##_nil(next)) : (prevnil = is_##TPE##_nil(next)) ? 1 : (prev > next) - (prev < next); \
1979 : prev = next; \
1980 : if (cmp > 0) { \
1981 : b->tnosorted = bi.nosorted = p; \
1982 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
1983 : goto doreturn; \
1984 : } else if (cmp < 0) { \
1985 : if (!b->trevsorted && b->tnorevsorted == 0) { \
1986 : b->tnorevsorted = bi.norevsorted = p; \
1987 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b)); \
1988 : } \
1989 : } else if (!b->tkey && b->tnokey[1] == 0) { \
1990 : b->tnokey[0] = bi.nokey[0] = p - 1; \
1991 : b->tnokey[1] = bi.nokey[1] = p; \
1992 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b)); \
1993 : } \
1994 : } \
1995 : } while (0)
1996 :
1997 : /* Return whether the BAT is ordered or not. If we don't know, invest
1998 : * in a scan and record the results in the bat descriptor. If during
1999 : * the scan we happen to find evidence that the BAT is not reverse
2000 : * sorted, we record the location. */
2001 : bool
2002 2876686 : BATordered(BAT *b)
2003 : {
2004 2876686 : lng t0 = GDKusec();
2005 2876686 : bool sorted;
2006 :
2007 2876686 : MT_lock_set(&b->theaplock);
2008 2876658 : if (b->ttype == TYPE_void || b->tsorted || BATcount(b) == 0) {
2009 447970 : MT_lock_unset(&b->theaplock);
2010 447959 : return true;
2011 : }
2012 2428688 : if (b->tnosorted > 0 || !ATOMlinear(b->ttype)) {
2013 2035385 : MT_lock_unset(&b->theaplock);
2014 2035370 : return false;
2015 : }
2016 :
2017 : /* There are a few reasons why we need a lock here. It may be
2018 : * that multiple threads call this functions at the same time
2019 : * (happens a lot with mitosis/mergetable), but we only need to
2020 : * scan the bat in one thread: the others can reap the rewards
2021 : * when that one thread is done. Also, we need the heap to
2022 : * remain accessible (could have used bat_iterator for that),
2023 : * and, and this is the killer argument, we may need to make
2024 : * changes to the bat descriptor. */
2025 393303 : BATiter bi = bat_iterator_nolock(b);
2026 393303 : if (!b->tsorted && b->tnosorted == 0) {
2027 722443 : switch (ATOMbasetype(b->ttype)) {
2028 61885 : case TYPE_bte:
2029 127348635 : BAT_ORDERED(bte);
2030 : break;
2031 6802 : case TYPE_sht:
2032 1569215 : BAT_ORDERED(sht);
2033 : break;
2034 284889 : case TYPE_int:
2035 118483952 : BAT_ORDERED(int);
2036 : break;
2037 6450 : case TYPE_lng:
2038 62007423 : BAT_ORDERED(lng);
2039 : break;
2040 : #ifdef HAVE_HGE
2041 376 : case TYPE_hge:
2042 8002679 : BAT_ORDERED(hge);
2043 : break;
2044 : #endif
2045 963 : case TYPE_flt:
2046 8007194 : BAT_ORDERED_FP(flt);
2047 : break;
2048 736 : case TYPE_dbl:
2049 8228106 : BAT_ORDERED_FP(dbl);
2050 : break;
2051 : case TYPE_str:
2052 21099580 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2053 21085845 : int c;
2054 21085845 : const char *p1 = BUNtvar(bi, p - 1);
2055 21085845 : const char *p2 = BUNtvar(bi, p);
2056 21085845 : if (p1 == p2)
2057 : c = 0;
2058 2199322 : else if (p1[0] == '\200') {
2059 1404 : if (p2[0] == '\200')
2060 : c = 0;
2061 : else
2062 : c = -1;
2063 2197918 : } else if (p2[0] == '\200')
2064 : c = 1;
2065 : else
2066 2197046 : c = strcmp(p1, p2);
2067 2197046 : if (c > 0) {
2068 17286 : b->tnosorted = bi.nosorted = p;
2069 17286 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2070 17286 : goto doreturn;
2071 21068559 : } else if (c < 0) {
2072 2181982 : assert(!b->trevsorted);
2073 2181982 : if (b->tnorevsorted == 0) {
2074 6825 : b->tnorevsorted = bi.norevsorted = p;
2075 6825 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b));
2076 : }
2077 18886577 : } else if (b->tnokey[1] == 0) {
2078 2303 : assert(!b->tkey);
2079 2303 : b->tnokey[0] = bi.nokey[0] = p - 1;
2080 2303 : b->tnokey[1] = bi.nokey[1] = p;
2081 21068559 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b));
2082 : }
2083 : }
2084 : break;
2085 178 : default: {
2086 178 : int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
2087 2458 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2088 2421 : int c;
2089 2421 : if ((c = cmpf(BUNtail(bi, p - 1), BUNtail(bi, p))) > 0) {
2090 141 : b->tnosorted = bi.nosorted = p;
2091 141 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2092 141 : goto doreturn;
2093 2280 : } else if (c < 0) {
2094 2230 : if (!b->trevsorted && b->tnorevsorted == 0) {
2095 86 : b->tnorevsorted = bi.norevsorted = p;
2096 86 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b));
2097 : }
2098 50 : } else if (!b->tkey && b->tnokey[1] == 0) {
2099 8 : b->tnokey[0] = bi.nokey[0] = p - 1;
2100 8 : b->tnokey[1] = bi.nokey[1] = p;
2101 2280 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b));
2102 : }
2103 : }
2104 : break;
2105 : }
2106 : }
2107 : /* we only get here if we completed the scan; note that
2108 : * if we didn't record evidence about *reverse*
2109 : * sortedness, we know that the BAT is also reverse
2110 : * sorted; similarly, if we didn't record evidence about
2111 : * keyness, we know the BAT is key */
2112 106333 : b->tsorted = bi.sorted = true;
2113 106333 : TRC_DEBUG(ALGO, "Fixed sorted for " ALGOBATFMT " (" LLFMT " usec)\n", ALGOBATPAR(b), GDKusec() - t0);
2114 106323 : if (!b->trevsorted && b->tnorevsorted == 0) {
2115 44026 : b->trevsorted = bi.revsorted = true;
2116 44026 : TRC_DEBUG(ALGO, "Fixed revsorted for " ALGOBATFMT "\n", ALGOBATPAR(b));
2117 : }
2118 106323 : if (!b->tkey && b->tnokey[1] == 0) {
2119 42927 : b->tkey = bi.key = true;
2120 42927 : TRC_DEBUG(ALGO, "Fixed key for " ALGOBATFMT "\n", ALGOBATPAR(b));
2121 : }
2122 : }
2123 106323 : doreturn:
2124 393290 : sorted = b->tsorted;
2125 393290 : bat pbid = VIEWtparent(b);
2126 393290 : MT_lock_unset(&b->theaplock);
2127 393304 : if (pbid) {
2128 224408 : BAT *pb = BBP_desc(pbid);
2129 224408 : MT_lock_set(&pb->theaplock);
2130 224407 : if (bi.count == BATcount(pb) &&
2131 192173 : bi.h == pb->theap &&
2132 192173 : bi.type == pb->ttype) {
2133 : /* add to knowledge in parent bat */
2134 192166 : pb->tsorted |= bi.sorted;
2135 192166 : if (pb->tnosorted == 0)
2136 192166 : pb->tnosorted = bi.nosorted;
2137 192166 : pb->trevsorted |= bi.revsorted;
2138 192166 : if (pb->tnorevsorted == 0)
2139 24025 : pb->tnorevsorted = bi.norevsorted;
2140 192166 : pb->tkey |= bi.key;
2141 192166 : if (pb->tnokey[1] == 0) {
2142 169432 : pb->tnokey[0] = bi.nokey[0];
2143 169432 : pb->tnokey[1] = bi.nokey[1];
2144 : }
2145 : }
2146 224407 : MT_lock_unset(&pb->theaplock);
2147 : }
2148 : return sorted;
2149 : }
2150 :
2151 : #define BAT_REVORDERED(TPE) \
2152 : do { \
2153 : const TPE *restrict vals = Tloc(b, 0); \
2154 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
2155 : if (vals[p - 1] < vals[p]) { \
2156 : b->tnorevsorted = p; \
2157 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
2158 : goto doreturn; \
2159 : } \
2160 : } \
2161 : } while (0)
2162 :
2163 : #define BAT_REVORDERED_FP(TPE) \
2164 : do { \
2165 : const TPE *restrict vals = Tloc(b, 0); \
2166 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
2167 : TPE prev = vals[p - 1], next = vals[p]; \
2168 : int cmp = is_flt_nil(prev) ? -!is_flt_nil(next) : is_flt_nil(next) ? 1 : (prev > next) - (prev < next); \
2169 : if (cmp < 0) { \
2170 : b->tnorevsorted = bi.norevsorted = p; \
2171 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
2172 : goto doreturn; \
2173 : } \
2174 : } \
2175 : } while (0)
2176 :
2177 : /* Return whether the BAT is reverse ordered or not. If we don't
2178 : * know, invest in a scan and record the results in the bat
2179 : * descriptor. */
2180 : bool
2181 2668106 : BATordered_rev(BAT *b)
2182 : {
2183 2668106 : lng t0 = GDKusec();
2184 2668127 : bool revsorted;
2185 :
2186 2668127 : if (b == NULL || !ATOMlinear(b->ttype))
2187 : return false;
2188 2668105 : MT_lock_set(&b->theaplock);
2189 2668114 : if (BATcount(b) <= 1 || b->trevsorted) {
2190 198306 : MT_lock_unset(&b->theaplock);
2191 198304 : return true;
2192 : }
2193 2469808 : if (b->ttype == TYPE_void) {
2194 16031 : MT_lock_unset(&b->theaplock);
2195 16031 : return is_oid_nil(b->tseqbase);
2196 : }
2197 2453777 : if (BATtdense(b) || b->tnorevsorted > 0) {
2198 2381973 : MT_lock_unset(&b->theaplock);
2199 2381947 : return false;
2200 : }
2201 71804 : BATiter bi = bat_iterator_nolock(b);
2202 71804 : if (!b->trevsorted && b->tnorevsorted == 0) {
2203 110088 : switch (ATOMbasetype(b->ttype)) {
2204 32084 : case TYPE_bte:
2205 10030677 : BAT_REVORDERED(bte);
2206 : break;
2207 3210 : case TYPE_sht:
2208 3129506 : BAT_REVORDERED(sht);
2209 : break;
2210 21142 : case TYPE_int:
2211 1473022 : BAT_REVORDERED(int);
2212 : break;
2213 3725 : case TYPE_lng:
2214 2743358 : BAT_REVORDERED(lng);
2215 : break;
2216 : #ifdef HAVE_HGE
2217 135 : case TYPE_hge:
2218 1048 : BAT_REVORDERED(hge);
2219 : break;
2220 : #endif
2221 512 : case TYPE_flt:
2222 1994 : BAT_REVORDERED_FP(flt);
2223 : break;
2224 282 : case TYPE_dbl:
2225 500614 : BAT_REVORDERED_FP(dbl);
2226 : break;
2227 10714 : default: {
2228 10714 : int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
2229 539765 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2230 536617 : if (cmpf(BUNtail(bi, p - 1), BUNtail(bi, p)) < 0) {
2231 7565 : b->tnorevsorted = p;
2232 7565 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2233 7565 : goto doreturn;
2234 : }
2235 : }
2236 : break;
2237 : }
2238 : }
2239 18462 : b->trevsorted = bi.revsorted = true;
2240 18462 : TRC_DEBUG(ALGO, "Fixed revsorted for " ALGOBATFMT " (" LLFMT " usec)\n", ALGOBATPAR(b), GDKusec() - t0);
2241 : }
2242 18462 : doreturn:
2243 71803 : revsorted = b->trevsorted;
2244 71803 : bat pbid = VIEWtparent(b);
2245 71803 : MT_lock_unset(&b->theaplock);
2246 71804 : if (pbid) {
2247 15648 : BAT *pb = BBP_desc(pbid);
2248 15648 : MT_lock_set(&pb->theaplock);
2249 15648 : if (bi.count == BATcount(pb) &&
2250 3272 : bi.h == pb->theap &&
2251 3272 : bi.type == pb->ttype) {
2252 : /* add to knowledge in parent bat */
2253 3272 : pb->trevsorted |= bi.revsorted;
2254 3272 : if (pb->tnorevsorted == 0)
2255 3272 : pb->tnorevsorted = bi.norevsorted;
2256 : }
2257 15648 : MT_lock_unset(&pb->theaplock);
2258 : }
2259 : return revsorted;
2260 : }
2261 :
2262 : /* figure out which sort function is to be called
2263 : * stable sort can produce an error (not enough memory available),
2264 : * "quick" sort does not produce errors */
2265 : static gdk_return
2266 3131813 : do_sort(void *restrict h, void *restrict t, const void *restrict base,
2267 : size_t n, int hs, int ts, int tpe, bool reverse, bool nilslast,
2268 : bool stable)
2269 : {
2270 3131813 : if (n <= 1) /* trivially sorted */
2271 : return GDK_SUCCEED;
2272 2701321 : switch (tpe) {
2273 2676050 : case TYPE_bte:
2274 : case TYPE_sht:
2275 : case TYPE_int:
2276 : case TYPE_lng:
2277 : #ifdef HAVE_HGE
2278 : case TYPE_hge:
2279 : #endif
2280 : case TYPE_date:
2281 : case TYPE_daytime:
2282 : case TYPE_timestamp:
2283 2676050 : assert(base == NULL);
2284 2676050 : if (nilslast == reverse && (stable || n > 100))
2285 20142 : return GDKrsort(h, t, n, hs, ts, reverse, false);
2286 : break;
2287 4 : case TYPE_uuid:
2288 4 : assert(base == NULL);
2289 4 : if (nilslast == reverse && (stable || n > 100))
2290 1 : return GDKrsort(h, t, n, hs, ts, reverse, true);
2291 : break;
2292 : default:
2293 : break;
2294 : }
2295 25360 : if (stable) {
2296 27 : if (reverse)
2297 0 : return GDKssort_rev(h, t, base, n, hs, ts, tpe);
2298 : else
2299 27 : return GDKssort(h, t, base, n, hs, ts, tpe);
2300 : } else {
2301 2681151 : GDKqsort(h, t, base, n, hs, ts, tpe, reverse, nilslast);
2302 : }
2303 2681151 : return GDK_SUCCEED;
2304 : }
2305 :
2306 : /* Sort the bat b according to both o and g. The stable and reverse
2307 : * parameters indicate whether the sort should be stable or descending
2308 : * respectively. The parameter b is required, o and g are optional
2309 : * (i.e., they may be NULL).
2310 : *
2311 : * A sorted copy is returned through the sorted parameter, the new
2312 : * ordering is returned through the order parameter, group information
2313 : * is returned through the groups parameter. All three output
2314 : * parameters may be NULL. If they're all NULL, this function does
2315 : * nothing.
2316 : *
2317 : * If o is specified, it is used to first rearrange b according to the
2318 : * order specified in o, after which b is sorted taking g into
2319 : * account.
2320 : *
2321 : * If g is specified, it indicates groups which should be individually
2322 : * ordered. Each row of consecutive equal values in g indicates a
2323 : * group which is sorted according to stable and reverse. g is used
2324 : * after the order in b was rearranged according to o.
2325 : *
2326 : * The outputs order and groups can be used in subsequent calls to
2327 : * this function. This can be used if multiple BATs need to be sorted
2328 : * together. The BATs should then be sorted in order of significance,
2329 : * and each following call should use the original unordered BAT plus
2330 : * the order and groups bat from the previous call. In this case, the
2331 : * sorted BATs are not of much use, so the sorted output parameter
2332 : * does not need to be specified.
2333 : * Apart from error checking and maintaining reference counts, sorting
2334 : * three columns (col1, col2, col3) could look like this with the
2335 : * sorted results in (col1s, col2s, col3s):
2336 : * BATsort(&col1s, &ord1, &grp1, col1, NULL, NULL, false, false, false);
2337 : * BATsort(&col2s, &ord2, &grp2, col2, ord1, grp1, false, false, false);
2338 : * BATsort(&col3s, NULL, NULL, col3, ord2, grp2, false, false, false);
2339 : * Note that the "reverse" parameter can be different for each call.
2340 : */
2341 : gdk_return
2342 24152 : BATsort(BAT **sorted, BAT **order, BAT **groups,
2343 : BAT *b, BAT *o, BAT *g, bool reverse, bool nilslast, bool stable)
2344 : {
2345 24152 : BAT *bn = NULL, *on = NULL, *gn = NULL, *pb = NULL;
2346 24152 : BATiter pbi;
2347 24152 : oid *restrict grps, *restrict ords, prev;
2348 24152 : BUN p, q, r;
2349 24152 : lng t0 = GDKusec();
2350 24152 : bool mkorderidx, orderidxlock = false;
2351 24152 : Heap *oidxh = NULL;
2352 :
2353 : /* we haven't implemented NILs as largest value for stable
2354 : * sort, so NILs come first for ascending and last for
2355 : * descending */
2356 24152 : assert(!stable || reverse == nilslast);
2357 :
2358 24152 : if (b == NULL) {
2359 0 : GDKerror("b must exist\n");
2360 0 : return GDK_FAIL;
2361 : }
2362 24152 : if (stable && reverse != nilslast) {
2363 0 : GDKerror("stable sort cannot have reverse != nilslast\n");
2364 0 : return GDK_FAIL;
2365 : }
2366 24152 : if (!ATOMlinear(b->ttype)) {
2367 0 : GDKerror("type %s cannot be sorted\n", ATOMname(b->ttype));
2368 0 : return GDK_FAIL;
2369 : }
2370 24152 : MT_lock_set(&b->theaplock);
2371 24149 : if (b->ttype == TYPE_void) {
2372 115 : b->tsorted = true;
2373 168 : if (b->trevsorted != (is_oid_nil(b->tseqbase) || b->batCount <= 1)) {
2374 0 : b->trevsorted = !b->trevsorted;
2375 : }
2376 230 : if (b->tkey != (!is_oid_nil(b->tseqbase) || b->batCount <= 1)) {
2377 0 : b->tkey = !b->tkey;
2378 : }
2379 24034 : } else if (b->batCount <= 1) {
2380 6592 : if (!b->tsorted || !b->trevsorted) {
2381 23 : b->tsorted = b->trevsorted = true;
2382 : }
2383 : }
2384 24149 : MT_lock_unset(&b->theaplock);
2385 24152 : if (o != NULL &&
2386 12992 : (ATOMtype(o->ttype) != TYPE_oid || /* oid tail */
2387 12992 : BATcount(o) != BATcount(b) || /* same size as b */
2388 4784 : (o->ttype == TYPE_void && /* no nil tail */
2389 2230 : BATcount(o) != 0 &&
2390 2230 : is_oid_nil(o->tseqbase)))) {
2391 0 : GDKerror("o must have type oid and same size as b\n");
2392 0 : return GDK_FAIL;
2393 : }
2394 24152 : if (g != NULL &&
2395 12992 : (ATOMtype(g->ttype) != TYPE_oid || /* oid tail */
2396 12992 : !g->tsorted || /* sorted */
2397 12992 : BATcount(g) != BATcount(b) || /* same size as b */
2398 5063 : (g->ttype == TYPE_void && /* no nil tail */
2399 2509 : BATcount(g) != 0 &&
2400 2509 : is_oid_nil(g->tseqbase)))) {
2401 0 : GDKerror("g must have type oid, sorted on the tail, "
2402 : "and same size as b\n");
2403 0 : return GDK_FAIL;
2404 : }
2405 24152 : if (sorted == NULL && order == NULL) {
2406 : /* no place to put result, so we're done quickly */
2407 0 : GDKerror("no place to put the result.\n");
2408 0 : return GDK_FAIL;
2409 : }
2410 24152 : if (g == NULL && !stable) {
2411 : /* pre-ordering doesn't make sense if we're not
2412 : * subsorting and the sort is not stable */
2413 10878 : o = NULL;
2414 : }
2415 24152 : if (b->tnonil) {
2416 : /* if there are no nils, placement of nils doesn't
2417 : * matter, so set nilslast such that ordered bits can
2418 : * be used */
2419 17792 : nilslast = reverse;
2420 : }
2421 24152 : pbi = bat_iterator(NULL);
2422 25084 : if (BATcount(b) <= 1 ||
2423 17398 : (reverse == nilslast &&
2424 : (reverse ? b->trevsorted : b->tsorted) &&
2425 4715 : o == NULL && g == NULL &&
2426 2242 : (groups == NULL || BATtkey(b) ||
2427 : (reverse ? b->tsorted : b->trevsorted)))) {
2428 : /* trivially (sub)sorted, and either we don't need to
2429 : * return group information, or we can trivially
2430 : * deduce the groups */
2431 8322 : if (sorted) {
2432 7027 : bn = COLcopy(b, b->ttype, false, TRANSIENT);
2433 7027 : if (bn == NULL)
2434 0 : goto error;
2435 7027 : *sorted = bn;
2436 : }
2437 8322 : if (order) {
2438 7477 : on = BATdense(b->hseqbase, b->hseqbase, BATcount(b));
2439 7479 : if (on == NULL)
2440 0 : goto error;
2441 7479 : *order = on;
2442 : }
2443 8324 : if (groups) {
2444 4371 : if (BATtkey(b)) {
2445 : /* singleton groups */
2446 4002 : gn = BATdense(b->hseqbase, 0, BATcount(b));
2447 4002 : if (gn == NULL)
2448 0 : goto error;
2449 : } else {
2450 : /* single group */
2451 369 : const oid *o = 0;
2452 369 : assert(BATcount(b) == 1 ||
2453 : (b->tsorted && b->trevsorted));
2454 369 : gn = BATconstant(b->hseqbase, TYPE_oid, &o, BATcount(b), TRANSIENT);
2455 369 : if (gn == NULL)
2456 0 : goto error;
2457 : }
2458 4371 : *groups = gn;
2459 : }
2460 8324 : bat_iterator_end(&pbi);
2461 8324 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o="
2462 : ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
2463 : ",reverse=%d,nilslast=%d,stable=%d) = ("
2464 : ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2465 : ALGOOPTBATFMT " -- trivial (" LLFMT
2466 : " usec)\n",
2467 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2468 : ALGOOPTBATPAR(g), reverse, nilslast, stable,
2469 : ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
2470 : ALGOOPTBATPAR(on), GDKusec() - t0);
2471 8324 : return GDK_SUCCEED;
2472 : }
2473 15828 : if (VIEWtparent(b)) {
2474 3349 : pb = BATdescriptor(VIEWtparent(b));
2475 3349 : if (pb != NULL &&
2476 3349 : (b->tbaseoff != pb->tbaseoff ||
2477 2658 : BATcount(b) != BATcount(pb) ||
2478 2442 : b->hseqbase != pb->hseqbase ||
2479 2433 : BATatoms[b->ttype].atomCmp != BATatoms[pb->ttype].atomCmp)) {
2480 916 : BBPunfix(pb->batCacheid);
2481 916 : pb = NULL;
2482 : }
2483 : } else {
2484 : pb = b;
2485 : }
2486 15828 : bat_iterator_end(&pbi);
2487 15828 : pbi = bat_iterator(pb);
2488 : /* when we will create an order index if it doesn't already exist */
2489 15828 : mkorderidx = (g == NULL && !reverse && !nilslast && pb != NULL && (order || !pbi.transient));
2490 15828 : if (g == NULL && !reverse && !nilslast && pb != NULL) {
2491 5322 : (void) BATcheckorderidx(pb);
2492 5322 : MT_lock_set(&pb->batIdxLock);
2493 5322 : if (pb->torderidx) {
2494 61 : if (!stable || ((oid *) pb->torderidx->base)[2]) {
2495 : /* we can use the order index */
2496 61 : oidxh = pb->torderidx;
2497 61 : HEAPincref(oidxh);
2498 : }
2499 : mkorderidx = false;
2500 5261 : } else if (b != pb) {
2501 : /* don't build orderidx on parent bat */
2502 : mkorderidx = false;
2503 4395 : } else if (mkorderidx) {
2504 : /* keep lock when going to create */
2505 3875 : orderidxlock = true;
2506 : }
2507 5322 : if (!orderidxlock)
2508 1447 : MT_lock_unset(&pb->batIdxLock);
2509 : }
2510 15828 : if (g == NULL && o == NULL && !reverse && !nilslast && oidxh != NULL) {
2511 : /* there is an order index that we can use */
2512 61 : on = COLnew(pb->hseqbase, TYPE_oid, pbi.count, TRANSIENT);
2513 61 : if (on == NULL)
2514 0 : goto error;
2515 61 : memcpy(Tloc(on, 0), (oid *) oidxh->base + ORDERIDXOFF, pbi.count * sizeof(oid));
2516 61 : BATsetcount(on, BATcount(b));
2517 61 : HEAPdecref(oidxh, false);
2518 61 : oidxh = NULL;
2519 61 : on->tkey = true;
2520 61 : on->tnil = false;
2521 61 : on->tnonil = true;
2522 61 : on->tsorted = on->trevsorted = false;
2523 61 : on->tseqbase = oid_nil;
2524 61 : if (sorted || groups) {
2525 61 : bn = BATproject(on, b);
2526 61 : if (bn == NULL)
2527 0 : goto error;
2528 61 : bn->tsorted = true;
2529 61 : if (groups) {
2530 6 : if (BATgroup_internal(groups, NULL, NULL, bn, NULL, g, NULL, NULL, true) != GDK_SUCCEED)
2531 0 : goto error;
2532 6 : if (sorted &&
2533 6 : (*groups)->tkey &&
2534 : g == NULL) {
2535 : /* if new groups bat is key
2536 : * and since there is no input
2537 : * groups bat, we know the
2538 : * result bat is key */
2539 4 : bn->tkey = true;
2540 : }
2541 : }
2542 61 : if (sorted)
2543 61 : *sorted = bn;
2544 : else {
2545 0 : BBPunfix(bn->batCacheid);
2546 0 : bn = NULL;
2547 : }
2548 : }
2549 61 : if (order)
2550 11 : *order = on;
2551 : else {
2552 50 : BBPunfix(on->batCacheid);
2553 50 : on = NULL;
2554 : }
2555 61 : bat_iterator_end(&pbi);
2556 61 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o="
2557 : ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
2558 : ",reverse=%d,nilslast=%d,stable=%d) = ("
2559 : ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2560 : ALGOOPTBATFMT " -- orderidx (" LLFMT
2561 : " usec)\n",
2562 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2563 : ALGOOPTBATPAR(g), reverse, nilslast, stable,
2564 : ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
2565 : ALGOOPTBATPAR(on), GDKusec() - t0);
2566 61 : if (pb != NULL && pb != b)
2567 53 : BBPunfix(pb->batCacheid);
2568 61 : return GDK_SUCCEED;
2569 10036 : } else if (oidxh) {
2570 0 : HEAPdecref(oidxh, false);
2571 0 : oidxh = NULL;
2572 : }
2573 15767 : if (o) {
2574 9499 : bn = BATproject(o, b);
2575 9499 : if (bn == NULL)
2576 0 : goto error;
2577 9499 : if (bn->ttype == TYPE_void || isVIEW(bn)) {
2578 3840 : BAT *b2 = COLcopy(bn, ATOMtype(bn->ttype), true, TRANSIENT);
2579 1920 : BBPunfix(bn->batCacheid);
2580 1920 : bn = b2;
2581 : }
2582 9499 : if (pb) {
2583 9074 : bat_iterator_end(&pbi);
2584 9074 : if (pb != b)
2585 1176 : BBPunfix(pb->batCacheid);
2586 9074 : pbi = bat_iterator(NULL);
2587 9074 : pb = NULL;
2588 : }
2589 : } else {
2590 6268 : bn = COLcopy(b, b->ttype, true, TRANSIENT);
2591 : }
2592 15767 : if (bn == NULL)
2593 0 : goto error;
2594 15767 : if (order) {
2595 : /* prepare order bat */
2596 13104 : if (o) {
2597 : /* make copy of input so that we can refine it;
2598 : * copy can be read-only if we take the shortcut
2599 : * below in the case g is "key" */
2600 7883 : on = COLcopy(o, TYPE_oid,
2601 7883 : g == NULL ||
2602 7883 : !(g->tkey || g->ttype == TYPE_void),
2603 : TRANSIENT);
2604 7883 : if (on == NULL)
2605 0 : goto error;
2606 7883 : BAThseqbase(on, b->hseqbase);
2607 7883 : on->tminpos = BUN_NONE;
2608 7883 : on->tmaxpos = BUN_NONE;
2609 : } else {
2610 : /* create new order */
2611 5221 : on = COLnew(b->hseqbase, TYPE_oid, BATcount(bn), TRANSIENT);
2612 5221 : if (on == NULL)
2613 0 : goto error;
2614 5221 : ords = (oid *) Tloc(on, 0);
2615 31336808 : for (p = 0, q = BATcount(bn); p < q; p++)
2616 31331587 : ords[p] = p + b->hseqbase;
2617 5221 : BATsetcount(on, BATcount(bn));
2618 5221 : on->tkey = true;
2619 5221 : on->tnil = false;
2620 5221 : on->tnonil = true;
2621 : }
2622 : /* COLcopy above can create TYPE_void */
2623 13104 : if (on->ttype != TYPE_void) {
2624 12376 : on->tsorted = on->trevsorted = false; /* it won't be sorted */
2625 12376 : on->tseqbase = oid_nil; /* and hence not dense */
2626 12376 : on->tnosorted = on->tnorevsorted = 0;
2627 : }
2628 13104 : *order = on;
2629 13104 : ords = (oid *) Tloc(on, 0);
2630 : } else {
2631 : ords = NULL;
2632 : }
2633 15767 : if (g) {
2634 9499 : if (g->tkey || g->ttype == TYPE_void) {
2635 : /* if g is "key", all groups are size 1, so no
2636 : * subsorting needed */
2637 4811 : if (sorted) {
2638 4479 : *sorted = bn;
2639 : } else {
2640 332 : BBPunfix(bn->batCacheid);
2641 332 : bn = NULL;
2642 : }
2643 4811 : if (order) {
2644 3640 : *order = on;
2645 3640 : if (o) {
2646 : /* we can inherit sortedness
2647 : * after all */
2648 3640 : on->tsorted = o->tsorted;
2649 3640 : on->trevsorted = o->trevsorted;
2650 3640 : if (o->tnosorted)
2651 48 : on->tnosorted = o->tnosorted;
2652 3640 : if (o->tnorevsorted)
2653 62 : on->tnorevsorted = o->tnorevsorted;
2654 : } else {
2655 : /* we didn't rearrange, so
2656 : * still sorted */
2657 0 : on->tsorted = true;
2658 0 : on->trevsorted = false;
2659 : }
2660 3640 : if (BATcount(on) <= 1) {
2661 0 : on->tsorted = true;
2662 0 : on->trevsorted = true;
2663 : }
2664 : }
2665 4811 : if (groups) {
2666 2809 : gn = COLcopy(g, g->ttype, false, TRANSIENT);
2667 2809 : if (gn == NULL)
2668 0 : goto error;
2669 2809 : *groups = gn;
2670 : }
2671 4811 : bat_iterator_end(&pbi);
2672 4811 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
2673 : ",o=" ALGOOPTBATFMT ",g=" ALGOBATFMT
2674 : ",reverse=%d,nilslast=%d,stable=%d"
2675 : ") = (" ALGOOPTBATFMT ","
2676 : ALGOOPTBATFMT "," ALGOOPTBATFMT
2677 : " -- key group (" LLFMT " usec)\n",
2678 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2679 : ALGOBATPAR(g), reverse, nilslast,
2680 : stable, ALGOOPTBATPAR(bn),
2681 : ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on),
2682 : GDKusec() - t0);
2683 4811 : if (pb != NULL && pb != b)
2684 0 : BBPunfix(pb->batCacheid);
2685 4811 : return GDK_SUCCEED;
2686 : }
2687 4688 : assert(g->ttype == TYPE_oid);
2688 4688 : grps = (oid *) Tloc(g, 0);
2689 4688 : prev = grps[0];
2690 4688 : if (BATmaterialize(bn, BUN_NONE) != GDK_SUCCEED)
2691 0 : goto error;
2692 44645610 : for (r = 0, p = 1, q = BATcount(g); p < q; p++) {
2693 44640922 : if (grps[p] != prev) {
2694 : /* sub sort [r,p) */
2695 5974112 : if (do_sort(Tloc(bn, r),
2696 2852661 : ords ? ords + r : NULL,
2697 3121451 : bn->tvheap ? bn->tvheap->base : NULL,
2698 3121451 : p - r, bn->twidth, ords ? sizeof(oid) : 0,
2699 3121451 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)
2700 0 : goto error;
2701 3121451 : r = p;
2702 3121451 : prev = grps[p];
2703 : }
2704 : }
2705 : /* sub sort [r,q) */
2706 8931 : if (do_sort(Tloc(bn, r),
2707 4243 : ords ? ords + r : NULL,
2708 4688 : bn->tvheap ? bn->tvheap->base : NULL,
2709 4688 : p - r, bn->twidth, ords ? sizeof(oid) : 0,
2710 4688 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)
2711 0 : goto error;
2712 : /* if single group (r==0) the result is (rev)sorted,
2713 : * otherwise (maybe) not */
2714 4688 : bn->tsorted = r == 0 && !reverse && !nilslast;
2715 9351 : bn->trevsorted = r == 0 && reverse && nilslast;
2716 : } else {
2717 6268 : Heap *m = NULL;
2718 : /* only invest in creating an order index if the BAT
2719 : * is persistent */
2720 6268 : if (mkorderidx) {
2721 3875 : assert(orderidxlock);
2722 3875 : if ((m = createOIDXheap(pb, stable)) != NULL &&
2723 : ords == NULL) {
2724 0 : ords = (oid *) m->base + ORDERIDXOFF;
2725 0 : if (o && o->ttype != TYPE_void)
2726 0 : memcpy(ords, Tloc(o, 0), BATcount(o) * sizeof(oid));
2727 0 : else if (o)
2728 0 : for (p = 0, q = BATcount(o); p < q; p++)
2729 0 : ords[p] = p + o->tseqbase;
2730 : else
2731 0 : for (p = 0, q = BATcount(b); p < q; p++)
2732 0 : ords[p] = p + b->hseqbase;
2733 : }
2734 : }
2735 6268 : if ((reverse != nilslast ||
2736 11850 : (reverse ? !bn->trevsorted : !bn->tsorted)) &&
2737 11348 : (BATmaterialize(bn, BUN_NONE) != GDK_SUCCEED ||
2738 5674 : do_sort(Tloc(bn, 0),
2739 : ords,
2740 5674 : bn->tvheap ? bn->tvheap->base : NULL,
2741 5674 : BATcount(bn), bn->twidth, ords ? sizeof(oid) : 0,
2742 5674 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)) {
2743 0 : if (m != NULL) {
2744 0 : HEAPfree(m, true);
2745 0 : GDKfree(m);
2746 : }
2747 0 : goto error;
2748 : }
2749 6268 : bn->tsorted = !reverse && !nilslast;
2750 6268 : bn->trevsorted = reverse && nilslast;
2751 6268 : if (m != NULL) {
2752 3875 : assert(orderidxlock);
2753 3875 : if (pb->torderidx == NULL) {
2754 3875 : if (ords != (oid *) m->base + ORDERIDXOFF) {
2755 3875 : memcpy((oid *) m->base + ORDERIDXOFF,
2756 : ords,
2757 3875 : pbi.count * sizeof(oid));
2758 : }
2759 3875 : pb->torderidx = m;
2760 3875 : persistOIDX(pb);
2761 : } else {
2762 0 : HEAPfree(m, true);
2763 0 : GDKfree(m);
2764 : }
2765 : }
2766 : }
2767 10956 : if (orderidxlock) {
2768 3875 : MT_lock_unset(&pb->batIdxLock);
2769 3875 : orderidxlock = false;
2770 : }
2771 10956 : bn->theap->dirty = true;
2772 10956 : bn->tnosorted = 0;
2773 10956 : bn->tnorevsorted = 0;
2774 10956 : bn->tnokey[0] = bn->tnokey[1] = 0;
2775 10956 : bn->tminpos = BUN_NONE;
2776 10956 : bn->tmaxpos = BUN_NONE;
2777 10956 : if (groups) {
2778 6481 : if (BATgroup_internal(groups, NULL, NULL, bn, NULL, g, NULL, NULL, true) != GDK_SUCCEED)
2779 0 : goto error;
2780 6481 : if ((*groups)->tkey &&
2781 862 : (g == NULL || (g->tsorted && g->trevsorted))) {
2782 : /* if new groups bat is key and the input
2783 : * group bat has a single value (both sorted
2784 : * and revsorted), we know the result bat is
2785 : * key */
2786 1084 : bn->tkey = true;
2787 : }
2788 : }
2789 :
2790 10956 : bat_iterator_end(&pbi);
2791 10956 : if (sorted)
2792 8172 : *sorted = bn;
2793 : else {
2794 2784 : BBPunfix(bn->batCacheid);
2795 2784 : bn = NULL;
2796 : }
2797 :
2798 10956 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o=" ALGOOPTBATFMT
2799 : ",g=" ALGOOPTBATFMT ",reverse=%d,nilslast=%d,"
2800 : "stable=%d) = (" ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2801 : ALGOOPTBATFMT " -- %ssort (" LLFMT " usec)\n",
2802 : ALGOBATPAR(b), ALGOOPTBATPAR(o), ALGOOPTBATPAR(g),
2803 : reverse, nilslast, stable, ALGOOPTBATPAR(bn),
2804 : ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on),
2805 : g ? "grouped " : "", GDKusec() - t0);
2806 10956 : if (pb && pb != b)
2807 1204 : BBPunfix(pb->batCacheid);
2808 : return GDK_SUCCEED;
2809 :
2810 0 : error:
2811 0 : bat_iterator_end(&pbi);
2812 0 : if (orderidxlock)
2813 0 : MT_lock_unset(&pb->batIdxLock);
2814 0 : if (oidxh)
2815 0 : HEAPdecref(oidxh, false);
2816 0 : BBPreclaim(bn);
2817 0 : if (pb && pb != b)
2818 0 : BBPunfix(pb->batCacheid);
2819 0 : BBPreclaim(on);
2820 0 : if (sorted)
2821 0 : *sorted = NULL;
2822 0 : if (order)
2823 0 : *order = NULL;
2824 0 : if (groups)
2825 0 : *groups = NULL;
2826 : return GDK_FAIL;
2827 : }
2828 :
2829 : /* return a new BAT of length n with seqbase hseq, and the constant v
2830 : * in the tail */
2831 : BAT *
2832 946725 : BATconstant(oid hseq, int tailtype, const void *v, BUN n, role_t role)
2833 : {
2834 946725 : BAT *bn;
2835 946725 : void *restrict p;
2836 946725 : BUN i;
2837 946725 : lng t0 = 0;
2838 :
2839 946725 : TRC_DEBUG_IF(ALGO) t0 = GDKusec();
2840 946725 : if (v == NULL)
2841 : return NULL;
2842 946725 : bn = COLnew(hseq, tailtype, n, role);
2843 946664 : if (bn != NULL && n > 0) {
2844 69571 : p = Tloc(bn, 0);
2845 69571 : switch (ATOMstorage(tailtype)) {
2846 24 : case TYPE_void:
2847 24 : v = &oid_nil;
2848 24 : BATtseqbase(bn, oid_nil);
2849 24 : break;
2850 0 : case TYPE_msk:
2851 0 : if (*(msk*)v) {
2852 0 : memset(p, 0xFF, 4 * ((n + 31) / 32));
2853 0 : if (n & 31) {
2854 0 : uint32_t *m = p;
2855 0 : m[n / 32] &= (1U << (n % 32)) - 1;
2856 : }
2857 : } else
2858 0 : memset(p, 0x00, 4 * ((n + 31) / 32));
2859 : break;
2860 8680 : case TYPE_bte:
2861 8680 : memset(p, *(bte*)v, n);
2862 8680 : break;
2863 : case TYPE_sht:
2864 7151606 : for (i = 0; i < n; i++)
2865 7135961 : ((sht *) p)[i] = *(sht *) v;
2866 : break;
2867 : case TYPE_int:
2868 : case TYPE_flt:
2869 : assert(sizeof(int) == sizeof(flt));
2870 225631135 : for (i = 0; i < n; i++)
2871 225626390 : ((int *) p)[i] = *(int *) v;
2872 : break;
2873 : case TYPE_lng:
2874 : case TYPE_dbl:
2875 : assert(sizeof(lng) == sizeof(dbl));
2876 236901357 : for (i = 0; i < n; i++)
2877 236874683 : ((lng *) p)[i] = *(lng *) v;
2878 : break;
2879 : #ifdef HAVE_HGE
2880 : case TYPE_hge:
2881 29121772 : for (i = 0; i < n; i++)
2882 29120561 : ((hge *) p)[i] = *(hge *) v;
2883 : break;
2884 : #endif
2885 : case TYPE_uuid:
2886 200047 : for (i = 0; i < n; i++)
2887 200038 : ((uuid *) p)[i] = *(uuid *) v;
2888 : break;
2889 12441 : case TYPE_str:
2890 : /* insert the first value, then just copy the
2891 : * offset lots of times */
2892 12441 : if (tfastins_nocheck(bn, 0, v) != GDK_SUCCEED) {
2893 0 : BBPreclaim(bn);
2894 0 : return NULL;
2895 : }
2896 12440 : char val[sizeof(var_t)];
2897 12440 : memcpy(val, Tloc(bn, 0), bn->twidth);
2898 12440 : if (bn->twidth == 1 && n > 1) {
2899 : /* single byte value: we have a
2900 : * function for that */
2901 5978 : memset(Tloc(bn, 1), val[0], n - 1);
2902 : } else {
2903 6462 : char *p = Tloc(bn, 0);
2904 6482 : for (i = 1; i < n; i++) {
2905 20 : p += bn->twidth;
2906 20 : memcpy(p, val, bn->twidth);
2907 : }
2908 : }
2909 : break;
2910 : default:
2911 333705 : for (i = 0; i < n; i++)
2912 333563 : if (tfastins_nocheck(bn, i, v) != GDK_SUCCEED) {
2913 0 : BBPreclaim(bn);
2914 0 : return NULL;
2915 : }
2916 : break;
2917 : }
2918 69570 : bn->theap->dirty = true;
2919 69570 : bn->tnil = n >= 1 && ATOMnilptr(tailtype) && (*ATOMcompare(tailtype))(v, ATOMnilptr(tailtype)) == 0;
2920 69568 : BATsetcount(bn, n);
2921 69566 : bn->tsorted = bn->trevsorted = ATOMlinear(tailtype);
2922 69566 : bn->tnonil = !bn->tnil;
2923 69566 : bn->tkey = BATcount(bn) <= 1;
2924 : }
2925 946659 : TRC_DEBUG(ALGO, "-> " ALGOOPTBATFMT " " LLFMT "usec\n",
2926 : ALGOOPTBATPAR(bn), GDKusec() - t0);
2927 : return bn;
2928 : }
2929 :
2930 : /*
2931 : * BAT Aggregates
2932 : *
2933 : * We retain the size() and card() aggregate results in the column
2934 : * descriptor. We would like to have such functionality in an
2935 : * extensible way for many aggregates, for DD (1) we do not want to
2936 : * change the binary BAT format on disk and (2) aggr and size are the
2937 : * most relevant aggregates.
2938 : *
2939 : * It is all hacked into the aggr[3] records; three adjacent integers
2940 : * that were left over in the column record. We refer to these as if
2941 : * it where an int aggr[3] array. The below routines set and retrieve
2942 : * the aggregate values from the tail of the BAT, as many
2943 : * aggregate-manipulating BAT functions work on tail.
2944 : *
2945 : * The rules are as follows: aggr[0] contains the alignment ID of the
2946 : * column (if set i.e. nonzero). Hence, if this value is nonzero and
2947 : * equal to b->talign, the precomputed aggregate values in
2948 : * aggr[GDK_AGGR_SIZE] and aggr[GDK_AGGR_CARD] hold. However, only one
2949 : * of them may be set at the time. This is encoded by the value
2950 : * int_nil, which cannot occur in these two aggregates.
2951 : *
2952 : * This was now extended to record the property whether we know there
2953 : * is a nil value present by mis-using the highest bits of both
2954 : * GDK_AGGR_SIZE and GDK_AGGR_CARD.
2955 : */
2956 :
2957 : void
2958 36155402 : PROPdestroy_nolock(BAT *b)
2959 : {
2960 36155402 : PROPrec *p = b->tprops;
2961 36155402 : PROPrec *n;
2962 :
2963 36155402 : b->tprops = NULL;
2964 36158276 : while (p) {
2965 3020 : n = p->next;
2966 3020 : assert(p->id != (enum prop_t) 20);
2967 3020 : VALclear(&p->v);
2968 3020 : GDKfree(p);
2969 3020 : p = n;
2970 : }
2971 36155256 : }
2972 :
2973 : void
2974 434 : PROPdestroy(BAT *b)
2975 : {
2976 434 : MT_lock_set(&b->theaplock);
2977 434 : PROPdestroy_nolock(b);
2978 434 : MT_lock_unset(&b->theaplock);
2979 434 : }
2980 :
2981 : ValPtr
2982 186436512 : BATgetprop_nolock(BAT *b, enum prop_t idx)
2983 : {
2984 186436512 : PROPrec *p;
2985 :
2986 186436512 : p = b->tprops;
2987 186440647 : while (p && p->id != idx)
2988 4135 : p = p->next;
2989 186436512 : return p ? &p->v : NULL;
2990 : }
2991 :
2992 : void
2993 412913 : BATrmprop_nolock(BAT *b, enum prop_t idx)
2994 : {
2995 412913 : PROPrec *prop = b->tprops, *prev = NULL;
2996 :
2997 413132 : while (prop) {
2998 413021 : if (prop->id == idx) {
2999 412802 : if (prev)
3000 106 : prev->next = prop->next;
3001 : else
3002 412696 : b->tprops = prop->next;
3003 412802 : VALclear(&prop->v);
3004 412802 : GDKfree(prop);
3005 412802 : return;
3006 : }
3007 219 : prev = prop;
3008 219 : prop = prop->next;
3009 : }
3010 : }
3011 :
3012 : ValPtr
3013 416459 : BATsetprop_nolock(BAT *b, enum prop_t idx, int type, const void *v)
3014 : {
3015 416459 : PROPrec *p;
3016 :
3017 416459 : p = b->tprops;
3018 423198 : while (p && p->id != idx)
3019 6739 : p = p->next;
3020 416459 : if (p == NULL) {
3021 416452 : if ((p = GDKmalloc(sizeof(PROPrec))) == NULL) {
3022 : /* properties are hints, so if we can't create
3023 : * one we ignore the error */
3024 0 : GDKclrerr();
3025 0 : return NULL;
3026 : }
3027 416452 : p->id = idx;
3028 416452 : p->next = b->tprops;
3029 416452 : p->v.vtype = 0;
3030 416452 : b->tprops = p;
3031 : } else {
3032 7 : VALclear(&p->v);
3033 : }
3034 416459 : if (VALinit(&p->v, type, v) == NULL) {
3035 : /* failed to initialize, so remove property */
3036 0 : BATrmprop_nolock(b, idx);
3037 0 : GDKclrerr();
3038 0 : p = NULL;
3039 : }
3040 0 : return p ? &p->v : NULL;
3041 : }
3042 :
3043 : ValPtr
3044 2785978 : BATgetprop(BAT *b, enum prop_t idx)
3045 : {
3046 2785978 : ValPtr p;
3047 :
3048 2785978 : MT_lock_set(&b->theaplock);
3049 2785883 : p = BATgetprop_nolock(b, idx);
3050 2785922 : MT_lock_unset(&b->theaplock);
3051 2785903 : return p;
3052 : }
3053 :
3054 : ValPtr
3055 3472 : BATsetprop(BAT *b, enum prop_t idx, int type, const void *v)
3056 : {
3057 3472 : ValPtr p;
3058 3472 : MT_lock_set(&b->theaplock);
3059 3472 : p = BATsetprop_nolock(b, idx, type, v);
3060 3472 : MT_lock_unset(&b->theaplock);
3061 3472 : return p;
3062 : }
3063 :
3064 : void
3065 2 : BATrmprop(BAT *b, enum prop_t idx)
3066 : {
3067 2 : MT_lock_set(&b->theaplock);
3068 2 : BATrmprop_nolock(b, idx);
3069 2 : MT_lock_unset(&b->theaplock);
3070 2 : }
3071 :
3072 : /*
3073 : * The BATcount_no_nil function counts all BUN in a BAT that have a
3074 : * non-nil tail value.
3075 : * This function does not fail (the callers currently don't check for failure).
3076 : */
3077 : BUN
3078 2205 : BATcount_no_nil(BAT *b, BAT *s)
3079 : {
3080 2205 : BUN cnt = 0;
3081 2205 : const void *restrict p, *restrict nil;
3082 2205 : const char *restrict base;
3083 2205 : int t;
3084 2205 : int (*cmp)(const void *, const void *);
3085 2205 : struct canditer ci;
3086 2205 : oid hseq;
3087 :
3088 2205 : BATcheck(b, 0);
3089 :
3090 2205 : hseq = b->hseqbase;
3091 2205 : canditer_init(&ci, b, s);
3092 2205 : BATiter bi = bat_iterator(b);
3093 2205 : if (bi.nonil) {
3094 2001 : bat_iterator_end(&bi);
3095 2001 : return ci.ncand;
3096 : }
3097 204 : p = bi.base;
3098 204 : t = ATOMbasetype(bi.type);
3099 204 : switch (t) {
3100 0 : case TYPE_void:
3101 0 : cnt = ci.ncand * BATtdensebi(&bi);
3102 0 : break;
3103 0 : case TYPE_msk:
3104 0 : cnt = ci.ncand;
3105 0 : break;
3106 15 : case TYPE_bte:
3107 31 : CAND_LOOP(&ci)
3108 16 : cnt += !is_bte_nil(((const bte *) p)[canditer_next(&ci) - hseq]);
3109 : break;
3110 0 : case TYPE_sht:
3111 0 : CAND_LOOP(&ci)
3112 0 : cnt += !is_sht_nil(((const sht *) p)[canditer_next(&ci) - hseq]);
3113 : break;
3114 79 : case TYPE_int:
3115 5548817 : CAND_LOOP(&ci)
3116 5548738 : cnt += !is_int_nil(((const int *) p)[canditer_next(&ci) - hseq]);
3117 : break;
3118 85 : case TYPE_lng:
3119 156873 : CAND_LOOP(&ci)
3120 156788 : cnt += !is_lng_nil(((const lng *) p)[canditer_next(&ci) - hseq]);
3121 : break;
3122 : #ifdef HAVE_HGE
3123 0 : case TYPE_hge:
3124 0 : CAND_LOOP(&ci)
3125 0 : cnt += !is_hge_nil(((const hge *) p)[canditer_next(&ci) - hseq]);
3126 : break;
3127 : #endif
3128 0 : case TYPE_flt:
3129 0 : CAND_LOOP(&ci)
3130 0 : cnt += !is_flt_nil(((const flt *) p)[canditer_next(&ci) - hseq]);
3131 : break;
3132 0 : case TYPE_dbl:
3133 0 : CAND_LOOP(&ci)
3134 0 : cnt += !is_dbl_nil(((const dbl *) p)[canditer_next(&ci) - hseq]);
3135 : break;
3136 0 : case TYPE_uuid:
3137 0 : CAND_LOOP(&ci)
3138 0 : cnt += !is_uuid_nil(((const uuid *) p)[canditer_next(&ci) - hseq]);
3139 : break;
3140 25 : case TYPE_str:
3141 25 : base = bi.vh->base;
3142 25 : switch (bi.width) {
3143 23 : case 1:
3144 4809 : CAND_LOOP(&ci)
3145 4786 : cnt += base[(var_t) ((const uint8_t *) p)[canditer_next(&ci) - hseq] + GDK_VAROFFSET] != '\200';
3146 : break;
3147 2 : case 2:
3148 312 : CAND_LOOP(&ci)
3149 310 : cnt += base[(var_t) ((const uint16_t *) p)[canditer_next(&ci) - hseq] + GDK_VAROFFSET] != '\200';
3150 : break;
3151 0 : case 4:
3152 0 : CAND_LOOP(&ci)
3153 0 : cnt += base[(var_t) ((const uint32_t *) p)[canditer_next(&ci) - hseq]] != '\200';
3154 : break;
3155 : #if SIZEOF_VAR_T == 8
3156 0 : case 8:
3157 0 : CAND_LOOP(&ci)
3158 0 : cnt += base[(var_t) ((const uint64_t *) p)[canditer_next(&ci) - hseq]] != '\200';
3159 : break;
3160 : #endif
3161 : default:
3162 0 : MT_UNREACHABLE();
3163 : }
3164 : break;
3165 0 : default:
3166 0 : nil = ATOMnilptr(t);
3167 0 : cmp = ATOMcompare(t);
3168 0 : if (nil == NULL) {
3169 0 : cnt = ci.ncand;
3170 0 : } else if (b->tvheap) {
3171 0 : base = b->tvheap->base;
3172 0 : CAND_LOOP(&ci)
3173 0 : cnt += (*cmp)(nil, base + ((const var_t *) p)[canditer_next(&ci) - hseq]) != 0;
3174 : } else {
3175 0 : CAND_LOOP(&ci)
3176 0 : cnt += (*cmp)(BUNtloc(bi, canditer_next(&ci) - hseq), nil) != 0;
3177 : }
3178 : break;
3179 : }
3180 204 : if (cnt == bi.count) {
3181 24 : MT_lock_set(&b->theaplock);
3182 24 : if (cnt == BATcount(b) && bi.h == b->theap) {
3183 : /* we learned something */
3184 24 : b->tnonil = true;
3185 24 : assert(!b->tnil);
3186 24 : b->tnil = false;
3187 : }
3188 24 : bat pbid = VIEWtparent(b);
3189 24 : MT_lock_unset(&b->theaplock);
3190 24 : if (pbid) {
3191 15 : BAT *pb = BATdescriptor(pbid);
3192 15 : if (pb) {
3193 15 : MT_lock_set(&pb->theaplock);
3194 15 : if (cnt == BATcount(pb) &&
3195 0 : bi.h == pb->theap &&
3196 0 : !pb->tnonil) {
3197 0 : pb->tnonil = true;
3198 0 : assert(!pb->tnil);
3199 0 : pb->tnil = false;
3200 : }
3201 15 : MT_lock_unset(&pb->theaplock);
3202 15 : BBPunfix(pb->batCacheid);
3203 : }
3204 : }
3205 : }
3206 204 : bat_iterator_end(&bi);
3207 204 : return cnt;
3208 : }
|