Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * (c) M. L. Kersten, P. Boncz, S. Manegold, N. Nes, K.S. Mullender
15 : * Common BAT Operations
16 : * We factor out all possible overhead by inlining code. This
17 : * includes the macros BUNhead and BUNtail, which do a test to see
18 : * whether the atom resides in the buns or in a variable storage
19 : * heap.
20 : */
21 : #include "monetdb_config.h"
22 : #include "gdk.h"
23 : #include "gdk_private.h"
24 :
25 : gdk_return
26 63811013 : unshare_varsized_heap(BAT *b)
27 : {
28 63811013 : if (ATOMvarsized(b->ttype) &&
29 29481581 : b->tvheap->parentid != b->batCacheid) {
30 322 : Heap *h = GDKmalloc(sizeof(Heap));
31 322 : if (h == NULL)
32 : return GDK_FAIL;
33 322 : MT_thread_setalgorithm("unshare vheap");
34 644 : *h = (Heap) {
35 322 : .parentid = b->batCacheid,
36 322 : .farmid = BBPselectfarm(b->batRole, TYPE_str, varheap),
37 : };
38 322 : strconcat_len(h->filename, sizeof(h->filename),
39 322 : BBP_physical(b->batCacheid), ".theap", NULL);
40 322 : if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
41 0 : HEAPfree(h, true);
42 0 : GDKfree(h);
43 0 : return GDK_FAIL;
44 : }
45 322 : ATOMIC_INIT(&h->refs, 1);
46 322 : MT_lock_set(&b->theaplock);
47 322 : Heap *oh = b->tvheap;
48 322 : b->tvheap = h;
49 322 : MT_lock_unset(&b->theaplock);
50 322 : BBPrelease(oh->parentid);
51 322 : HEAPdecref(oh, false);
52 : }
53 : return GDK_SUCCEED;
54 : }
55 :
56 : /* We try to be clever when appending one string bat to another.
57 : * First of all, we try to actually share the string heap so that we
58 : * don't need an extra copy, and if that can't be done, we see whether
59 : * it makes sense to just quickly copy the whole string heap instead
60 : * of inserting individual strings. See the comments in the code for
61 : * more information. */
62 : static gdk_return
63 87303 : insert_string_bat(BAT *b, BATiter *ni, struct canditer *ci, bool force, bool mayshare, lng timeoffset)
64 : {
65 87303 : size_t toff = ~(size_t) 0; /* tail offset */
66 87303 : BUN p, r; /* loop variables */
67 87303 : const void *tp = NULL; /* tail value pointer */
68 87303 : var_t v;
69 87303 : size_t off; /* offset within n's string heap */
70 87303 : BUN cnt = ci->ncand;
71 87303 : BUN oldcnt = BATcount(b);
72 :
73 87303 : assert(b->ttype == TYPE_str);
74 87303 : assert(b->tbaseoff == 0);
75 87303 : assert(b->theap->parentid == b->batCacheid);
76 : /* only transient bats can use some other bat's string heap */
77 87303 : assert(b->batRole == TRANSIENT || b->tvheap->parentid == b->batCacheid);
78 87303 : if (cnt == 0)
79 : return GDK_SUCCEED;
80 :
81 87302 : if (b->tvheap == ni->vh) {
82 : /* vheaps are already shared, continue doing so: we just
83 : * need to append the offsets */
84 23948 : toff = 0;
85 23948 : MT_thread_setalgorithm("shared vheap");
86 63354 : } else if (mayshare && b->batRole == TRANSIENT && oldcnt == 0) {
87 : /* we can share the vheaps, so we then only need to
88 : * append the offsets */
89 52507 : MT_lock_set(&b->theaplock);
90 52500 : bat bid = b->tvheap->parentid;
91 52500 : HEAPdecref(b->tvheap, bid == b->batCacheid);
92 52511 : HEAPincref(ni->vh);
93 52510 : b->tvheap = ni->vh;
94 52510 : MT_lock_unset(&b->theaplock);
95 52510 : BBPretain(ni->vh->parentid);
96 52509 : if (bid != b->batCacheid)
97 0 : BBPrelease(bid);
98 52509 : toff = 0;
99 52509 : MT_thread_setalgorithm("share vheap");
100 : } else {
101 : /* no heap sharing, so also make sure the heap isn't
102 : * shared currently (we're not allowed to write in
103 : * another bat's heap) */
104 11169 : if (b->tvheap->parentid != b->batCacheid &&
105 322 : unshare_varsized_heap(b) != GDK_SUCCEED) {
106 : return GDK_FAIL;
107 : }
108 10847 : if (oldcnt == 0 || (!GDK_ELIMDOUBLES(b->tvheap) &&
109 64 : !GDK_ELIMDOUBLES(ni->vh))) {
110 : /* we'll consider copying the string heap completely
111 : *
112 : * we first estimate how much space the string heap
113 : * should occupy, given the number of rows we need to
114 : * insert, then, if that is way smaller than the actual
115 : * space occupied, we will skip the copy and just insert
116 : * one by one */
117 : size_t len = 0;
118 5246732 : for (int i = 0; i < 1024; i++) {
119 5241605 : p = (BUN) (((double) rand() / RAND_MAX) * (cnt - 1));
120 5242849 : p = canditer_idx(ci, p) - ni->b->hseqbase;
121 5243636 : len += strlen(BUNtvar(*ni, p)) + 1;
122 : }
123 5127 : len = (len + 512) / 1024; /* rounded average length */
124 5127 : r = (GDK_ELIMLIMIT - GDK_STRHASHSIZE) / (len + 12);
125 : /* r is estimate of number of strings in
126 : * double-eliminated area */
127 5127 : if (r < ci->ncand)
128 885 : len = GDK_ELIMLIMIT + (ci->ncand - r) * len;
129 : else
130 4242 : len = GDK_STRHASHSIZE + ci->ncand * (len + 12);
131 : /* len is total estimated expected size of vheap */
132 :
133 5127 : if (len > ni->vhfree / 2) {
134 : /* we copy the string heap, perhaps appending */
135 5108 : if (oldcnt == 0) {
136 5072 : toff = 0;
137 5072 : MT_thread_setalgorithm("copy vheap");
138 : } else {
139 36 : toff = (b->tvheap->free + GDK_VARALIGN - 1) & ~(GDK_VARALIGN - 1);
140 36 : MT_thread_setalgorithm("append vheap");
141 : }
142 :
143 5108 : MT_lock_set(&b->theaplock);
144 5108 : if (HEAPgrow(&b->tvheap, toff + ni->vhfree, force) != GDK_SUCCEED) {
145 0 : MT_lock_unset(&b->theaplock);
146 0 : return GDK_FAIL;
147 : }
148 5108 : memcpy(b->tvheap->base + toff, ni->vh->base, ni->vhfree);
149 5108 : b->tvheap->free = toff + ni->vhfree;
150 5108 : b->tvheap->dirty = true;
151 5108 : MT_lock_unset(&b->theaplock);
152 : }
153 : }
154 : }
155 : /* if toff has the initial value of ~0, we insert strings
156 : * individually, otherwise we only copy (insert) offsets */
157 81563 : if (toff == ~(size_t) 0)
158 : v = GDK_VAROFFSET;
159 : else
160 81564 : v = b->tvheap->free - 1;
161 :
162 : /* make sure there is (vertical) space in the offset heap, we
163 : * may also widen thanks to v, set above */
164 87303 : if (GDKupgradevarheap(b, v, oldcnt + cnt < b->batCapacity ? b->batCapacity : oldcnt + cnt, b->batCount) != GDK_SUCCEED) {
165 : return GDK_FAIL;
166 : }
167 :
168 87302 : if (toff == 0 && ni->width == b->twidth && ci->tpe == cand_dense) {
169 : /* we don't need to do any translation of offset
170 : * values, so we can use fast memcpy */
171 81058 : MT_thread_setalgorithm("memcpy offsets");
172 81054 : memcpy(Tloc(b, BATcount(b)), (const char *) ni->base + ((ci->seq - ni->b->hseqbase) << ni->shift), cnt << ni->shift);
173 6244 : } else if (toff != ~(size_t) 0) {
174 : /* we don't need to insert any actual strings since we
175 : * have already made sure that they are all in b's
176 : * string heap at known locations (namely the offset
177 : * in n added to toff), so insert offsets from n after
178 : * adding toff into b */
179 : /* note the use of the "restrict" qualifier here: all
180 : * four pointers below point to the same value, but
181 : * only one of them will actually be used, hence we
182 : * still obey the rule for restrict-qualified
183 : * pointers */
184 506 : const uint8_t *restrict tbp = (const uint8_t *) ni->base;
185 506 : const uint16_t *restrict tsp = (const uint16_t *) ni->base;
186 506 : const uint32_t *restrict tip = (const uint32_t *) ni->base;
187 : #if SIZEOF_VAR_T == 8
188 506 : const uint64_t *restrict tlp = (const uint64_t *) ni->base;
189 : #endif
190 :
191 506 : MT_thread_setalgorithm("copy offset values");
192 506 : r = b->batCount;
193 3358018 : TIMEOUT_LOOP(cnt, timeoffset) {
194 3356802 : p = canditer_next(ci) - ni->b->hseqbase;
195 3356802 : switch (ni->width) {
196 267 : case 1:
197 267 : v = (var_t) tbp[p] + GDK_VAROFFSET;
198 267 : break;
199 3605 : case 2:
200 3605 : v = (var_t) tsp[p] + GDK_VAROFFSET;
201 3605 : break;
202 3352903 : case 4:
203 3352903 : v = (var_t) tip[p];
204 3352903 : break;
205 : #if SIZEOF_VAR_T == 8
206 27 : case 8:
207 27 : v = (var_t) tlp[p];
208 27 : break;
209 : #endif
210 : default:
211 0 : MT_UNREACHABLE();
212 : }
213 3356802 : v = (var_t) ((size_t) v + toff);
214 3356802 : assert(v >= GDK_VAROFFSET);
215 3356802 : assert((size_t) v < b->tvheap->free);
216 3356802 : switch (b->twidth) {
217 937 : case 1:
218 937 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 8));
219 937 : ((uint8_t *) b->theap->base)[r++] = (uint8_t) (v - GDK_VAROFFSET);
220 937 : break;
221 2964 : case 2:
222 2964 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 16));
223 2964 : ((uint16_t *) b->theap->base)[r++] = (uint16_t) (v - GDK_VAROFFSET);
224 2964 : break;
225 3352901 : case 4:
226 : #if SIZEOF_VAR_T == 8
227 3352901 : assert(v < ((var_t) 1 << 32));
228 : #endif
229 3352901 : ((uint32_t *) b->theap->base)[r++] = (uint32_t) v;
230 3352901 : break;
231 : #if SIZEOF_VAR_T == 8
232 0 : case 8:
233 0 : ((uint64_t *) b->theap->base)[r++] = (uint64_t) v;
234 0 : break;
235 : #endif
236 : default:
237 3356802 : MT_UNREACHABLE();
238 : }
239 : }
240 5738 : } else if (b->tvheap->free < ni->vhfree / 2 ||
241 : GDK_ELIMDOUBLES(b->tvheap)) {
242 : /* if b's string heap is much smaller than n's string
243 : * heap, don't bother checking whether n's string
244 : * values occur in b's string heap; also, if b is
245 : * (still) fully double eliminated, we must continue
246 : * to use the double elimination mechanism */
247 5711 : r = b->batCount;
248 5711 : oid hseq = ni->b->hseqbase;
249 5711 : MT_thread_setalgorithm("insert string values");
250 7857481 : TIMEOUT_LOOP(cnt, timeoffset) {
251 7845776 : p = canditer_next(ci) - hseq;
252 7778912 : tp = BUNtvar(*ni, p);
253 7691269 : if (tfastins_nocheckVAR(b, r, tp) != GDK_SUCCEED) {
254 : return GDK_FAIL;
255 : }
256 7845778 : r++;
257 : }
258 : } else {
259 : /* Insert values from n individually into b; however,
260 : * we check whether there is a string in b's string
261 : * heap at the same offset as the string is in n's
262 : * string heap (in case b's string heap is a copy of
263 : * n's). If this is the case, we just copy the
264 : * offset, otherwise we insert normally. */
265 27 : r = b->batCount;
266 27 : MT_thread_setalgorithm("insert string values with check");
267 29120 : TIMEOUT_LOOP(cnt, timeoffset) {
268 29065 : p = canditer_next(ci) - ni->b->hseqbase;
269 29065 : off = BUNtvaroff(*ni, p); /* the offset */
270 29065 : tp = ni->vh->base + off; /* the string */
271 29065 : if (off < b->tvheap->free &&
272 29065 : strcmp(b->tvheap->base + off, tp) == 0) {
273 : /* we found the string at the same
274 : * offset in b's string heap as it was
275 : * in n's string heap, so we don't
276 : * have to insert a new string into b:
277 : * we can just copy the offset */
278 24676 : v = (var_t) off;
279 24676 : switch (b->twidth) {
280 0 : case 1:
281 0 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 8));
282 0 : ((uint8_t *) b->theap->base)[r] = (uint8_t) (v - GDK_VAROFFSET);
283 0 : break;
284 0 : case 2:
285 0 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 16));
286 0 : ((uint16_t *) b->theap->base)[r] = (uint16_t) (v - GDK_VAROFFSET);
287 0 : break;
288 24676 : case 4:
289 : #if SIZEOF_VAR_T == 8
290 24676 : assert(v < ((var_t) 1 << 32));
291 : #endif
292 24676 : ((uint32_t *) b->theap->base)[r] = (uint32_t) v;
293 24676 : break;
294 : #if SIZEOF_VAR_T == 8
295 0 : case 8:
296 0 : ((uint64_t *) b->theap->base)[r] = (uint64_t) v;
297 0 : break;
298 : #endif
299 : default:
300 0 : MT_UNREACHABLE();
301 : }
302 : } else {
303 4389 : if (tfastins_nocheckVAR(b, r, tp) != GDK_SUCCEED) {
304 : return GDK_FAIL;
305 : }
306 : }
307 29065 : r++;
308 : }
309 : }
310 87298 : TIMEOUT_CHECK(timeoffset, TIMEOUT_HANDLER(GDK_FAIL));
311 87298 : MT_rwlock_wrlock(&b->thashlock);
312 87307 : MT_lock_set(&b->theaplock);
313 87299 : BATsetcount(b, oldcnt + ci->ncand);
314 87299 : assert(b->batCapacity >= b->batCount);
315 87299 : MT_lock_unset(&b->theaplock);
316 : /* maintain hash */
317 90295 : for (r = oldcnt, cnt = BATcount(b); b->thash && r < cnt; r++) {
318 2989 : HASHappend_locked(b, r, b->tvheap->base + VarHeapVal(Tloc(b, 0), r, b->twidth));
319 : }
320 87306 : BUN nunique = b->thash ? b->thash->nunique : 0;
321 87306 : MT_rwlock_wrunlock(&b->thashlock);
322 87302 : if (nunique != 0) {
323 9 : MT_lock_set(&b->theaplock);
324 9 : b->tunique_est = (double) nunique;
325 9 : MT_lock_unset(&b->theaplock);
326 : }
327 : return GDK_SUCCEED;
328 : }
329 :
330 : static gdk_return
331 437 : append_varsized_bat(BAT *b, BATiter *ni, struct canditer *ci, bool mayshare)
332 : {
333 437 : BUN cnt = ci->ncand, r;
334 437 : oid hseq = ni->b->hseqbase;
335 :
336 : /* only transient bats can use some other bat's vheap */
337 437 : assert(b->batRole == TRANSIENT || b->tvheap->parentid == b->batCacheid);
338 : /* make sure the bats use var_t */
339 437 : assert(b->twidth == ni->width);
340 437 : assert(b->twidth == SIZEOF_VAR_T);
341 437 : if (cnt == 0)
342 : return GDK_SUCCEED;
343 437 : if (cnt > BATcapacity(b) - BATcount(b)) {
344 : /* if needed space exceeds a normal growth extend just
345 : * with what's needed */
346 18 : BUN ncap = BATcount(b) + cnt;
347 18 : BUN grows = BATgrows(b);
348 :
349 18 : if (ncap > grows)
350 : grows = ncap;
351 18 : if (BATextend(b, grows) != GDK_SUCCEED)
352 : return GDK_FAIL;
353 : }
354 437 : if (mayshare &&
355 437 : BATcount(b) == 0 &&
356 222 : b->batRole == TRANSIENT &&
357 144 : ni->restricted == BAT_READ &&
358 144 : b->tvheap != ni->vh) {
359 : /* if b is still empty, in the transient farm, and n
360 : * is read-only, we replace b's vheap with a reference
361 : * to n's */
362 144 : MT_lock_set(&b->theaplock);
363 144 : bat bid = b->tvheap->parentid;
364 144 : HEAPdecref(b->tvheap, true);
365 144 : HEAPincref(ni->vh);
366 144 : b->tvheap = ni->vh;
367 144 : MT_lock_unset(&b->theaplock);
368 144 : BBPretain(ni->vh->parentid);
369 144 : if (bid != b->batCacheid)
370 0 : BBPrelease(bid);
371 : }
372 437 : if (b->tvheap == ni->vh) {
373 : /* if b and n use the same vheap, we only need to copy
374 : * the offsets from n to b */
375 304 : if (ci->tpe == cand_dense) {
376 : /* fast memcpy since we copy a consecutive
377 : * chunk of memory */
378 304 : memcpy(Tloc(b, BATcount(b)),
379 304 : (const var_t *) ni->base + (ci->seq - hseq),
380 304 : cnt << b->tshift);
381 : } else {
382 0 : var_t *restrict dst = (var_t *) Tloc(b, BATcount(b));
383 0 : const var_t *restrict src = (const var_t *) ni->base;
384 0 : while (cnt > 0) {
385 0 : cnt--;
386 0 : *dst++ = src[canditer_next(ci) - hseq];
387 : }
388 : }
389 304 : MT_rwlock_wrlock(&b->thashlock);
390 304 : MT_lock_set(&b->theaplock);
391 304 : BATsetcount(b, BATcount(b) + ci->ncand);
392 304 : MT_lock_unset(&b->theaplock);
393 : /* maintain hash table */
394 304 : for (BUN i = BATcount(b) - ci->ncand;
395 304 : b->thash && i < BATcount(b);
396 0 : i++) {
397 0 : HASHappend_locked(b, i, b->tvheap->base + *(var_t *) Tloc(b, i));
398 : }
399 304 : BUN nunique = b->thash ? b->thash->nunique : 0;
400 304 : MT_rwlock_wrunlock(&b->thashlock);
401 304 : if (nunique != 0) {
402 0 : MT_lock_set(&b->theaplock);
403 0 : b->tunique_est = (double) nunique;
404 0 : MT_lock_unset(&b->theaplock);
405 : }
406 304 : return GDK_SUCCEED;
407 : }
408 : /* b and n do not share their vheap, so we need to copy data */
409 133 : if (b->tvheap->parentid != b->batCacheid) {
410 : /* if b shares its vheap with some other bat, unshare it */
411 14 : Heap *h = GDKmalloc(sizeof(Heap));
412 14 : if (h == NULL) {
413 : return GDK_FAIL;
414 : }
415 28 : *h = (Heap) {
416 14 : .parentid = b->batCacheid,
417 14 : .farmid = BBPselectfarm(b->batRole, b->ttype, varheap),
418 : };
419 14 : strconcat_len(h->filename, sizeof(h->filename),
420 14 : BBP_physical(b->batCacheid), ".theap", NULL);
421 14 : if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
422 0 : HEAPfree(h, true);
423 0 : GDKfree(h);
424 0 : return GDK_FAIL;
425 : }
426 14 : ATOMIC_INIT(&h->refs, 1);
427 14 : MT_lock_set(&b->theaplock);
428 14 : Heap *oh = b->tvheap;
429 14 : b->tvheap = h;
430 14 : MT_lock_unset(&b->theaplock);
431 14 : if (oh->parentid != b->batCacheid)
432 14 : BBPrelease(oh->parentid);
433 14 : HEAPdecref(oh, false);
434 : }
435 133 : if (BATcount(b) == 0 && BATatoms[b->ttype].atomFix == NULL &&
436 78 : ci->tpe == cand_dense && ci->ncand == ni->count) {
437 : /* just copy the heaps */
438 78 : MT_lock_set(&b->theaplock);
439 78 : if (HEAPgrow(&b->tvheap, ni->vhfree, false) != GDK_SUCCEED) {
440 0 : MT_lock_unset(&b->theaplock);
441 0 : return GDK_FAIL;
442 : }
443 78 : memcpy(b->theap->base, ni->base, ni->hfree);
444 78 : memcpy(b->tvheap->base, ni->vh->base, ni->vhfree);
445 78 : b->theap->free = ni->hfree;
446 78 : b->theap->dirty = true;
447 78 : b->tvheap->free = ni->vhfree;
448 78 : b->tvheap->dirty = true;
449 78 : BATsetcount(b, ni->count);
450 78 : b->tnil = ni->nil;
451 78 : b->tnonil = ni->nonil;
452 78 : b->tsorted = ni->sorted;
453 78 : b->tnosorted = ni->nosorted;
454 78 : b->trevsorted = ni->revsorted;
455 78 : b->tnorevsorted = ni->norevsorted;
456 78 : b->tkey = ni->key;
457 78 : b->tnokey[0] = ni->nokey[0];
458 78 : b->tnokey[1] = ni->nokey[1];
459 78 : b->tminpos = ni->minpos;
460 78 : b->tmaxpos = ni->maxpos;
461 78 : b->tunique_est = ni->unique_est;
462 78 : MT_lock_unset(&b->theaplock);
463 78 : return GDK_SUCCEED;
464 : }
465 : /* copy data from n to b */
466 : r = BATcount(b);
467 333541 : for (BUN i = 0; i < cnt; i++) {
468 333486 : BUN p = canditer_next(ci) - hseq;
469 333486 : const void *t = BUNtvar(*ni, p);
470 333486 : if (tfastins_nocheckVAR(b, r, t) != GDK_SUCCEED) {
471 : return GDK_FAIL;
472 : }
473 333486 : r++;
474 : }
475 55 : MT_rwlock_wrlock(&b->thashlock);
476 55 : if (b->thash) {
477 0 : r -= cnt;
478 0 : BATiter bi = bat_iterator_nolock(b);
479 0 : for (BUN i = 0; i < cnt; i++) {
480 0 : const void *t = BUNtvar(bi, r);
481 0 : HASHappend_locked(b, r, t);
482 0 : r++;
483 : }
484 : }
485 55 : BUN nunique = b->thash ? b->thash->nunique : 0;
486 55 : MT_lock_set(&b->theaplock);
487 55 : BATsetcount(b, r);
488 55 : if (nunique != 0)
489 0 : b->tunique_est = (double) nunique;
490 55 : MT_lock_unset(&b->theaplock);
491 55 : MT_rwlock_wrunlock(&b->thashlock);
492 55 : return GDK_SUCCEED;
493 : }
494 :
495 : static gdk_return
496 156 : append_msk_bat(BAT *b, BATiter *ni, struct canditer *ci)
497 : {
498 156 : if (ci->ncand == 0)
499 : return GDK_SUCCEED;
500 156 : if (BATextend(b, BATcount(b) + ci->ncand) != GDK_SUCCEED)
501 : return GDK_FAIL;
502 :
503 156 : MT_lock_set(&b->theaplock);
504 :
505 156 : uint32_t boff = b->batCount % 32;
506 156 : uint32_t *bp = (uint32_t *) b->theap->base + b->batCount / 32;
507 156 : b->batCount += ci->ncand;
508 156 : b->theap->dirty = true;
509 156 : b->theap->free = ((b->batCount + 31) / 32) * 4;
510 156 : if (ci->tpe == cand_dense) {
511 156 : const uint32_t *np;
512 156 : uint32_t noff, mask;
513 156 : BUN cnt;
514 156 : noff = (ci->seq - ni->b->hseqbase) % 32;
515 156 : cnt = ci->ncand;
516 156 : np = (const uint32_t *) ni->base + (ci->seq - ni->b->hseqbase) / 32;
517 156 : if (boff == noff) {
518 : /* words of b and n are aligned, so we don't
519 : * need to shift bits around */
520 65 : if (boff + cnt <= 32) {
521 : /* all new bits within one word */
522 59 : if (cnt == 32) {
523 0 : *bp = *np;
524 : } else {
525 59 : mask = ((1U << cnt) - 1) << boff;
526 59 : *bp &= ~mask;
527 59 : *bp |= *np & mask;
528 : }
529 : } else {
530 : /* multiple words of b are affected */
531 6 : if (boff != 0) {
532 : /* first fill up the rest of the first
533 : * word */
534 0 : mask = ~0U << boff;
535 0 : *bp &= ~mask;
536 0 : *bp++ |= *np++ & mask;
537 0 : cnt -= 32 - boff;
538 : }
539 6 : if (cnt >= 32) {
540 : /* copy an integral number of words fast */
541 6 : BUN nw = cnt / 32;
542 6 : memcpy(bp, np, nw*sizeof(int));
543 6 : bp += nw;
544 6 : np += nw;
545 6 : cnt %= 32;
546 : }
547 6 : if (cnt > 0) {
548 : /* do the left over bits */
549 5 : mask = (1U << cnt) - 1;
550 5 : *bp = *np & mask;
551 : }
552 : }
553 91 : } else if (boff > noff) {
554 91 : if (boff + cnt <= 32) {
555 : /* we only need to copy bits from a
556 : * single word of n to a single word
557 : * of b */
558 : /* boff > 0, so cnt < 32, hence the
559 : * shift is ok */
560 64 : mask = (1U << cnt) - 1;
561 64 : *bp &= ~(mask << boff);
562 64 : *bp |= (*np & (mask << noff)) << (boff - noff);
563 : } else {
564 : /* first fill the rest of the last partial
565 : * word of b, so that's 32-boff bits */
566 27 : mask = (1U << (32 - boff)) - 1;
567 27 : *bp &= ~(mask << boff);
568 27 : *bp++ |= (*np & (mask << noff)) << (boff - noff);
569 27 : cnt -= 32 - boff;
570 :
571 : /* set boff and noff to the amount we need to
572 : * shift bits in consecutive words of n around
573 : * to fit into the next word of b; set mask to
574 : * the mask of the bottom bits of n that fit
575 : * in a word of b (and the complement are the
576 : * top bits that go to another word of b) */
577 27 : boff -= noff;
578 27 : noff = 32 - boff;
579 27 : mask = (1U << noff) - 1;
580 221 : while (cnt >= 32) {
581 194 : *bp = (*np++ & ~mask) >> noff;
582 194 : *bp++ |= (*np & mask) << boff;
583 194 : cnt -= 32;
584 : }
585 27 : if (cnt > noff) {
586 : /* the last bits come from two words
587 : * in n */
588 7 : *bp = (*np++ & ~mask) >> noff;
589 7 : cnt -= noff;
590 7 : mask = (1U << cnt) - 1;
591 7 : *bp++ |= (*np & mask) << boff;
592 20 : } else if (cnt > 0) {
593 : /* the last bits come from a single
594 : * word in n */
595 20 : mask = ((1U << cnt) - 1) << noff;
596 20 : *bp = (*np & mask) >> noff;
597 : }
598 : }
599 : } else {
600 : /* boff < noff */
601 0 : if (noff + cnt <= 32) {
602 : /* only need part of the first word of n */
603 0 : assert(cnt < 32); /* noff > 0, so cnt < 32 */
604 0 : mask = (1U << cnt) - 1;
605 0 : *bp &= ~(mask << boff);
606 0 : *bp |= (*np & (mask << noff)) >> (noff - boff);
607 0 : } else if (boff + cnt <= 32) {
608 : /* only need to fill a single word of
609 : * b, but from two of n */
610 0 : if (cnt < 32)
611 0 : *bp &= ~(((1U << cnt) - 1) << boff);
612 : else
613 0 : *bp = 0;
614 0 : mask = ~((1U << noff) - 1);
615 0 : *bp |= (*np++ & mask) >> (noff - boff);
616 0 : cnt -= 32 - noff;
617 0 : mask = (1U << cnt) - 1;
618 0 : *bp |= (*np & mask) << (32 - noff);
619 : } else {
620 0 : if (boff > 0) {
621 : /* fill the rest of the first word of b */
622 0 : cnt -= 32 - boff;
623 0 : *bp &= (1U << boff) - 1;
624 0 : mask = ~((1U << noff) - 1);
625 0 : noff -= boff;
626 0 : boff = 32 - noff;
627 0 : *bp |= (*np++ & mask) >> noff;
628 0 : *bp |= (*np & ((1U << noff) - 1)) << boff;
629 : } else {
630 0 : boff = 32 - noff;
631 : }
632 0 : mask = (1U << noff) - 1;
633 0 : while (cnt >= 32) {
634 0 : *bp = (*np++ & ~mask) >> noff;
635 0 : *bp++ |= (*np & mask) << boff;
636 0 : cnt -= 32;
637 : }
638 0 : if (cnt > 0) {
639 0 : *bp = (*np++ & ~mask) >> noff;
640 0 : if (cnt > noff)
641 0 : *bp++ |= (*np & mask) << boff;
642 : }
643 : }
644 : }
645 : } else {
646 0 : oid o;
647 0 : uint32_t v = boff > 0 ? *bp & ((1U << boff) - 1) : 0;
648 0 : do {
649 0 : for (uint32_t i = boff; i < 32; i++) {
650 0 : o = canditer_next(ci);
651 0 : if (is_oid_nil(o))
652 : break;
653 0 : o -= ni->b->hseqbase;
654 0 : v |= (uint32_t) Tmskval(ni, o - ni->b->hseqbase) << i;
655 : }
656 0 : *bp++ = v;
657 0 : v = 0;
658 0 : boff = 0;
659 0 : } while (!is_oid_nil(o));
660 : }
661 156 : MT_lock_unset(&b->theaplock);
662 156 : return GDK_SUCCEED;
663 : }
664 :
665 : /* Append the contents of BAT n (subject to the optional candidate
666 : * list s) to BAT b. If b is empty, b will get the seqbase of s if it
667 : * was passed in, and else the seqbase of n. */
668 : static gdk_return
669 1260454 : BATappend2(BAT *b, BAT *n, BAT *s, bool force, bool mayshare)
670 : {
671 1260454 : struct canditer ci;
672 1260454 : BUN r;
673 1260454 : oid hseq = n->hseqbase;
674 1260454 : char buf[64];
675 1260454 : lng t0 = 0;
676 1260454 : const ValRecord *prop = NULL;
677 1260454 : ValRecord minprop, maxprop;
678 1260454 : const void *minbound = NULL, *maxbound = NULL;
679 1260454 : int (*atomcmp) (const void *, const void *) = ATOMcompare(b->ttype);
680 1260454 : bool hlocked = false;
681 :
682 1260454 : if (b == NULL || n == NULL || BATcount(n) == 0) {
683 : return GDK_SUCCEED;
684 : }
685 817500 : assert(b->theap->parentid == b->batCacheid);
686 :
687 817500 : TRC_DEBUG_IF(ALGO) {
688 0 : t0 = GDKusec();
689 0 : snprintf(buf, sizeof(buf), ALGOBATFMT, ALGOBATPAR(b));
690 : }
691 :
692 817500 : ALIGNapp(b, force, GDK_FAIL);
693 :
694 2424219 : if (ATOMstorage(ATOMtype(b->ttype)) != ATOMstorage(ATOMtype(n->ttype))) {
695 0 : GDKerror("Incompatible operands ("ALGOBATFMT" vs. "ALGOBATFMT").\n", ALGOBATPAR(b), ALGOBATPAR(n));
696 0 : return GDK_FAIL;
697 : }
698 :
699 817527 : if (BATttype(b) != BATttype(n) &&
700 : ATOMtype(b->ttype) != ATOMtype(n->ttype)) {
701 0 : TRC_DEBUG(CHECK_, "Interpreting %s as %s.\n",
702 : ATOMname(BATttype(n)), ATOMname(BATttype(b)));
703 : }
704 :
705 817527 : lng timeoffset = 0;
706 817527 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
707 817422 : if (qry_ctx != NULL) {
708 809529 : timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
709 : }
710 :
711 817422 : BATiter ni = bat_iterator(n);
712 :
713 817362 : canditer_init(&ci, n, s);
714 817218 : if (ci.ncand == 0) {
715 0 : goto doreturn;
716 : }
717 :
718 817218 : if (BATcount(b) + ci.ncand > BUN_MAX) {
719 0 : bat_iterator_end(&ni);
720 0 : GDKerror("combined BATs too large\n");
721 0 : return GDK_FAIL;
722 : }
723 :
724 817218 : if (b->hseqbase + BATcount(b) + ci.ncand >= GDK_oid_max) {
725 0 : bat_iterator_end(&ni);
726 0 : GDKerror("overflow of head value\n");
727 0 : return GDK_FAIL;
728 : }
729 :
730 817218 : IMPSdestroy(b); /* imprints do not support updates yet */
731 817355 : OIDXdestroy(b);
732 817345 : STRMPdestroy(b); /* TODO: use STRMPappendBitString */
733 817310 : RTREEdestroy(b);
734 :
735 817305 : MT_lock_set(&b->theaplock);
736 816967 : const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
737 816964 : if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL &&
738 48 : VALcopy(&minprop, prop) != NULL) {
739 48 : minbound = VALptr(&minprop);
740 48 : if (ci.ncand == BATcount(n) &&
741 62 : ni.minpos != BUN_NONE &&
742 14 : atomcmp(BUNtail(ni, ni.minpos), minbound) < 0) {
743 0 : assert(0);
744 : GDKerror("value out of bounds\n");
745 : MT_lock_unset(&b->theaplock);
746 : goto bailout;
747 : }
748 : }
749 816919 : if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL &&
750 40 : VALcopy(&maxprop, prop) != NULL) {
751 40 : maxbound = VALptr(&maxprop);
752 40 : if (ci.ncand == BATcount(n) &&
753 52 : ni.maxpos != BUN_NONE &&
754 12 : atomcmp(BUNtail(ni, ni.maxpos), maxbound) >= 0) {
755 0 : assert(0);
756 : GDKerror("value out of bounds\n");
757 : MT_lock_unset(&b->theaplock);
758 : goto bailout;
759 : }
760 : }
761 :
762 817160 : if (BATcount(b) == 0 || b->tmaxpos != BUN_NONE) {
763 390048 : if (ni.maxpos != BUN_NONE) {
764 148006 : BATiter bi = bat_iterator_nolock(b);
765 148006 : if (BATcount(b) == 0 || atomcmp(BUNtail(bi, b->tmaxpos), BUNtail(ni, ni.maxpos)) < 0) {
766 145895 : if (s == NULL) {
767 145763 : b->tmaxpos = BATcount(b) + ni.maxpos;
768 : } else {
769 132 : b->tmaxpos = BUN_NONE;
770 : }
771 : }
772 : } else {
773 242042 : b->tmaxpos = BUN_NONE;
774 : }
775 : }
776 817160 : if (BATcount(b) == 0 || b->tminpos != BUN_NONE) {
777 390015 : if (ni.minpos != BUN_NONE) {
778 147961 : BATiter bi = bat_iterator_nolock(b);
779 147961 : if (BATcount(b) == 0 || atomcmp(BUNtail(bi, b->tminpos), BUNtail(ni, ni.minpos)) > 0) {
780 145035 : if (s == NULL) {
781 144906 : b->tminpos = BATcount(b) + ni.minpos;
782 : } else {
783 129 : b->tminpos = BUN_NONE;
784 : }
785 : }
786 : } else {
787 242054 : b->tminpos = BUN_NONE;
788 : }
789 : }
790 817160 : if (ci.ncand > BATcount(b) / gdk_unique_estimate_keep_fraction) {
791 816698 : b->tunique_est = 0;
792 : }
793 817160 : MT_lock_unset(&b->theaplock);
794 : /* load hash so that we can maintain it */
795 817307 : (void) BATcheckhash(b);
796 :
797 817540 : if (b->ttype == TYPE_void) {
798 : /* b does not have storage, keep it that way if we can */
799 13474 : HASHdestroy(b); /* we're not maintaining the hash here */
800 13475 : MT_lock_set(&b->theaplock);
801 13475 : if (BATtdensebi(&ni) && ci.tpe == cand_dense &&
802 12396 : (BATcount(b) == 0 ||
803 8673 : (BATtdense(b) &&
804 8673 : b->tseqbase + BATcount(b) == n->tseqbase + ci.seq - hseq))) {
805 : /* n is also dense and consecutive with b */
806 12341 : if (BATcount(b) == 0) {
807 3723 : if (minbound && n->tseqbase + ci.seq - hseq < *(const oid *)minbound) {
808 0 : assert(0);
809 : GDKerror("value not within bounds\n");
810 : MT_lock_unset(&b->theaplock);
811 : goto bailout;
812 : }
813 3723 : BATtseqbase(b, n->tseqbase + ci.seq - hseq);
814 : }
815 12339 : if (maxbound && b->tseqbase + BATcount(b) + ci.ncand >= *(const oid *)maxbound) {
816 0 : assert(0);
817 : GDKerror("value not within bounds\n");
818 : MT_lock_unset(&b->theaplock);
819 : goto bailout;
820 : }
821 12339 : BATsetcount(b, BATcount(b) + ci.ncand);
822 12338 : MT_lock_unset(&b->theaplock);
823 12339 : goto doreturn;
824 : }
825 1134 : if ((BATcount(b) == 0 || is_oid_nil(b->tseqbase)) &&
826 20 : ni.type == TYPE_void && is_oid_nil(n->tseqbase)) {
827 : /* both b and n are void/nil */
828 0 : if (notnull) {
829 0 : assert(0);
830 : GDKerror("NULL value not within bounds\n");
831 : MT_lock_unset(&b->theaplock);
832 : goto bailout;
833 : }
834 0 : BATtseqbase(b, oid_nil);
835 0 : BATsetcount(b, BATcount(b) + ci.ncand);
836 0 : MT_lock_unset(&b->theaplock);
837 0 : goto doreturn;
838 : }
839 : /* we need to materialize b; allocate enough capacity */
840 1134 : MT_lock_unset(&b->theaplock);
841 1134 : if (BATmaterialize(b, BATcount(b) + ci.ncand) != GDK_SUCCEED) {
842 0 : goto bailout;
843 : }
844 : }
845 :
846 : /* property setting */
847 805200 : MT_lock_set(&b->theaplock);
848 805164 : r = BATcount(b);
849 :
850 805164 : if (BATcount(b) == 0) {
851 381633 : b->tsorted = ni.sorted;
852 381633 : b->trevsorted = ni.revsorted;
853 381633 : b->tseqbase = oid_nil;
854 381633 : b->tnonil = ni.nonil;
855 381633 : b->tnil = ni.nil && ci.ncand == BATcount(n);
856 381633 : if (ci.tpe == cand_dense) {
857 381489 : b->tnosorted = ci.seq - hseq <= ni.nosorted && ni.nosorted < ci.seq + ci.ncand - hseq ? ni.nosorted + hseq - ci.seq : 0;
858 381489 : b->tnorevsorted = ci.seq - hseq <= ni.norevsorted && ni.norevsorted < ci.seq + ci.ncand - hseq ? ni.norevsorted + hseq - ci.seq : 0;
859 381489 : if (BATtdensebi(&ni)) {
860 1874 : b->tseqbase = n->tseqbase + ci.seq - hseq;
861 : }
862 : } else {
863 144 : b->tnosorted = 0;
864 144 : b->tnorevsorted = 0;
865 : }
866 381633 : b->tkey = ni.key;
867 381633 : if (ci.ncand == BATcount(n)) {
868 380960 : b->tnokey[0] = ni.nokey[0];
869 380960 : b->tnokey[1] = ni.nokey[1];
870 : } else {
871 673 : b->tnokey[0] = b->tnokey[1] = 0;
872 : }
873 : } else {
874 423531 : BUN last = r - 1;
875 423531 : BATiter bi = bat_iterator_nolock(b);
876 423624 : int xx = ATOMcmp(b->ttype,
877 : BUNtail(ni, ci.seq - hseq),
878 : BUNtail(bi, last));
879 423414 : if (b->tsorted && (!ni.sorted || xx < 0)) {
880 14502 : b->tsorted = false;
881 14502 : b->tnosorted = 0;
882 14502 : b->tseqbase = oid_nil;
883 : }
884 423414 : if (b->trevsorted &&
885 42450 : (!ni.revsorted || xx > 0)) {
886 12325 : b->trevsorted = false;
887 12325 : b->tnorevsorted = 0;
888 : }
889 423414 : if (b->tkey &&
890 41386 : (!(b->tsorted || b->trevsorted) ||
891 31587 : !ni.key || xx == 0)) {
892 14186 : BATkey(b, false);
893 : }
894 423412 : if (b->ttype != TYPE_void && b->tsorted && BATtdense(b) &&
895 1130 : (!BATtdensebi(&ni) ||
896 756 : ci.tpe != cand_dense ||
897 378 : 1 + *(oid *) BUNtloc(bi, last) != BUNtoid(n, ci.seq - hseq))) {
898 768 : b->tseqbase = oid_nil;
899 : }
900 423412 : b->tnonil &= ni.nonil;
901 839187 : b->tnil |= ni.nil && ci.ncand == ni.count;
902 : }
903 805045 : MT_lock_unset(&b->theaplock);
904 805140 : if (b->ttype == TYPE_str) {
905 87294 : if (insert_string_bat(b, &ni, &ci, force, mayshare, timeoffset) != GDK_SUCCEED) {
906 0 : goto bailout;
907 : }
908 717846 : } else if (ATOMvarsized(b->ttype)) {
909 437 : if (append_varsized_bat(b, &ni, &ci, mayshare) != GDK_SUCCEED) {
910 0 : goto bailout;
911 : }
912 717409 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
913 : /* no bounds and NOT_NULL property on MSK bats */
914 156 : assert(minbound == NULL && maxbound == NULL && !notnull);
915 156 : if (append_msk_bat(b, &ni, &ci) != GDK_SUCCEED) {
916 0 : goto bailout;
917 : }
918 : } else {
919 717253 : if (ci.ncand > BATcapacity(b) - BATcount(b)) {
920 : /* if needed space exceeds a normal growth
921 : * extend just with what's needed */
922 9636 : BUN ncap = BATcount(b) + ci.ncand;
923 9636 : BUN grows = BATgrows(b);
924 :
925 9647 : if (ncap > grows)
926 : grows = ncap;
927 9647 : if (BATextend(b, grows) != GDK_SUCCEED) {
928 0 : goto bailout;
929 : }
930 : }
931 717268 : MT_rwlock_wrlock(&b->thashlock);
932 717271 : hlocked = true;
933 717271 : if (BATatoms[b->ttype].atomFix == NULL &&
934 717167 : b->ttype != TYPE_void &&
935 717167 : ni.type != TYPE_void &&
936 714644 : ci.tpe == cand_dense) {
937 : /* use fast memcpy if we can */
938 714585 : memcpy(Tloc(b, BATcount(b)),
939 714585 : (const char *) ni.base + ((ci.seq - hseq) << ni.shift),
940 714585 : ci.ncand << ni.shift);
941 714852 : for (BUN i = 0; b->thash && i < ci.ncand; i++) {
942 280 : HASHappend_locked(b, r, Tloc(b, r));
943 267 : r++;
944 : }
945 : } else {
946 2686 : const void *atomnil = ATOMnilptr(b->ttype);
947 2162144 : TIMEOUT_LOOP(ci.ncand, timeoffset) {
948 2156743 : BUN p = canditer_next(&ci) - hseq;
949 2156965 : const void *t = BUNtail(ni, p);
950 2157362 : bool isnil = atomcmp(t, atomnil) == 0;
951 2157340 : if (notnull && isnil) {
952 0 : assert(0);
953 : GDKerror("NULL value not within bounds\n");
954 : goto bailout;
955 2157340 : } else if (minbound &&
956 2157340 : !isnil &&
957 0 : atomcmp(t, minbound) < 0) {
958 0 : assert(0);
959 : GDKerror("value not within bounds\n");
960 : goto bailout;
961 2157340 : } else if (maxbound &&
962 0 : !isnil &&
963 0 : atomcmp(t, maxbound) >= 0) {
964 0 : assert(0);
965 : GDKerror("value not within bounds\n");
966 : goto bailout;
967 2157340 : } else if (tfastins_nocheck(b, r, t) != GDK_SUCCEED) {
968 0 : goto bailout;
969 : }
970 2156695 : if (b->thash)
971 0 : HASHappend_locked(b, r, t);
972 2156743 : r++;
973 : }
974 2686 : TIMEOUT_CHECK(timeoffset, GOTO_LABEL_TIMEOUT_HANDLER(bailout));
975 : }
976 717258 : BUN nunique;
977 717258 : nunique = b->thash ? b->thash->nunique : 0;
978 717258 : MT_lock_set(&b->theaplock);
979 717099 : BATsetcount(b, b->batCount + ci.ncand);
980 717122 : if (nunique != 0)
981 44 : b->tunique_est = (double) nunique;
982 717122 : MT_lock_unset(&b->theaplock);
983 717221 : assert(hlocked);
984 717221 : MT_rwlock_wrunlock(&b->thashlock);
985 717221 : hlocked = false;
986 : }
987 :
988 817567 : doreturn:
989 817567 : bat_iterator_end(&ni);
990 817564 : if (minbound)
991 48 : VALclear(&minprop);
992 817564 : if (maxbound)
993 40 : VALclear(&maxprop);
994 817564 : TRC_DEBUG(ALGO, "b=%s,n=" ALGOBATFMT ",s=" ALGOOPTBATFMT
995 : " -> " ALGOBATFMT " (" LLFMT " usec)\n",
996 : buf, ALGOBATPAR(n), ALGOOPTBATPAR(s), ALGOBATPAR(b),
997 : GDKusec() - t0);
998 :
999 : return GDK_SUCCEED;
1000 : bailout:
1001 0 : if (hlocked)
1002 0 : MT_rwlock_wrunlock(&b->thashlock);
1003 0 : if (minbound)
1004 0 : VALclear(&minprop);
1005 0 : if (maxbound)
1006 0 : VALclear(&maxprop);
1007 0 : bat_iterator_end(&ni);
1008 0 : return GDK_FAIL;
1009 : }
1010 :
1011 : gdk_return
1012 1260505 : BATappend(BAT *b, BAT *n, BAT *s, bool force)
1013 : {
1014 1260505 : return BATappend2(b, n, s, force, true);
1015 : }
1016 :
1017 : gdk_return
1018 4 : BATdel(BAT *b, BAT *d)
1019 : {
1020 4 : gdk_return (*unfix) (const void *) = BATatoms[b->ttype].atomUnfix;
1021 4 : void (*atmdel) (Heap *, var_t *) = BATatoms[b->ttype].atomDel;
1022 4 : MT_lock_set(&b->theaplock);
1023 4 : BATiter bi = bat_iterator_nolock(b);
1024 4 : MT_lock_unset(&b->theaplock);
1025 :
1026 4 : assert(ATOMtype(d->ttype) == TYPE_oid);
1027 4 : assert(d->tsorted);
1028 4 : assert(d->tkey);
1029 4 : if (BATcount(d) == 0)
1030 : return GDK_SUCCEED;
1031 4 : IMPSdestroy(b);
1032 4 : OIDXdestroy(b);
1033 4 : HASHdestroy(b);
1034 4 : PROPdestroy(b);
1035 4 : STRMPdestroy(b);
1036 4 : RTREEdestroy(b);
1037 4 : if (BATtdense(d)) {
1038 2 : oid o = d->tseqbase;
1039 2 : BUN c = BATcount(d);
1040 :
1041 2 : if (o + c <= b->hseqbase)
1042 : return GDK_SUCCEED;
1043 2 : if (o < b->hseqbase) {
1044 0 : c -= b->hseqbase - o;
1045 0 : o = b->hseqbase;
1046 : }
1047 2 : if (o - b->hseqbase < b->batInserted) {
1048 0 : GDKerror("cannot delete committed values\n");
1049 0 : return GDK_FAIL;
1050 : }
1051 2 : if (o + c > b->hseqbase + BATcount(b))
1052 0 : c = b->hseqbase + BATcount(b) - o;
1053 2 : if (c == 0)
1054 : return GDK_SUCCEED;
1055 2 : if (unfix || atmdel) {
1056 0 : BUN p = o - b->hseqbase;
1057 0 : BUN q = p + c;
1058 0 : while (p < q) {
1059 0 : if (unfix && (*unfix)(BUNtail(bi, p)) != GDK_SUCCEED)
1060 : return GDK_FAIL;
1061 0 : if (atmdel)
1062 0 : (*atmdel)(b->tvheap, (var_t *) BUNtloc(bi, p));
1063 0 : p++;
1064 : }
1065 : }
1066 2 : if (BATtdense(b) && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED)
1067 : return GDK_FAIL;
1068 2 : MT_lock_set(&b->theaplock);
1069 2 : if (o + c < b->hseqbase + BATcount(b)) {
1070 0 : o -= b->hseqbase;
1071 0 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1072 0 : BUN n = BATcount(b) - (o + c);
1073 : /* not very efficient, but first see
1074 : * how much this is used */
1075 0 : for (BUN i = 0; i < n; i++)
1076 0 : mskSetVal(b, o + i,
1077 0 : mskGetVal(b, o + c + i));
1078 : } else {
1079 0 : memmove(Tloc(b, o),
1080 0 : Tloc(b, o + c),
1081 0 : b->twidth * (BATcount(b) - (o + c)));
1082 : }
1083 0 : b->theap->dirty = true;
1084 : // o += b->hseqbase; // if this were to be used again
1085 : }
1086 2 : b->batCount -= c;
1087 : } else {
1088 2 : BATiter di = bat_iterator(d);
1089 2 : const oid *o = (const oid *) di.base;
1090 2 : const oid *s;
1091 2 : BUN c = di.count;
1092 2 : BUN nd = 0;
1093 2 : BUN pos;
1094 2 : char *p = NULL;
1095 :
1096 2 : if (o[c - 1] <= b->hseqbase) {
1097 0 : bat_iterator_end(&di);
1098 0 : return GDK_SUCCEED;
1099 : }
1100 2 : while (*o < b->hseqbase) {
1101 0 : o++;
1102 0 : c--;
1103 : }
1104 2 : if (*o - b->hseqbase < b->batInserted) {
1105 0 : bat_iterator_end(&di);
1106 0 : GDKerror("cannot delete committed values\n");
1107 0 : return GDK_FAIL;
1108 : }
1109 2 : if (BATtdense(b) && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED) {
1110 0 : bat_iterator_end(&di);
1111 0 : return GDK_FAIL;
1112 : }
1113 2 : s = o;
1114 2 : pos = *o - b->hseqbase;
1115 2 : if (ATOMstorage(b->ttype) != TYPE_msk)
1116 2 : p = Tloc(b, pos);
1117 6 : while (c > 0 && *o < b->hseqbase + BATcount(b)) {
1118 4 : size_t n;
1119 4 : if (unfix)
1120 0 : (*unfix)(BUNtail(bi, *o - b->hseqbase));
1121 4 : if (atmdel)
1122 0 : (*atmdel)(b->tvheap, (var_t *) BUNtloc(bi, *o - b->hseqbase));
1123 4 : o++;
1124 4 : c--;
1125 4 : nd++;
1126 4 : if (c == 0 || *o - b->hseqbase >= BATcount(b))
1127 2 : n = b->hseqbase + BATcount(b) - o[-1] - 1;
1128 2 : else if ((oid) (o - s) < *o - *s)
1129 2 : n = o[0] - o[-1] - 1;
1130 : else
1131 : n = 0;
1132 4 : if (n > 0) {
1133 2 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1134 0 : BUN opos = o[-1] + 1 - b->hseqbase;
1135 : /* not very efficient, but
1136 : * first see how much this is
1137 : * used */
1138 0 : for (BUN i = 0; i < n; i++) {
1139 0 : mskSetVal(b, pos + i,
1140 0 : mskGetVal(b, opos + i));
1141 : }
1142 0 : pos += n;
1143 : } else {
1144 2 : n *= b->twidth;
1145 2 : memmove(p,
1146 2 : Tloc(b, o[-1] + 1 - b->hseqbase),
1147 : n);
1148 2 : p += n;
1149 : }
1150 : s = o;
1151 : }
1152 : }
1153 2 : bat_iterator_end(&di);
1154 2 : MT_lock_set(&b->theaplock);
1155 2 : b->theap->dirty = true;
1156 2 : b->batCount -= nd;
1157 : }
1158 4 : if (b->batCount <= 1) {
1159 : /* some trivial properties */
1160 2 : b->tkey = true;
1161 2 : b->tsorted = b->trevsorted = true;
1162 2 : if (b->batCount == 0) {
1163 2 : b->tnil = false;
1164 2 : b->tnonil = true;
1165 : }
1166 : }
1167 : /* not sure about these anymore */
1168 4 : b->tnosorted = b->tnorevsorted = 0;
1169 4 : b->tnokey[0] = b->tnokey[1] = 0;
1170 4 : b->tminpos = BUN_NONE;
1171 4 : b->tmaxpos = BUN_NONE;
1172 4 : b->tunique_est = 0.0;
1173 4 : MT_lock_unset(&b->theaplock);
1174 :
1175 4 : return GDK_SUCCEED;
1176 : }
1177 :
1178 : /*
1179 : * Replace all values in b with values from n whose location is given by
1180 : * the oid in either p or positions.
1181 : * If positions is used, autoincr specifies whether it is the first of a
1182 : * dense range of positions or whether it is a full-blown array of
1183 : * position.
1184 : * If mayappend is set, the position in p/positions may refer to
1185 : * locations beyond the end of b.
1186 : */
1187 : static gdk_return
1188 75234 : BATappend_or_update(BAT *b, BAT *p, const oid *positions, BAT *n,
1189 : bool mayappend, bool autoincr, bool force)
1190 : {
1191 75234 : lng t0 = GDKusec();
1192 75228 : oid pos = oid_nil;
1193 75228 : BUN nunique = 0;
1194 :
1195 75228 : if (b == NULL || b->ttype == TYPE_void || n == NULL) {
1196 : return GDK_SUCCEED;
1197 : }
1198 : /* either p or positions */
1199 75228 : assert((p == NULL) != (positions == NULL));
1200 75228 : if (p != NULL) {
1201 75052 : if (BATcount(p) != BATcount(n)) {
1202 0 : GDKerror("update BATs not the same size\n");
1203 0 : return GDK_FAIL;
1204 : }
1205 75052 : if (ATOMtype(p->ttype) != TYPE_oid) {
1206 0 : GDKerror("positions BAT not type OID\n");
1207 0 : return GDK_FAIL;
1208 : }
1209 75052 : if (BATtdense(p)) {
1210 67043 : pos = p->tseqbase;
1211 67043 : positions = &pos;
1212 67043 : autoincr = true;
1213 67043 : p = NULL;
1214 8009 : } else if (p->ttype != TYPE_void) {
1215 8007 : positions = (const oid *) Tloc(p, 0);
1216 8007 : autoincr = false;
1217 : } else {
1218 : autoincr = false;
1219 : }
1220 176 : } else if (autoincr) {
1221 176 : pos = *positions;
1222 : }
1223 75228 : if (BATcount(n) == 0) {
1224 : return GDK_SUCCEED;
1225 : }
1226 :
1227 24000 : BATiter ni = bat_iterator(n);
1228 :
1229 24004 : OIDXdestroy(b);
1230 24004 : IMPSdestroy(b);
1231 24004 : STRMPdestroy(b);
1232 24004 : RTREEdestroy(b);
1233 : /* load hash so that we can maintain it */
1234 24003 : (void) BATcheckhash(b);
1235 :
1236 24003 : MT_lock_set(&b->theaplock);
1237 24003 : if (!force && (b->batRestricted != BAT_WRITE ||
1238 15 : (ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1)) {
1239 0 : MT_lock_unset(&b->theaplock);
1240 0 : bat_iterator_end(&ni);
1241 0 : GDKerror("access denied to %s, aborting.\n", BATgetId(b));
1242 0 : return GDK_FAIL;
1243 : }
1244 24003 : BATiter bi = bat_iterator_nolock(b);
1245 24003 : if (ni.count > BATcount(b) / gdk_unique_estimate_keep_fraction) {
1246 23413 : b->tunique_est = 0;
1247 : }
1248 :
1249 24003 : b->tsorted = b->trevsorted = false;
1250 24003 : b->tnosorted = b->tnorevsorted = 0;
1251 24003 : b->tseqbase = oid_nil;
1252 24003 : b->tkey = false;
1253 24003 : b->tnokey[0] = b->tnokey[1] = 0;
1254 :
1255 24003 : int (*atomcmp)(const void *, const void *) = ATOMcompare(b->ttype);
1256 24003 : const void *nil = ATOMnilptr(b->ttype);
1257 24003 : oid hseqend = b->hseqbase + BATcount(b);
1258 :
1259 24003 : MT_lock_unset(&b->theaplock);
1260 :
1261 24004 : bool anynil = false;
1262 24004 : bool locked = false;
1263 :
1264 24004 : if (b->tvheap) {
1265 1163544 : for (BUN i = 0; i < ni.count; i++) {
1266 1161074 : oid updid;
1267 1161074 : if (positions) {
1268 1160061 : updid = autoincr ? pos++ : *positions++;
1269 : } else {
1270 1013 : updid = BUNtoid(p, i);
1271 : }
1272 :
1273 1161074 : if (updid < b->hseqbase ||
1274 1161074 : (!mayappend && updid >= hseqend)) {
1275 0 : GDKerror("id out of range\n");
1276 0 : goto bailout;
1277 : }
1278 1161074 : updid -= b->hseqbase;
1279 1161074 : if (!force && updid < b->batInserted) {
1280 0 : GDKerror("updating committed value\n");
1281 0 : goto bailout;
1282 : }
1283 :
1284 1161074 : const void *new = BUNtvar(ni, i);
1285 :
1286 1158853 : if (updid >= BATcount(b)) {
1287 29450 : assert(mayappend);
1288 29450 : if (locked) {
1289 8 : MT_rwlock_wrunlock(&b->thashlock);
1290 8 : locked = false;
1291 : }
1292 29450 : if (b->tminpos != bi.minpos ||
1293 29449 : b->tmaxpos != bi.maxpos) {
1294 1 : MT_lock_set(&b->theaplock);
1295 1 : b->tminpos = bi.minpos;
1296 1 : b->tmaxpos = bi.maxpos;
1297 1 : MT_lock_unset(&b->theaplock);
1298 : }
1299 29450 : if (BATcount(b) < updid &&
1300 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1301 0 : bat_iterator_end(&ni);
1302 0 : return GDK_FAIL;
1303 : }
1304 29450 : if (BUNappend(b, new, force) != GDK_SUCCEED) {
1305 0 : bat_iterator_end(&ni);
1306 0 : return GDK_FAIL;
1307 : }
1308 29450 : bi = bat_iterator_nolock(b);
1309 107992 : continue;
1310 : }
1311 :
1312 : /* it is possible that a previous run was killed
1313 : * after an update (with a mmapped tail file)
1314 : * but before that was committed, then the
1315 : * offset may point outside of the vheap */
1316 1129403 : const void *old = BUNtvaroff(bi, updid) < bi.vhfree ? BUNtvar(bi, updid) : NULL;
1317 :
1318 1129275 : if (old && atomcmp(old, new) == 0) {
1319 : /* replacing with the same value:
1320 : * nothing to do */
1321 78542 : continue;
1322 : }
1323 :
1324 1049944 : bool isnil = atomcmp(new, nil) == 0;
1325 1052173 : anynil |= isnil;
1326 1052173 : MT_lock_set(&b->theaplock);
1327 1052716 : if (old == NULL ||
1328 1052716 : (b->tnil &&
1329 573 : !anynil &&
1330 573 : atomcmp(old, nil) == 0)) {
1331 : /* if old value is nil and no new
1332 : * value is, we're not sure anymore
1333 : * about the nil property, so we must
1334 : * clear it */
1335 573 : b->tnil = false;
1336 : }
1337 1052716 : b->tnonil &= !isnil;
1338 1052716 : b->tnil |= isnil;
1339 1052716 : MT_lock_unset(&b->theaplock);
1340 1052972 : if (bi.maxpos != BUN_NONE) {
1341 202 : if (!isnil &&
1342 101 : atomcmp(BUNtvar(bi, bi.maxpos), new) < 0) {
1343 : /* new value is larger than
1344 : * previous largest */
1345 22 : bi.maxpos = updid;
1346 158 : } else if (old == NULL ||
1347 89 : (atomcmp(BUNtvar(bi, bi.maxpos), old) == 0 &&
1348 10 : atomcmp(new, old) != 0)) {
1349 : /* old value is equal to
1350 : * largest and new value is
1351 : * smaller, so we don't know
1352 : * anymore which is the
1353 : * largest */
1354 10 : bi.maxpos = BUN_NONE;
1355 : }
1356 : }
1357 1052972 : if (bi.minpos != BUN_NONE) {
1358 188 : if (!isnil &&
1359 94 : atomcmp(BUNtvar(bi, bi.minpos), new) > 0) {
1360 : /* new value is smaller than
1361 : * previous smallest */
1362 12 : bi.minpos = updid;
1363 164 : } else if (old == NULL ||
1364 104 : (atomcmp(BUNtvar(bi, bi.minpos), old) == 0 &&
1365 22 : atomcmp(new, old) != 0)) {
1366 : /* old value is equal to
1367 : * smallest and new value is
1368 : * larger, so we don't know
1369 : * anymore which is the
1370 : * smallest */
1371 22 : bi.minpos = BUN_NONE;
1372 : }
1373 : }
1374 1052972 : if (!locked) {
1375 2033 : MT_rwlock_wrlock(&b->thashlock);
1376 2033 : locked = true;
1377 : }
1378 1052972 : if (old)
1379 1052972 : HASHdelete_locked(&bi, updid, old);
1380 0 : else if (b->thash) {
1381 0 : doHASHdestroy(b, b->thash);
1382 0 : b->thash = NULL;
1383 : }
1384 :
1385 1052972 : var_t d;
1386 1052972 : switch (b->twidth) {
1387 1040981 : case 1:
1388 1040981 : d = (var_t) ((uint8_t *) b->theap->base)[updid] + GDK_VAROFFSET;
1389 1040981 : break;
1390 8508 : case 2:
1391 8508 : d = (var_t) ((uint16_t *) b->theap->base)[updid] + GDK_VAROFFSET;
1392 8508 : break;
1393 3448 : case 4:
1394 3448 : d = (var_t) ((uint32_t *) b->theap->base)[updid];
1395 3448 : break;
1396 : #if SIZEOF_VAR_T == 8
1397 35 : case 8:
1398 35 : d = (var_t) ((uint64_t *) b->theap->base)[updid];
1399 35 : break;
1400 : #endif
1401 : default:
1402 0 : MT_UNREACHABLE();
1403 : }
1404 1052972 : MT_lock_set(&b->theaplock);
1405 1052707 : gdk_return rc = ATOMreplaceVAR(b, &d, new);
1406 1052720 : MT_lock_unset(&b->theaplock);
1407 1053107 : if (rc != GDK_SUCCEED) {
1408 0 : goto bailout;
1409 : }
1410 1053107 : if (b->twidth < SIZEOF_VAR_T &&
1411 1052872 : (b->twidth <= 2 ? d - GDK_VAROFFSET : d) >= ((size_t) 1 << (8 << b->tshift))) {
1412 : /* doesn't fit in current heap, upgrade it */
1413 20 : if (GDKupgradevarheap(b, d, 0, MAX(updid, b->batCount)) != GDK_SUCCEED) {
1414 0 : goto bailout;
1415 : }
1416 : }
1417 : /* in case ATOMreplaceVAR and/or
1418 : * GDKupgradevarheap replaces a heap, we need to
1419 : * reinitialize the iterator */
1420 : {
1421 : /* save and restore minpos/maxpos */
1422 1053107 : BUN minpos = bi.minpos;
1423 1053107 : BUN maxpos = bi.maxpos;
1424 1053107 : bi = bat_iterator_nolock(b);
1425 1053107 : bi.minpos = minpos;
1426 1053107 : bi.maxpos = maxpos;
1427 : }
1428 1053107 : switch (b->twidth) {
1429 1041098 : case 1:
1430 1041098 : ((uint8_t *) b->theap->base)[updid] = (uint8_t) (d - GDK_VAROFFSET);
1431 1041098 : break;
1432 8524 : case 2:
1433 8524 : ((uint16_t *) b->theap->base)[updid] = (uint16_t) (d - GDK_VAROFFSET);
1434 8524 : break;
1435 3450 : case 4:
1436 3450 : ((uint32_t *) b->theap->base)[updid] = (uint32_t) d;
1437 3450 : break;
1438 : #if SIZEOF_VAR_T == 8
1439 35 : case 8:
1440 35 : ((uint64_t *) b->theap->base)[updid] = (uint64_t) d;
1441 35 : break;
1442 : #endif
1443 : default:
1444 0 : MT_UNREACHABLE();
1445 : }
1446 1053107 : HASHinsert_locked(&bi, updid, new);
1447 :
1448 : }
1449 2470 : if (locked) {
1450 2025 : if (b->thash)
1451 2 : nunique = b->thash->nunique;
1452 2025 : MT_rwlock_wrunlock(&b->thashlock);
1453 2025 : locked = false;
1454 : }
1455 2470 : MT_lock_set(&b->theaplock);
1456 2470 : b->tvheap->dirty = true;
1457 2470 : MT_lock_unset(&b->theaplock);
1458 21533 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
1459 0 : assert(b->thash == NULL);
1460 0 : HASHdestroy(b); /* hash doesn't make sense for msk */
1461 0 : for (BUN i = 0; i < ni.count; i++) {
1462 0 : oid updid;
1463 0 : if (positions) {
1464 0 : updid = autoincr ? pos++ : *positions++;
1465 : } else {
1466 0 : updid = BUNtoid(p, i);
1467 : }
1468 :
1469 0 : if (updid < b->hseqbase ||
1470 0 : (!mayappend && updid >= hseqend)) {
1471 0 : GDKerror("id out of range\n");
1472 0 : bat_iterator_end(&ni);
1473 0 : return GDK_FAIL;
1474 : }
1475 0 : updid -= b->hseqbase;
1476 0 : if (!force && updid < b->batInserted) {
1477 0 : GDKerror("updating committed value\n");
1478 0 : bat_iterator_end(&ni);
1479 0 : return GDK_FAIL;
1480 : }
1481 0 : if (updid >= BATcount(b)) {
1482 0 : assert(mayappend);
1483 0 : if (BATcount(b) < updid &&
1484 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1485 0 : bat_iterator_end(&ni);
1486 0 : return GDK_FAIL;
1487 : }
1488 0 : if (BUNappend(b, BUNtmsk(ni, i), force) != GDK_SUCCEED) {
1489 0 : bat_iterator_end(&ni);
1490 0 : return GDK_FAIL;
1491 : }
1492 0 : continue;
1493 : }
1494 0 : mskSetVal(b, updid, Tmskval(&ni, i));
1495 : }
1496 0 : bi = bat_iterator_nolock(b);
1497 21533 : } else if (autoincr) {
1498 13741 : if (pos < b->hseqbase ||
1499 12841 : (!mayappend && pos + ni.count > hseqend)) {
1500 0 : GDKerror("id out of range\n");
1501 0 : bat_iterator_end(&ni);
1502 0 : return GDK_FAIL;
1503 : }
1504 13741 : pos -= b->hseqbase;
1505 13741 : if (!force && pos < b->batInserted) {
1506 0 : GDKerror("updating committed value\n");
1507 0 : bat_iterator_end(&ni);
1508 0 : return GDK_FAIL;
1509 : }
1510 :
1511 13741 : if (pos >= BATcount(b)) {
1512 526 : assert(mayappend);
1513 526 : bat_iterator_end(&ni);
1514 526 : if (BATcount(b) < pos &&
1515 0 : BUNappendmulti(b, NULL, (BUN) (pos - BATcount(b)), force) != GDK_SUCCEED) {
1516 : return GDK_FAIL;
1517 : }
1518 526 : return BATappend(b, n, NULL, force);
1519 : }
1520 13221 : if (pos + ni.count > BATcount(b) &&
1521 6 : BUNappendmulti(b, NULL, (BUN) (pos + ni.count - BATcount(b)), force) != GDK_SUCCEED) {
1522 0 : bat_iterator_end(&ni);
1523 0 : return GDK_FAIL;
1524 : }
1525 13215 : bi = bat_iterator_nolock(b);
1526 :
1527 : /* we copy all of n, so if there are nils in n we get
1528 : * nils in b (and else we don't know) */
1529 13215 : b->tnil = ni.nil;
1530 : /* we may not copy over all of b, so we only know that
1531 : * there are no nils in b afterward if there weren't
1532 : * any in either b or n to begin with */
1533 13215 : b->tnonil &= ni.nonil;
1534 : /* if there is no hash, we don't start the loop, if
1535 : * there is only a persisted hash, it will get destroyed
1536 : * in the first iteration, after which there is no hash
1537 : * and the loop ends */
1538 13215 : MT_rwlock_wrlock(&b->thashlock);
1539 13215 : locked = true;
1540 13215 : for (BUN i = pos, j = pos + ni.count; i < j && b->thash; i++)
1541 0 : HASHdelete_locked(&bi, i, Tloc(b, i));
1542 13215 : if (ni.type == TYPE_void) {
1543 0 : assert(b->ttype == TYPE_oid);
1544 0 : oid *o = Tloc(b, pos);
1545 0 : if (is_oid_nil(ni.tseq)) {
1546 : /* we may or may not overwrite the old
1547 : * min/max values */
1548 0 : bi.minpos = BUN_NONE;
1549 0 : bi.maxpos = BUN_NONE;
1550 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1551 0 : o[i] = oid_nil;
1552 0 : b->tnil = true;
1553 : } else {
1554 0 : oid v = ni.tseq;
1555 : /* we know min/max of n, so we know
1556 : * the new min/max of b if those of n
1557 : * are smaller/larger than the old */
1558 0 : if (bi.minpos != BUN_NONE) {
1559 0 : if (v <= BUNtoid(b, bi.minpos))
1560 0 : bi.minpos = pos;
1561 0 : else if (pos <= bi.minpos && bi.minpos < pos + ni.count)
1562 0 : bi.minpos = BUN_NONE;
1563 : }
1564 0 : if (complex_cand(n)) {
1565 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1566 0 : o[i] = *(oid *)Tpos(&ni, i);
1567 : /* last value */
1568 0 : v = o[ni.count - 1];
1569 : } else {
1570 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1571 0 : o[i] = v++;
1572 : /* last value added (not one beyond) */
1573 0 : v--;
1574 : }
1575 0 : if (bi.maxpos != BUN_NONE) {
1576 0 : if (v >= BUNtoid(b, bi.maxpos))
1577 0 : bi.maxpos = pos + ni.count - 1;
1578 0 : else if (pos <= bi.maxpos && bi.maxpos < pos + ni.count)
1579 0 : bi.maxpos = BUN_NONE;
1580 : }
1581 : }
1582 : } else {
1583 : /* if the extremes of n are at least as
1584 : * extreme as those of b, we can replace b's
1585 : * min/max, else we don't know what b's new
1586 : * min/max are*/
1587 13512 : if (bi.minpos != BUN_NONE && ni.minpos != BUN_NONE &&
1588 297 : atomcmp(BUNtloc(bi, bi.minpos), BUNtail(ni, ni.minpos)) >= 0) {
1589 181 : bi.minpos = pos + ni.minpos;
1590 : } else {
1591 13034 : bi.minpos = BUN_NONE;
1592 : }
1593 13506 : if (bi.maxpos != BUN_NONE && ni.maxpos != BUN_NONE &&
1594 291 : atomcmp(BUNtloc(bi, bi.maxpos), BUNtail(ni, ni.maxpos)) <= 0) {
1595 201 : bi.maxpos = pos + ni.maxpos;
1596 : } else {
1597 13014 : bi.maxpos = BUN_NONE;
1598 : }
1599 13215 : memcpy(Tloc(b, pos), ni.base,
1600 13215 : ni.count << b->tshift);
1601 : }
1602 : /* either we have a hash that was updated above, or we
1603 : * have no hash; we cannot have the case where there is
1604 : * only a persisted (unloaded) hash since it would have
1605 : * been destroyed above */
1606 13215 : if (b->thash != NULL) {
1607 1 : for (BUN i = pos, j = pos + ni.count; i < j; i++)
1608 1 : HASHinsert_locked(&bi, i, Tloc(b, i));
1609 0 : if (b->thash)
1610 0 : nunique = b->thash->nunique;
1611 : }
1612 13214 : MT_rwlock_wrunlock(&b->thashlock);
1613 13214 : locked = false;
1614 13214 : if (ni.count == BATcount(b)) {
1615 : /* if we replaced all values of b by values
1616 : * from n, we can also copy the min/max
1617 : * properties */
1618 5596 : bi.minpos = ni.minpos;
1619 5596 : bi.maxpos = ni.maxpos;
1620 5596 : if (BATtdensebi(&ni)) {
1621 : /* replaced all of b with a dense sequence */
1622 45 : MT_lock_set(&b->theaplock);
1623 45 : BATtseqbase(b, ni.tseq);
1624 45 : MT_lock_unset(&b->theaplock);
1625 : }
1626 : }
1627 : } else {
1628 157716451 : for (BUN i = 0; i < ni.count; i++) {
1629 157708659 : oid updid;
1630 157708659 : if (positions) {
1631 : /* assert(!autoincr) */
1632 157708659 : updid = *positions++;
1633 : } else {
1634 0 : updid = BUNtoid(p, i);
1635 : }
1636 :
1637 157708659 : if (updid < b->hseqbase ||
1638 157708659 : (!mayappend && updid >= hseqend)) {
1639 0 : GDKerror("id out of range\n");
1640 0 : goto bailout;
1641 : }
1642 157708659 : updid -= b->hseqbase;
1643 157708659 : if (!force && updid < b->batInserted) {
1644 0 : GDKerror("updating committed value\n");
1645 0 : goto bailout;
1646 : }
1647 :
1648 157708659 : const void *new = BUNtloc(ni, i);
1649 :
1650 157708659 : if (updid >= BATcount(b)) {
1651 17427 : assert(mayappend);
1652 17427 : if (locked) {
1653 22 : MT_rwlock_wrunlock(&b->thashlock);
1654 22 : locked = false;
1655 : }
1656 17427 : if (b->tminpos != bi.minpos ||
1657 17425 : b->tmaxpos != bi.maxpos) {
1658 3 : MT_lock_set(&b->theaplock);
1659 3 : b->tminpos = bi.minpos;
1660 3 : b->tmaxpos = bi.maxpos;
1661 3 : MT_lock_unset(&b->theaplock);
1662 : }
1663 17427 : if (BATcount(b) < updid &&
1664 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1665 0 : goto bailout;
1666 : }
1667 17427 : if (BUNappend(b, new, force) != GDK_SUCCEED) {
1668 0 : bat_iterator_end(&ni);
1669 0 : return GDK_FAIL;
1670 : }
1671 17426 : bi = bat_iterator_nolock(b);
1672 17426 : continue;
1673 : }
1674 :
1675 157691232 : const void *old = BUNtloc(bi, updid);
1676 157691232 : bool isnil = atomcmp(new, nil) == 0;
1677 157698938 : anynil |= isnil;
1678 157698938 : if (b->tnil &&
1679 1900 : !anynil &&
1680 1900 : atomcmp(old, nil) == 0) {
1681 : /* if old value is nil and no new
1682 : * value is, we're not sure anymore
1683 : * about the nil property, so we must
1684 : * clear it */
1685 1900 : b->tnil = false;
1686 : }
1687 157698938 : b->tnonil &= !isnil;
1688 157698938 : b->tnil |= isnil;
1689 157698938 : if (bi.maxpos != BUN_NONE) {
1690 48 : if (!isnil &&
1691 22 : atomcmp(BUNtloc(bi, bi.maxpos), new) < 0) {
1692 : /* new value is larger than
1693 : * previous largest */
1694 1 : bi.maxpos = updid;
1695 32 : } else if (atomcmp(BUNtloc(bi, bi.maxpos), old) == 0 &&
1696 7 : atomcmp(new, old) != 0) {
1697 : /* old value is equal to
1698 : * largest and new value is
1699 : * smaller, so we don't know
1700 : * anymore which is the
1701 : * largest */
1702 7 : bi.maxpos = BUN_NONE;
1703 : }
1704 : }
1705 157698938 : if (bi.minpos != BUN_NONE) {
1706 52 : if (!isnil &&
1707 24 : atomcmp(BUNtloc(bi, bi.minpos), new) > 0) {
1708 : /* new value is smaller than
1709 : * previous smallest */
1710 6 : bi.minpos = updid;
1711 28 : } else if (atomcmp(BUNtloc(bi, bi.minpos), old) == 0 &&
1712 6 : atomcmp(new, old) != 0) {
1713 : /* old value is equal to
1714 : * smallest and new value is
1715 : * larger, so we don't know
1716 : * anymore which is the
1717 : * smallest */
1718 6 : bi.minpos = BUN_NONE;
1719 : }
1720 : }
1721 :
1722 157698938 : if (!locked) {
1723 7792 : MT_rwlock_wrlock(&b->thashlock);
1724 7792 : locked = true;
1725 : }
1726 157698938 : HASHdelete_locked(&bi, updid, old);
1727 157903522 : switch (b->twidth) {
1728 30394631 : case 1:
1729 30394631 : ((bte *) b->theap->base)[updid] = * (bte *) new;
1730 30394631 : break;
1731 527268 : case 2:
1732 527268 : ((sht *) b->theap->base)[updid] = * (sht *) new;
1733 527268 : break;
1734 22063353 : case 4:
1735 22063353 : ((int *) b->theap->base)[updid] = * (int *) new;
1736 22063353 : break;
1737 99286132 : case 8:
1738 99286132 : ((lng *) b->theap->base)[updid] = * (lng *) new;
1739 99286132 : break;
1740 5632138 : case 16:
1741 : #ifdef HAVE_HGE
1742 5632138 : ((hge *) b->theap->base)[updid] = * (hge *) new;
1743 : #else
1744 : ((uuid *) b->theap->base)[updid] = * (uuid *) new;
1745 : #endif
1746 5632138 : break;
1747 0 : default:
1748 0 : memcpy(BUNtloc(bi, updid), new, ATOMsize(b->ttype));
1749 0 : break;
1750 : }
1751 157903522 : HASHinsert_locked(&bi, updid, new);
1752 : }
1753 7792 : if (locked) {
1754 7770 : if (b->thash)
1755 1 : nunique = b->thash->nunique;
1756 7770 : MT_rwlock_wrunlock(&b->thashlock);
1757 7770 : locked = false;
1758 : }
1759 : }
1760 23477 : bat_iterator_end(&ni);
1761 23477 : MT_lock_set(&b->theaplock);
1762 23475 : if (nunique != 0)
1763 3 : b->tunique_est = (double) nunique;
1764 23475 : b->tminpos = bi.minpos;
1765 23475 : b->tmaxpos = bi.maxpos;
1766 23475 : b->theap->dirty = true;
1767 23475 : MT_lock_unset(&b->theaplock);
1768 23477 : TRC_DEBUG(ALGO,
1769 : "BATreplace(" ALGOBATFMT "," ALGOOPTBATFMT "," ALGOBATFMT ") " LLFMT " usec\n",
1770 : ALGOBATPAR(b), ALGOOPTBATPAR(p), ALGOBATPAR(n),
1771 : GDKusec() - t0);
1772 : return GDK_SUCCEED;
1773 :
1774 0 : bailout:
1775 0 : bat_iterator_end(&ni);
1776 0 : if (locked) {
1777 0 : Hash *h = b->thash;
1778 0 : b->thash = NULL;
1779 0 : MT_rwlock_wrunlock(&b->thashlock);
1780 0 : doHASHdestroy(b, h);
1781 : }
1782 : return GDK_FAIL;
1783 : }
1784 :
1785 : /* replace values from b at locations specified in p with values in n */
1786 : gdk_return
1787 73915 : BATreplace(BAT *b, BAT *p, BAT *n, bool force)
1788 : {
1789 73915 : return BATappend_or_update(b, p, NULL, n, false, false, force);
1790 : }
1791 :
1792 : /* like BATreplace, but p may specify locations beyond the end of b */
1793 : gdk_return
1794 1143 : BATupdate(BAT *b, BAT *p, BAT *n, bool force)
1795 : {
1796 1143 : return BATappend_or_update(b, p, NULL, n, true, false, force);
1797 : }
1798 :
1799 : /* like BATreplace, but the positions are given by an array of oid values */
1800 : gdk_return
1801 0 : BATreplacepos(BAT *b, const oid *positions, BAT *n, bool autoincr, bool force)
1802 : {
1803 0 : return BATappend_or_update(b, NULL, positions, n, false, autoincr, force);
1804 : }
1805 :
1806 : /* like BATreplace, but the positions are given by an array of oid
1807 : * values, and they may specify locations beyond the end of b */
1808 : gdk_return
1809 176 : BATupdatepos(BAT *b, const oid *positions, BAT *n, bool autoincr, bool force)
1810 : {
1811 176 : return BATappend_or_update(b, NULL, positions, n, true, autoincr, force);
1812 : }
1813 :
1814 : /*
1815 : * BAT Selections
1816 : * The BAT selectors are among the most heavily used operators.
1817 : * Their efficient implementation is therefore mandatory.
1818 : *
1819 : * BAT slice
1820 : * This function returns a horizontal slice from a BAT. It optimizes
1821 : * execution by avoiding to copy when the BAT is memory mapped (in
1822 : * this case, an independent submap is created) or else when it is
1823 : * read-only, then a VIEW bat is created as a result.
1824 : *
1825 : * If a new copy has to be created, this function takes care to
1826 : * preserve void-columns (in this case, the seqbase has to be
1827 : * recomputed in the result).
1828 : *
1829 : * The selected range is excluding the high value.
1830 : */
1831 : BAT *
1832 6835555 : BATslice(BAT *b, BUN l, BUN h)
1833 : {
1834 6835555 : BUN low = l;
1835 6835555 : BAT *bn = NULL;
1836 6835555 : BATiter bni;
1837 6835555 : oid foid; /* first oid value if oid column */
1838 :
1839 6835555 : BATcheck(b, NULL);
1840 6835555 : BATiter bi = bat_iterator(b);
1841 6836260 : if (l > bi.count)
1842 : l = bi.count;
1843 6836260 : if (h > bi.count)
1844 : h = bi.count;
1845 6836260 : if (h < l)
1846 : h = l;
1847 :
1848 6836260 : if (complex_cand(b)) {
1849 : /* slicing a candidate list with exceptions */
1850 78 : struct canditer ci;
1851 78 : canditer_init(&ci, NULL, b);
1852 78 : if (b->hseqbase + l >= ci.hseq) {
1853 78 : l = b->hseqbase + l - ci.hseq;
1854 78 : h = b->hseqbase + h - ci.hseq;
1855 : } else {
1856 0 : l = 0;
1857 0 : if (b->hseqbase + h >= ci.hseq)
1858 0 : h = b->hseqbase + h - ci.hseq;
1859 : else
1860 : h = 0;
1861 : }
1862 78 : bn = canditer_slice(&ci, l, h);
1863 78 : goto doreturn;
1864 : }
1865 : /* If the source BAT is readonly, then we can obtain a VIEW
1866 : * that just reuses the memory of the source. */
1867 6836182 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1868 : /* forget about slices for bit masks: we can't deal
1869 : * with difference in alignment, so we'll just make a
1870 : * copy */
1871 0 : bn = COLnew((oid) (b->hseqbase + low), b->ttype, h - l, TRANSIENT);
1872 : /* we use BATappend with a candidate list to easily
1873 : * copy the part of b that we need */
1874 0 : BAT *s = BATdense(0, (oid) (b->hseqbase + low), h - l);
1875 0 : if (bn == NULL ||
1876 0 : s == NULL ||
1877 0 : BATappend(bn, b, s, false) != GDK_SUCCEED) {
1878 0 : BBPreclaim(bn);
1879 0 : BBPreclaim(s);
1880 0 : bn = NULL;
1881 0 : goto doreturn;
1882 : }
1883 0 : BBPunfix(s->batCacheid);
1884 0 : goto doreturn;
1885 : }
1886 6836182 : restrict_t prestricted;
1887 7900612 : if (bi.restricted == BAT_READ && VIEWtparent(b)) {
1888 1064279 : BAT *pb = BBP_desc(VIEWtparent(b));
1889 1064279 : MT_lock_set(&pb->theaplock);
1890 1064156 : prestricted = pb->batRestricted;
1891 1064156 : MT_lock_unset(&pb->theaplock);
1892 : } else {
1893 : prestricted = BAT_WRITE; /* just initialize with anything */
1894 : }
1895 6836333 : if (bi.restricted == BAT_READ &&
1896 6796786 : (!VIEWtparent(b) || prestricted == BAT_READ)) {
1897 6796787 : bn = VIEWcreate(b->hseqbase + low, b);
1898 6795442 : if (bn == NULL)
1899 0 : goto doreturn;
1900 6795442 : VIEWboundsbi(&bi, bn, l, h);
1901 : } else {
1902 : /* create a new BAT and put everything into it */
1903 39546 : BUN p = l;
1904 39546 : BUN q = h;
1905 :
1906 39546 : bn = COLnew((oid) (b->hseqbase + low), BATtdensebi(&bi) ? TYPE_void : b->ttype, h - l, TRANSIENT);
1907 39547 : if (bn == NULL)
1908 0 : goto doreturn;
1909 :
1910 39547 : if (bn->ttype == TYPE_void) {
1911 21167 : BATsetcount(bn, h - l);
1912 18380 : } else if (bn->tvheap == NULL &&
1913 11682 : BATatoms[bn->ttype].atomFix == NULL) {
1914 11681 : assert(BATatoms[bn->ttype].atomPut == NULL);
1915 11681 : memcpy(Tloc(bn, 0), (const char *) bi.base + (p << bi.shift),
1916 11681 : (q - p) << bn->tshift);
1917 11681 : bn->theap->dirty = true;
1918 11681 : BATsetcount(bn, h - l);
1919 : } else {
1920 1734550 : for (; p < q; p++) {
1921 1727851 : if (bunfastapp(bn, BUNtail(bi, p)) != GDK_SUCCEED) {
1922 0 : BBPreclaim(bn);
1923 0 : bn = NULL;
1924 0 : goto doreturn;
1925 : }
1926 : }
1927 : }
1928 39546 : bn->theap->dirty = true;
1929 39546 : bn->tsorted = bi.sorted;
1930 39546 : bn->trevsorted = bi.revsorted;
1931 39546 : bn->tkey = bi.key;
1932 39546 : bn->tnonil = bi.nonil;
1933 39546 : if (bi.nosorted > l && bi.nosorted < h)
1934 3920 : bn->tnosorted = bi.nosorted - l;
1935 : else
1936 35626 : bn->tnosorted = 0;
1937 39546 : if (bi.norevsorted > l && bi.norevsorted < h)
1938 8478 : bn->tnorevsorted = bi.norevsorted - l;
1939 : else
1940 31068 : bn->tnorevsorted = 0;
1941 39546 : if (bi.nokey[0] >= l && bi.nokey[0] < h &&
1942 32566 : bi.nokey[1] >= l && bi.nokey[1] < h &&
1943 : bi.nokey[0] != bi.nokey[1]) {
1944 552 : bn->tnokey[0] = bi.nokey[0] - l;
1945 552 : bn->tnokey[1] = bi.nokey[1] - l;
1946 : } else {
1947 38994 : bn->tnokey[0] = bn->tnokey[1] = 0;
1948 : }
1949 : }
1950 6833949 : bn->tnonil = bi.nonil || bn->batCount == 0;
1951 6833949 : bn->tnil = false; /* we just don't know */
1952 6833949 : bn->tnosorted = 0;
1953 6833949 : bn->tnokey[0] = bn->tnokey[1] = 0;
1954 6833949 : bni = bat_iterator_nolock(bn);
1955 6833949 : if (BATtdensebi(&bi)) {
1956 25087 : BATtseqbase(bn, (oid) (bi.tseq + low));
1957 6808862 : } else if (bn->ttype == TYPE_oid) {
1958 14458 : if (BATcount(bn) == 0) {
1959 10 : BATtseqbase(bn, 0);
1960 14448 : } else if (!is_oid_nil((foid = *(oid *) BUNtloc(bni, 0))) &&
1961 12493 : (BATcount(bn) == 1 ||
1962 12493 : (bn->tkey &&
1963 7264 : bn->tsorted &&
1964 7264 : foid + BATcount(bn) - 1 == *(oid *) BUNtloc(bni, BATcount(bn) - 1)))) {
1965 2320 : BATtseqbase(bn, foid);
1966 : }
1967 : }
1968 6836206 : if (bn->batCount <= 1) {
1969 454548 : bn->tsorted = ATOMlinear(b->ttype);
1970 454548 : bn->trevsorted = ATOMlinear(b->ttype);
1971 454548 : BATkey(bn, true);
1972 : } else {
1973 6381658 : bn->tsorted = bi.sorted;
1974 6381658 : bn->trevsorted = bi.revsorted;
1975 6381658 : BATkey(bn, bi.key);
1976 : }
1977 6835623 : doreturn:
1978 6835623 : bat_iterator_end(&bi);
1979 6835103 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",lo=" BUNFMT ",hi=" BUNFMT " -> "
1980 : ALGOOPTBATFMT "\n",
1981 : ALGOBATPAR(b), l, h, ALGOOPTBATPAR(bn));
1982 : return bn;
1983 : }
1984 :
1985 : #define BAT_ORDERED(TPE) \
1986 : do { \
1987 : const TPE *restrict vals = Tloc(b, 0); \
1988 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
1989 : if (vals[p - 1] > vals[p]) { \
1990 : b->tnosorted = p; \
1991 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
1992 : goto doreturn; \
1993 : } else if (vals[p - 1] < vals[p]) { \
1994 : if (!b->trevsorted && b->tnorevsorted == 0) { \
1995 : b->tnorevsorted = p; \
1996 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b)); \
1997 : } \
1998 : } else if (!b->tkey && b->tnokey[1] == 0) { \
1999 : b->tnokey[0] = p - 1; \
2000 : b->tnokey[1] = p; \
2001 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b)); \
2002 : } \
2003 : } \
2004 : } while (0)
2005 :
2006 : #define BAT_ORDERED_FP(TPE) \
2007 : do { \
2008 : const TPE *restrict vals = Tloc(b, 0); \
2009 : TPE prev = vals[0]; \
2010 : bool prevnil = is_##TPE##_nil(prev); \
2011 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
2012 : TPE next = vals[p]; \
2013 : int cmp = prevnil ? -!(prevnil = is_##TPE##_nil(next)) : (prevnil = is_##TPE##_nil(next)) ? 1 : (prev > next) - (prev < next); \
2014 : prev = next; \
2015 : if (cmp > 0) { \
2016 : b->tnosorted = bi.nosorted = p; \
2017 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
2018 : goto doreturn; \
2019 : } else if (cmp < 0) { \
2020 : if (!b->trevsorted && b->tnorevsorted == 0) { \
2021 : b->tnorevsorted = bi.norevsorted = p; \
2022 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b)); \
2023 : } \
2024 : } else if (!b->tkey && b->tnokey[1] == 0) { \
2025 : b->tnokey[0] = bi.nokey[0] = p - 1; \
2026 : b->tnokey[1] = bi.nokey[1] = p; \
2027 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b)); \
2028 : } \
2029 : } \
2030 : } while (0)
2031 :
2032 : /* Return whether the BAT is ordered or not. If we don't know, invest
2033 : * in a scan and record the results in the bat descriptor. If during
2034 : * the scan we happen to find evidence that the BAT is not reverse
2035 : * sorted, we record the location. */
2036 : bool
2037 2989352 : BATordered(BAT *b)
2038 : {
2039 2989352 : lng t0 = GDKusec();
2040 2989354 : bool sorted;
2041 :
2042 2989354 : MT_lock_set(&b->theaplock);
2043 2989362 : if (b->ttype == TYPE_void || b->tsorted || BATcount(b) == 0) {
2044 498532 : MT_lock_unset(&b->theaplock);
2045 498526 : return true;
2046 : }
2047 2490830 : if (b->tnosorted > 0 || !ATOMlinear(b->ttype)) {
2048 1101639 : MT_lock_unset(&b->theaplock);
2049 1101637 : return false;
2050 : }
2051 :
2052 : /* There are a few reasons why we need a lock here. It may be
2053 : * that multiple threads call this functions at the same time
2054 : * (happens a lot with mitosis/mergetable), but we only need to
2055 : * scan the bat in one thread: the others can reap the rewards
2056 : * when that one thread is done. Also, we need the heap to
2057 : * remain accessible (could have used bat_iterator for that),
2058 : * and, and this is the killer argument, we may need to make
2059 : * changes to the bat descriptor. */
2060 1389191 : BATiter bi = bat_iterator_nolock(b);
2061 1389191 : if (!b->tsorted && b->tnosorted == 0) {
2062 2715202 : switch (ATOMbasetype(b->ttype)) {
2063 60974 : case TYPE_bte:
2064 127552086 : BAT_ORDERED(bte);
2065 : break;
2066 10435 : case TYPE_sht:
2067 1677434 : BAT_ORDERED(sht);
2068 : break;
2069 1005260 : case TYPE_int:
2070 137273968 : BAT_ORDERED(int);
2071 : break;
2072 7045 : case TYPE_lng:
2073 60213570 : BAT_ORDERED(lng);
2074 : break;
2075 : #ifdef HAVE_HGE
2076 635 : case TYPE_hge:
2077 8095933 : BAT_ORDERED(hge);
2078 : break;
2079 : #endif
2080 980 : case TYPE_flt:
2081 8007207 : BAT_ORDERED_FP(flt);
2082 : break;
2083 668 : case TYPE_dbl:
2084 8285675 : BAT_ORDERED_FP(dbl);
2085 : break;
2086 : case TYPE_str:
2087 17657697 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2088 17654826 : int c;
2089 17654826 : const char *p1 = BUNtvar(bi, p - 1);
2090 17654766 : const char *p2 = BUNtvar(bi, p);
2091 17654821 : if (p1 == p2)
2092 : c = 0;
2093 2642669 : else if (p1[0] == '\200') {
2094 1341 : if (p2[0] == '\200')
2095 : c = 0;
2096 : else
2097 : c = -1;
2098 2641328 : } else if (p2[0] == '\200')
2099 : c = 1;
2100 : else
2101 2640430 : c = strcmp(p1, p2);
2102 2640430 : if (c > 0) {
2103 300138 : b->tnosorted = bi.nosorted = p;
2104 300138 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2105 300138 : goto doreturn;
2106 17354683 : } else if (c < 0) {
2107 2342500 : assert(!b->trevsorted);
2108 2342500 : if (b->tnorevsorted == 0) {
2109 8486 : b->tnorevsorted = bi.norevsorted = p;
2110 8486 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b));
2111 : }
2112 15012183 : } else if (b->tnokey[1] == 0) {
2113 9246 : assert(!b->tkey);
2114 9246 : b->tnokey[0] = bi.nokey[0] = p - 1;
2115 9246 : b->tnokey[1] = bi.nokey[1] = p;
2116 17354683 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b));
2117 : }
2118 : }
2119 : break;
2120 180 : default: {
2121 180 : int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
2122 430 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2123 400 : int c;
2124 400 : if ((c = cmpf(BUNtail(bi, p - 1), BUNtail(bi, p))) > 0) {
2125 150 : b->tnosorted = bi.nosorted = p;
2126 150 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2127 150 : goto doreturn;
2128 250 : } else if (c < 0) {
2129 191 : if (!b->trevsorted && b->tnorevsorted == 0) {
2130 78 : b->tnorevsorted = bi.norevsorted = p;
2131 78 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b));
2132 : }
2133 59 : } else if (!b->tkey && b->tnokey[1] == 0) {
2134 7 : b->tnokey[0] = bi.nokey[0] = p - 1;
2135 7 : b->tnokey[1] = bi.nokey[1] = p;
2136 250 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b));
2137 : }
2138 : }
2139 : break;
2140 : }
2141 : }
2142 : /* we only get here if we completed the scan; note that
2143 : * if we didn't record evidence about *reverse*
2144 : * sortedness, we know that the BAT is also reverse
2145 : * sorted; similarly, if we didn't record evidence about
2146 : * keyness, we know the BAT is key */
2147 68981 : b->tsorted = bi.sorted = true;
2148 68981 : TRC_DEBUG(ALGO, "Fixed sorted for " ALGOBATFMT " (" LLFMT " usec)\n", ALGOBATPAR(b), GDKusec() - t0);
2149 68974 : if (!b->trevsorted && b->tnorevsorted == 0) {
2150 22835 : b->trevsorted = bi.revsorted = true;
2151 22835 : TRC_DEBUG(ALGO, "Fixed revsorted for " ALGOBATFMT "\n", ALGOBATPAR(b));
2152 : }
2153 68974 : if (!b->tkey && b->tnokey[1] == 0) {
2154 29741 : b->tkey = bi.key = true;
2155 29741 : TRC_DEBUG(ALGO, "Fixed key for " ALGOBATFMT "\n", ALGOBATPAR(b));
2156 : }
2157 : }
2158 68974 : doreturn:
2159 1389178 : sorted = b->tsorted;
2160 1389178 : bat pbid = VIEWtparent(b);
2161 1389178 : MT_lock_unset(&b->theaplock);
2162 1389186 : if (pbid) {
2163 1253096 : BAT *pb = BBP_cache(pbid);
2164 1253096 : MT_lock_set(&pb->theaplock);
2165 1253085 : if (bi.count == BATcount(pb) &&
2166 1118848 : bi.h == pb->theap &&
2167 1118850 : bi.type == pb->ttype) {
2168 : /* add to knowledge in parent bat */
2169 1118849 : pb->tsorted |= bi.sorted;
2170 1118849 : if (pb->tnosorted == 0)
2171 187802 : pb->tnosorted = bi.nosorted;
2172 1118849 : pb->trevsorted |= bi.revsorted;
2173 1118849 : if (pb->tnorevsorted == 0)
2174 137547 : pb->tnorevsorted = bi.norevsorted;
2175 1118849 : pb->tkey |= bi.key;
2176 1118849 : if (pb->tnokey[1] == 0) {
2177 830520 : pb->tnokey[0] = bi.nokey[0];
2178 830520 : pb->tnokey[1] = bi.nokey[1];
2179 : }
2180 : }
2181 1253085 : MT_lock_unset(&pb->theaplock);
2182 : }
2183 : return sorted;
2184 : }
2185 :
2186 : #define BAT_REVORDERED(TPE) \
2187 : do { \
2188 : const TPE *restrict vals = Tloc(b, 0); \
2189 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
2190 : if (vals[p - 1] < vals[p]) { \
2191 : b->tnorevsorted = p; \
2192 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
2193 : goto doreturn; \
2194 : } \
2195 : } \
2196 : } while (0)
2197 :
2198 : #define BAT_REVORDERED_FP(TPE) \
2199 : do { \
2200 : const TPE *restrict vals = Tloc(b, 0); \
2201 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
2202 : TPE prev = vals[p - 1], next = vals[p]; \
2203 : int cmp = is_flt_nil(prev) ? -!is_flt_nil(next) : is_flt_nil(next) ? 1 : (prev > next) - (prev < next); \
2204 : if (cmp < 0) { \
2205 : b->tnorevsorted = bi.norevsorted = p; \
2206 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
2207 : goto doreturn; \
2208 : } \
2209 : } \
2210 : } while (0)
2211 :
2212 : /* Return whether the BAT is reverse ordered or not. If we don't
2213 : * know, invest in a scan and record the results in the bat
2214 : * descriptor. */
2215 : bool
2216 2813215 : BATordered_rev(BAT *b)
2217 : {
2218 2813215 : lng t0 = GDKusec();
2219 2813241 : bool revsorted;
2220 :
2221 2813241 : if (b == NULL || !ATOMlinear(b->ttype))
2222 : return false;
2223 2813217 : MT_lock_set(&b->theaplock);
2224 2813191 : if (BATcount(b) <= 1 || b->trevsorted) {
2225 252116 : MT_lock_unset(&b->theaplock);
2226 252115 : return true;
2227 : }
2228 2561075 : if (b->ttype == TYPE_void) {
2229 18054 : MT_lock_unset(&b->theaplock);
2230 18054 : return is_oid_nil(b->tseqbase);
2231 : }
2232 2543021 : if (BATtdense(b) || b->tnorevsorted > 0) {
2233 2479872 : MT_lock_unset(&b->theaplock);
2234 2479852 : return false;
2235 : }
2236 63149 : BATiter bi = bat_iterator_nolock(b);
2237 63149 : if (!b->trevsorted && b->tnorevsorted == 0) {
2238 101646 : switch (ATOMbasetype(b->ttype)) {
2239 22824 : case TYPE_bte:
2240 8826022 : BAT_REVORDERED(bte);
2241 : break;
2242 3304 : case TYPE_sht:
2243 3131794 : BAT_REVORDERED(sht);
2244 : break;
2245 21074 : case TYPE_int:
2246 1206173 : BAT_REVORDERED(int);
2247 : break;
2248 4118 : case TYPE_lng:
2249 2737635 : BAT_REVORDERED(lng);
2250 : break;
2251 : #ifdef HAVE_HGE
2252 602 : case TYPE_hge:
2253 401863 : BAT_REVORDERED(hge);
2254 : break;
2255 : #endif
2256 515 : case TYPE_flt:
2257 2015 : BAT_REVORDERED_FP(flt);
2258 : break;
2259 257 : case TYPE_dbl:
2260 100953 : BAT_REVORDERED_FP(dbl);
2261 : break;
2262 10455 : default: {
2263 10455 : int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
2264 848022 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2265 845356 : if (cmpf(BUNtail(bi, p - 1), BUNtail(bi, p)) < 0) {
2266 7789 : b->tnorevsorted = p;
2267 7789 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2268 7789 : goto doreturn;
2269 : }
2270 : }
2271 : break;
2272 : }
2273 : }
2274 8845 : b->trevsorted = bi.revsorted = true;
2275 8845 : TRC_DEBUG(ALGO, "Fixed revsorted for " ALGOBATFMT " (" LLFMT " usec)\n", ALGOBATPAR(b), GDKusec() - t0);
2276 : }
2277 8845 : doreturn:
2278 63149 : revsorted = b->trevsorted;
2279 63149 : bat pbid = VIEWtparent(b);
2280 63149 : MT_lock_unset(&b->theaplock);
2281 63152 : if (pbid) {
2282 16412 : BAT *pb = BBP_cache(pbid);
2283 16412 : MT_lock_set(&pb->theaplock);
2284 16412 : if (bi.count == BATcount(pb) &&
2285 3353 : bi.h == pb->theap &&
2286 3353 : bi.type == pb->ttype) {
2287 : /* add to knowledge in parent bat */
2288 3353 : pb->trevsorted |= bi.revsorted;
2289 3353 : if (pb->tnorevsorted == 0)
2290 3352 : pb->tnorevsorted = bi.norevsorted;
2291 : }
2292 16412 : MT_lock_unset(&pb->theaplock);
2293 : }
2294 : return revsorted;
2295 : }
2296 :
2297 : /* figure out which sort function is to be called
2298 : * stable sort can produce an error (not enough memory available),
2299 : * "quick" sort does not produce errors */
2300 : static gdk_return
2301 3603633 : do_sort(void *restrict h, void *restrict t, const void *restrict base,
2302 : size_t n, int hs, int ts, int tpe, bool reverse, bool nilslast,
2303 : bool stable)
2304 : {
2305 3603633 : if (n <= 1) /* trivially sorted */
2306 : return GDK_SUCCEED;
2307 2745249 : if (stable) {
2308 184 : if (reverse)
2309 1 : return GDKssort_rev(h, t, base, n, hs, ts, tpe);
2310 : else
2311 183 : return GDKssort(h, t, base, n, hs, ts, tpe);
2312 : } else {
2313 2745065 : GDKqsort(h, t, base, n, hs, ts, tpe, reverse, nilslast);
2314 : }
2315 2745065 : return GDK_SUCCEED;
2316 : }
2317 :
2318 : /* Sort the bat b according to both o and g. The stable and reverse
2319 : * parameters indicate whether the sort should be stable or descending
2320 : * respectively. The parameter b is required, o and g are optional
2321 : * (i.e., they may be NULL).
2322 : *
2323 : * A sorted copy is returned through the sorted parameter, the new
2324 : * ordering is returned through the order parameter, group information
2325 : * is returned through the groups parameter. All three output
2326 : * parameters may be NULL. If they're all NULL, this function does
2327 : * nothing.
2328 : *
2329 : * If o is specified, it is used to first rearrange b according to the
2330 : * order specified in o, after which b is sorted taking g into
2331 : * account.
2332 : *
2333 : * If g is specified, it indicates groups which should be individually
2334 : * ordered. Each row of consecutive equal values in g indicates a
2335 : * group which is sorted according to stable and reverse. g is used
2336 : * after the order in b was rearranged according to o.
2337 : *
2338 : * The outputs order and groups can be used in subsequent calls to
2339 : * this function. This can be used if multiple BATs need to be sorted
2340 : * together. The BATs should then be sorted in order of significance,
2341 : * and each following call should use the original unordered BAT plus
2342 : * the order and groups bat from the previous call. In this case, the
2343 : * sorted BATs are not of much use, so the sorted output parameter
2344 : * does not need to be specified.
2345 : * Apart from error checking and maintaining reference counts, sorting
2346 : * three columns (col1, col2, col3) could look like this with the
2347 : * sorted results in (col1s, col2s, col3s):
2348 : * BATsort(&col1s, &ord1, &grp1, col1, NULL, NULL, false, false, false);
2349 : * BATsort(&col2s, &ord2, &grp2, col2, ord1, grp1, false, false, false);
2350 : * BATsort(&col3s, NULL, NULL, col3, ord2, grp2, false, false, false);
2351 : * Note that the "reverse" parameter can be different for each call.
2352 : */
2353 : gdk_return
2354 28170 : BATsort(BAT **sorted, BAT **order, BAT **groups,
2355 : BAT *b, BAT *o, BAT *g, bool reverse, bool nilslast, bool stable)
2356 : {
2357 28170 : BAT *bn = NULL, *on = NULL, *gn = NULL, *pb = NULL;
2358 28170 : BATiter pbi;
2359 28170 : oid *restrict grps, *restrict ords, prev;
2360 28170 : BUN p, q, r;
2361 28170 : lng t0 = GDKusec();
2362 28170 : bool mkorderidx, orderidxlock = false;
2363 28170 : Heap *oidxh = NULL;
2364 :
2365 : /* we haven't implemented NILs as largest value for stable
2366 : * sort, so NILs come first for ascending and last for
2367 : * descending */
2368 28170 : assert(!stable || reverse == nilslast);
2369 :
2370 28170 : if (b == NULL) {
2371 0 : GDKerror("b must exist\n");
2372 0 : return GDK_FAIL;
2373 : }
2374 28170 : if (stable && reverse != nilslast) {
2375 0 : GDKerror("stable sort cannot have reverse != nilslast\n");
2376 0 : return GDK_FAIL;
2377 : }
2378 28170 : if (!ATOMlinear(b->ttype)) {
2379 0 : GDKerror("type %s cannot be sorted\n", ATOMname(b->ttype));
2380 0 : return GDK_FAIL;
2381 : }
2382 28170 : MT_lock_set(&b->theaplock);
2383 28170 : if (b->ttype == TYPE_void) {
2384 169 : b->tsorted = true;
2385 266 : if (b->trevsorted != (is_oid_nil(b->tseqbase) || b->batCount <= 1)) {
2386 0 : b->trevsorted = !b->trevsorted;
2387 : }
2388 338 : if (b->tkey != (!is_oid_nil(b->tseqbase) || b->batCount <= 1)) {
2389 0 : b->tkey = !b->tkey;
2390 : }
2391 28001 : } else if (b->batCount <= 1) {
2392 8376 : if (!b->tsorted || !b->trevsorted) {
2393 29 : b->tsorted = b->trevsorted = true;
2394 : }
2395 : }
2396 28170 : MT_lock_unset(&b->theaplock);
2397 28170 : if (o != NULL &&
2398 15171 : (ATOMtype(o->ttype) != TYPE_oid || /* oid tail */
2399 15171 : BATcount(o) != BATcount(b) || /* same size as b */
2400 5751 : (o->ttype == TYPE_void && /* no nil tail */
2401 2330 : BATcount(o) != 0 &&
2402 2330 : is_oid_nil(o->tseqbase)))) {
2403 0 : GDKerror("o must have type oid and same size as b\n");
2404 0 : return GDK_FAIL;
2405 : }
2406 28170 : if (g != NULL &&
2407 15171 : (ATOMtype(g->ttype) != TYPE_oid || /* oid tail */
2408 15171 : !g->tsorted || /* sorted */
2409 15171 : BATcount(g) != BATcount(b) || /* same size as b */
2410 4486 : (g->ttype == TYPE_void && /* no nil tail */
2411 2626 : BATcount(g) != 0 &&
2412 2626 : is_oid_nil(g->tseqbase)))) {
2413 0 : GDKerror("g must have type oid, sorted on the tail, "
2414 : "and same size as b\n");
2415 0 : return GDK_FAIL;
2416 : }
2417 28170 : if (sorted == NULL && order == NULL) {
2418 : /* no place to put result, so we're done quickly */
2419 0 : GDKerror("no place to put the result.\n");
2420 0 : return GDK_FAIL;
2421 : }
2422 28170 : if (g == NULL && !stable) {
2423 : /* pre-ordering doesn't make sense if we're not
2424 : * subsorting and the sort is not stable */
2425 12718 : o = NULL;
2426 : }
2427 28170 : if (b->tnonil) {
2428 : /* if there are no nils, placement of nils doesn't
2429 : * matter, so set nilslast such that ordered bits can
2430 : * be used */
2431 12372 : nilslast = reverse;
2432 : }
2433 28170 : pbi = bat_iterator(NULL);
2434 29202 : if (BATcount(b) <= 1 ||
2435 19586 : (reverse == nilslast &&
2436 : (reverse ? b->trevsorted : b->tsorted) &&
2437 5136 : o == NULL && g == NULL &&
2438 2442 : (groups == NULL || BATtkey(b) ||
2439 : (reverse ? b->tsorted : b->trevsorted)))) {
2440 : /* trivially (sub)sorted, and either we don't need to
2441 : * return group information, or we can trivially
2442 : * deduce the groups */
2443 10275 : if (sorted) {
2444 8854 : bn = COLcopy(b, b->ttype, false, TRANSIENT);
2445 8854 : if (bn == NULL)
2446 0 : goto error;
2447 8854 : *sorted = bn;
2448 : }
2449 10275 : if (order) {
2450 9370 : on = BATdense(b->hseqbase, b->hseqbase, BATcount(b));
2451 9370 : if (on == NULL)
2452 0 : goto error;
2453 9370 : *order = on;
2454 : }
2455 10275 : if (groups) {
2456 5373 : if (BATtkey(b)) {
2457 : /* singleton groups */
2458 3386 : gn = BATdense(0, 0, BATcount(b));
2459 3386 : if (gn == NULL)
2460 0 : goto error;
2461 : } else {
2462 : /* single group */
2463 1987 : const oid *o = 0;
2464 1987 : assert(BATcount(b) == 1 ||
2465 : (b->tsorted && b->trevsorted));
2466 1987 : gn = BATconstant(0, TYPE_oid, &o, BATcount(b), TRANSIENT);
2467 1987 : if (gn == NULL)
2468 0 : goto error;
2469 : }
2470 5373 : *groups = gn;
2471 : }
2472 10275 : bat_iterator_end(&pbi);
2473 10275 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o="
2474 : ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
2475 : ",reverse=%d,nilslast=%d,stable=%d) = ("
2476 : ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2477 : ALGOOPTBATFMT " -- trivial (" LLFMT
2478 : " usec)\n",
2479 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2480 : ALGOOPTBATPAR(g), reverse, nilslast, stable,
2481 : ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
2482 : ALGOOPTBATPAR(on), GDKusec() - t0);
2483 10275 : return GDK_SUCCEED;
2484 : }
2485 17895 : if (VIEWtparent(b)) {
2486 3758 : pb = BATdescriptor(VIEWtparent(b));
2487 3758 : if (pb != NULL &&
2488 3758 : (b->tbaseoff != pb->tbaseoff ||
2489 2939 : BATcount(b) != BATcount(pb) ||
2490 2657 : b->hseqbase != pb->hseqbase ||
2491 2648 : BATatoms[b->ttype].atomCmp != BATatoms[pb->ttype].atomCmp)) {
2492 1110 : BBPunfix(pb->batCacheid);
2493 1110 : pb = NULL;
2494 : }
2495 : } else {
2496 : pb = b;
2497 : }
2498 17895 : bat_iterator_end(&pbi);
2499 17895 : pbi = bat_iterator(pb);
2500 : /* when we will create an order index if it doesn't already exist */
2501 17895 : mkorderidx = (g == NULL && !reverse && !nilslast && pb != NULL && (order || !pbi.transient));
2502 17895 : if (g == NULL && !reverse && !nilslast && pb != NULL) {
2503 5977 : (void) BATcheckorderidx(pb);
2504 5977 : MT_lock_set(&pb->batIdxLock);
2505 5977 : if (pb->torderidx) {
2506 59 : if (!stable || ((oid *) pb->torderidx->base)[2]) {
2507 : /* we can use the order index */
2508 59 : oidxh = pb->torderidx;
2509 59 : HEAPincref(oidxh);
2510 : }
2511 : mkorderidx = false;
2512 5918 : } else if (b != pb) {
2513 : /* don't build orderidx on parent bat */
2514 : mkorderidx = false;
2515 4971 : } else if (mkorderidx) {
2516 : /* keep lock when going to create */
2517 4441 : orderidxlock = true;
2518 : }
2519 5977 : if (!orderidxlock)
2520 1536 : MT_lock_unset(&pb->batIdxLock);
2521 : }
2522 17895 : if (g == NULL && o == NULL && !reverse && !nilslast && oidxh != NULL) {
2523 : /* there is an order index that we can use */
2524 59 : on = COLnew(pb->hseqbase, TYPE_oid, pbi.count, TRANSIENT);
2525 59 : if (on == NULL)
2526 0 : goto error;
2527 59 : memcpy(Tloc(on, 0), (oid *) oidxh->base + ORDERIDXOFF, pbi.count * sizeof(oid));
2528 59 : BATsetcount(on, BATcount(b));
2529 59 : HEAPdecref(oidxh, false);
2530 59 : oidxh = NULL;
2531 59 : on->tkey = true;
2532 59 : on->tnil = false;
2533 59 : on->tnonil = true;
2534 59 : on->tsorted = on->trevsorted = false;
2535 59 : on->tseqbase = oid_nil;
2536 59 : if (sorted || groups) {
2537 59 : bn = BATproject(on, b);
2538 59 : if (bn == NULL)
2539 0 : goto error;
2540 59 : bn->tsorted = true;
2541 59 : if (groups) {
2542 4 : if (BATgroup_internal(groups, NULL, NULL, bn, NULL, g, NULL, NULL, true) != GDK_SUCCEED)
2543 0 : goto error;
2544 4 : if (sorted &&
2545 4 : (*groups)->tkey &&
2546 : g == NULL) {
2547 : /* if new groups bat is key
2548 : * and since there is no input
2549 : * groups bat, we know the
2550 : * result bat is key */
2551 4 : bn->tkey = true;
2552 : }
2553 : }
2554 59 : if (sorted)
2555 59 : *sorted = bn;
2556 : else {
2557 0 : BBPunfix(bn->batCacheid);
2558 0 : bn = NULL;
2559 : }
2560 : }
2561 59 : if (order)
2562 9 : *order = on;
2563 : else {
2564 50 : BBPunfix(on->batCacheid);
2565 50 : on = NULL;
2566 : }
2567 59 : bat_iterator_end(&pbi);
2568 59 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o="
2569 : ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
2570 : ",reverse=%d,nilslast=%d,stable=%d) = ("
2571 : ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2572 : ALGOOPTBATFMT " -- orderidx (" LLFMT
2573 : " usec)\n",
2574 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2575 : ALGOOPTBATPAR(g), reverse, nilslast, stable,
2576 : ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
2577 : ALGOOPTBATPAR(on), GDKusec() - t0);
2578 59 : if (pb != NULL && pb != b)
2579 53 : BBPunfix(pb->batCacheid);
2580 59 : return GDK_SUCCEED;
2581 11317 : } else if (oidxh) {
2582 0 : HEAPdecref(oidxh, false);
2583 0 : oidxh = NULL;
2584 : }
2585 17836 : if (o) {
2586 10764 : bn = BATproject(o, b);
2587 10764 : if (bn == NULL)
2588 0 : goto error;
2589 10764 : if (bn->ttype == TYPE_void || isVIEW(bn)) {
2590 4402 : BAT *b2 = COLcopy(bn, ATOMtype(bn->ttype), true, TRANSIENT);
2591 2201 : BBPunfix(bn->batCacheid);
2592 2201 : bn = b2;
2593 : }
2594 10764 : if (pb) {
2595 10276 : bat_iterator_end(&pbi);
2596 10276 : if (pb != b)
2597 1308 : BBPunfix(pb->batCacheid);
2598 10276 : pbi = bat_iterator(NULL);
2599 10276 : pb = NULL;
2600 : }
2601 : } else {
2602 7072 : bn = COLcopy(b, b->ttype, true, TRANSIENT);
2603 : }
2604 17836 : if (bn == NULL)
2605 0 : goto error;
2606 17836 : if (order) {
2607 : /* prepare order bat */
2608 15091 : if (o) {
2609 : /* make copy of input so that we can refine it;
2610 : * copy can be read-only if we take the shortcut
2611 : * below in the case g is "key" */
2612 9111 : on = COLcopy(o, TYPE_oid,
2613 9111 : g == NULL ||
2614 9111 : !(g->tkey || g->ttype == TYPE_void),
2615 : TRANSIENT);
2616 9111 : if (on == NULL)
2617 0 : goto error;
2618 9111 : BAThseqbase(on, b->hseqbase);
2619 9111 : on->tminpos = BUN_NONE;
2620 9111 : on->tmaxpos = BUN_NONE;
2621 : } else {
2622 : /* create new order */
2623 5980 : on = COLnew(b->hseqbase, TYPE_oid, BATcount(bn), TRANSIENT);
2624 5980 : if (on == NULL)
2625 0 : goto error;
2626 5980 : ords = (oid *) Tloc(on, 0);
2627 31613723 : for (p = 0, q = BATcount(bn); p < q; p++)
2628 31607743 : ords[p] = p + b->hseqbase;
2629 5980 : BATsetcount(on, BATcount(bn));
2630 5980 : on->tkey = true;
2631 5980 : on->tnil = false;
2632 5980 : on->tnonil = true;
2633 : }
2634 : /* COLcopy above can create TYPE_void */
2635 15091 : if (on->ttype != TYPE_void) {
2636 14362 : on->tsorted = on->trevsorted = false; /* it won't be sorted */
2637 14362 : on->tseqbase = oid_nil; /* and hence not dense */
2638 14362 : on->tnosorted = on->tnorevsorted = 0;
2639 : }
2640 15091 : *order = on;
2641 15091 : ords = (oid *) Tloc(on, 0);
2642 : } else {
2643 : ords = NULL;
2644 : }
2645 17836 : if (g) {
2646 10764 : if (g->tkey || g->ttype == TYPE_void) {
2647 : /* if g is "key", all groups are size 1, so no
2648 : * subsorting needed */
2649 4981 : if (sorted) {
2650 4569 : *sorted = bn;
2651 : } else {
2652 412 : BBPunfix(bn->batCacheid);
2653 412 : bn = NULL;
2654 : }
2655 4981 : if (order) {
2656 3802 : *order = on;
2657 3802 : if (o) {
2658 : /* we can inherit sortedness
2659 : * after all */
2660 3802 : on->tsorted = o->tsorted;
2661 3802 : on->trevsorted = o->trevsorted;
2662 3802 : if (o->tnosorted)
2663 53 : on->tnosorted = o->tnosorted;
2664 3802 : if (o->tnorevsorted)
2665 68 : on->tnorevsorted = o->tnorevsorted;
2666 : } else {
2667 : /* we didn't rearrange, so
2668 : * still sorted */
2669 0 : on->tsorted = true;
2670 0 : on->trevsorted = false;
2671 : }
2672 3802 : if (BATcount(on) <= 1) {
2673 0 : on->tsorted = true;
2674 0 : on->trevsorted = true;
2675 : }
2676 : }
2677 4981 : if (groups) {
2678 2891 : gn = COLcopy(g, g->ttype, false, TRANSIENT);
2679 2891 : if (gn == NULL)
2680 0 : goto error;
2681 2891 : *groups = gn;
2682 : }
2683 4981 : bat_iterator_end(&pbi);
2684 4981 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
2685 : ",o=" ALGOOPTBATFMT ",g=" ALGOBATFMT
2686 : ",reverse=%d,nilslast=%d,stable=%d"
2687 : ") = (" ALGOOPTBATFMT ","
2688 : ALGOOPTBATFMT "," ALGOOPTBATFMT
2689 : " -- key group (" LLFMT " usec)\n",
2690 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2691 : ALGOBATPAR(g), reverse, nilslast,
2692 : stable, ALGOOPTBATPAR(bn),
2693 : ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on),
2694 : GDKusec() - t0);
2695 4981 : if (pb != NULL && pb != b)
2696 0 : BBPunfix(pb->batCacheid);
2697 4981 : return GDK_SUCCEED;
2698 : }
2699 5783 : assert(g->ttype == TYPE_oid);
2700 5783 : grps = (oid *) Tloc(g, 0);
2701 5783 : prev = grps[0];
2702 5783 : if (BATmaterialize(bn, BUN_NONE) != GDK_SUCCEED)
2703 0 : goto error;
2704 45070498 : for (r = 0, p = 1, q = BATcount(g); p < q; p++) {
2705 45064715 : if (grps[p] != prev) {
2706 : /* sub sort [r,p) */
2707 6879364 : if (do_sort(Tloc(bn, r),
2708 3287934 : ords ? ords + r : NULL,
2709 3591430 : bn->tvheap ? bn->tvheap->base : NULL,
2710 3591430 : p - r, bn->twidth, ords ? sizeof(oid) : 0,
2711 3591430 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)
2712 0 : goto error;
2713 3591430 : r = p;
2714 3591430 : prev = grps[p];
2715 : }
2716 : }
2717 : /* sub sort [r,q) */
2718 11092 : if (do_sort(Tloc(bn, r),
2719 5309 : ords ? ords + r : NULL,
2720 5783 : bn->tvheap ? bn->tvheap->base : NULL,
2721 5783 : p - r, bn->twidth, ords ? sizeof(oid) : 0,
2722 5783 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)
2723 0 : goto error;
2724 : /* if single group (r==0) the result is (rev)sorted,
2725 : * otherwise (maybe) not */
2726 5783 : bn->tsorted = r == 0 && !reverse && !nilslast;
2727 11538 : bn->trevsorted = r == 0 && reverse && nilslast;
2728 : } else {
2729 7072 : Heap *m = NULL;
2730 : /* only invest in creating an order index if the BAT
2731 : * is persistent */
2732 7072 : if (mkorderidx) {
2733 4441 : assert(orderidxlock);
2734 4441 : if ((m = createOIDXheap(pb, stable)) != NULL &&
2735 : ords == NULL) {
2736 0 : ords = (oid *) m->base + ORDERIDXOFF;
2737 0 : if (o && o->ttype != TYPE_void)
2738 0 : memcpy(ords, Tloc(o, 0), BATcount(o) * sizeof(oid));
2739 0 : else if (o)
2740 0 : for (p = 0, q = BATcount(o); p < q; p++)
2741 0 : ords[p] = p + o->tseqbase;
2742 : else
2743 0 : for (p = 0, q = BATcount(b); p < q; p++)
2744 0 : ords[p] = p + b->hseqbase;
2745 : }
2746 : }
2747 7072 : if ((reverse != nilslast ||
2748 13412 : (reverse ? !bn->trevsorted : !bn->tsorted)) &&
2749 12872 : (BATmaterialize(bn, BUN_NONE) != GDK_SUCCEED ||
2750 6436 : do_sort(Tloc(bn, 0),
2751 : ords,
2752 6436 : bn->tvheap ? bn->tvheap->base : NULL,
2753 6436 : BATcount(bn), bn->twidth, ords ? sizeof(oid) : 0,
2754 6436 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)) {
2755 0 : if (m != NULL) {
2756 0 : HEAPfree(m, true);
2757 0 : GDKfree(m);
2758 : }
2759 0 : goto error;
2760 : }
2761 7072 : bn->tsorted = !reverse && !nilslast;
2762 7072 : bn->trevsorted = reverse && nilslast;
2763 7072 : if (m != NULL) {
2764 4441 : assert(orderidxlock);
2765 4441 : if (pb->torderidx == NULL) {
2766 4441 : if (ords != (oid *) m->base + ORDERIDXOFF) {
2767 4441 : memcpy((oid *) m->base + ORDERIDXOFF,
2768 : ords,
2769 4441 : pbi.count * sizeof(oid));
2770 : }
2771 4441 : ATOMIC_INIT(&m->refs, 1);
2772 4441 : pb->torderidx = m;
2773 4441 : persistOIDX(pb);
2774 : } else {
2775 0 : HEAPfree(m, true);
2776 0 : GDKfree(m);
2777 : }
2778 : }
2779 : }
2780 12855 : if (orderidxlock) {
2781 4441 : MT_lock_unset(&pb->batIdxLock);
2782 4441 : orderidxlock = false;
2783 : }
2784 12855 : bn->theap->dirty = true;
2785 12855 : bn->tnosorted = 0;
2786 12855 : bn->tnorevsorted = 0;
2787 12855 : bn->tnokey[0] = bn->tnokey[1] = 0;
2788 12855 : bn->tminpos = BUN_NONE;
2789 12855 : bn->tmaxpos = BUN_NONE;
2790 12855 : if (groups) {
2791 7751 : if (BATgroup_internal(groups, NULL, NULL, bn, NULL, g, NULL, NULL, true) != GDK_SUCCEED)
2792 0 : goto error;
2793 7751 : if ((*groups)->tkey &&
2794 923 : (g == NULL || (g->tsorted && g->trevsorted))) {
2795 : /* if new groups bat is key and the input
2796 : * group bat has a single value (both sorted
2797 : * and revsorted), we know the result bat is
2798 : * key */
2799 1153 : bn->tkey = true;
2800 : }
2801 : }
2802 :
2803 12855 : bat_iterator_end(&pbi);
2804 12854 : if (sorted)
2805 9205 : *sorted = bn;
2806 : else {
2807 3649 : BBPunfix(bn->batCacheid);
2808 3649 : bn = NULL;
2809 : }
2810 :
2811 12854 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o=" ALGOOPTBATFMT
2812 : ",g=" ALGOOPTBATFMT ",reverse=%d,nilslast=%d,"
2813 : "stable=%d) = (" ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2814 : ALGOOPTBATFMT " -- %ssort (" LLFMT " usec)\n",
2815 : ALGOBATPAR(b), ALGOOPTBATPAR(o), ALGOOPTBATPAR(g),
2816 : reverse, nilslast, stable, ALGOOPTBATPAR(bn),
2817 : ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on),
2818 : g ? "grouped " : "", GDKusec() - t0);
2819 12854 : if (pb && pb != b)
2820 1287 : BBPunfix(pb->batCacheid);
2821 : return GDK_SUCCEED;
2822 :
2823 0 : error:
2824 0 : bat_iterator_end(&pbi);
2825 0 : if (orderidxlock)
2826 0 : MT_lock_unset(&pb->batIdxLock);
2827 0 : if (oidxh)
2828 0 : HEAPdecref(oidxh, false);
2829 0 : BBPreclaim(bn);
2830 0 : if (pb && pb != b)
2831 0 : BBPunfix(pb->batCacheid);
2832 0 : BBPreclaim(on);
2833 0 : if (sorted)
2834 0 : *sorted = NULL;
2835 0 : if (order)
2836 0 : *order = NULL;
2837 0 : if (groups)
2838 0 : *groups = NULL;
2839 : return GDK_FAIL;
2840 : }
2841 :
2842 : /* return a new BAT of length n with seqbase hseq, and the constant v
2843 : * in the tail */
2844 : BAT *
2845 862586 : BATconstant(oid hseq, int tailtype, const void *v, BUN n, role_t role)
2846 : {
2847 862586 : BAT *bn;
2848 862586 : void *restrict p;
2849 862586 : BUN i;
2850 862586 : lng t0 = 0;
2851 :
2852 862586 : TRC_DEBUG_IF(ALGO) t0 = GDKusec();
2853 862586 : if (v == NULL)
2854 : return NULL;
2855 862586 : bn = COLnew(hseq, tailtype, n, role);
2856 862593 : if (bn != NULL && n > 0) {
2857 66438 : p = Tloc(bn, 0);
2858 66438 : switch (ATOMstorage(tailtype)) {
2859 6 : case TYPE_void:
2860 6 : v = &oid_nil;
2861 6 : BATtseqbase(bn, oid_nil);
2862 6 : break;
2863 0 : case TYPE_msk:
2864 0 : if (*(msk*)v) {
2865 0 : memset(p, 0xFF, 4 * ((n + 31) / 32));
2866 0 : if (n & 31) {
2867 0 : uint32_t *m = p;
2868 0 : m[n / 32] &= (1U << (n % 32)) - 1;
2869 : }
2870 : } else
2871 0 : memset(p, 0x00, 4 * ((n + 31) / 32));
2872 : break;
2873 8679 : case TYPE_bte:
2874 8679 : memset(p, *(bte*)v, n);
2875 8679 : break;
2876 : case TYPE_sht:
2877 7120449 : for (i = 0; i < n; i++)
2878 7105774 : ((sht *) p)[i] = *(sht *) v;
2879 : break;
2880 : case TYPE_int:
2881 : case TYPE_flt:
2882 : assert(sizeof(int) == sizeof(flt));
2883 224469506 : for (i = 0; i < n; i++)
2884 224464966 : ((int *) p)[i] = *(int *) v;
2885 : break;
2886 : case TYPE_lng:
2887 : case TYPE_dbl:
2888 : assert(sizeof(lng) == sizeof(dbl));
2889 236954212 : for (i = 0; i < n; i++)
2890 236924968 : ((lng *) p)[i] = *(lng *) v;
2891 : break;
2892 : #ifdef HAVE_HGE
2893 : case TYPE_hge:
2894 26920292 : for (i = 0; i < n; i++)
2895 26918739 : ((hge *) p)[i] = *(hge *) v;
2896 : break;
2897 : #endif
2898 : case TYPE_uuid:
2899 200047 : for (i = 0; i < n; i++)
2900 200038 : ((uuid *) p)[i] = *(uuid *) v;
2901 : break;
2902 7576 : case TYPE_str:
2903 : /* insert the first value, then just copy the
2904 : * offset lots of times */
2905 7576 : if (tfastins_nocheck(bn, 0, v) != GDK_SUCCEED) {
2906 0 : BBPreclaim(bn);
2907 0 : return NULL;
2908 : }
2909 7575 : char val[sizeof(var_t)];
2910 7575 : memcpy(val, Tloc(bn, 0), bn->twidth);
2911 7575 : if (bn->twidth == 1 && n > 1) {
2912 : /* single byte value: we have a
2913 : * function for that */
2914 4285 : memset(Tloc(bn, 1), val[0], n - 1);
2915 : } else {
2916 3290 : char *p = Tloc(bn, 0);
2917 3310 : for (i = 1; i < n; i++) {
2918 20 : p += bn->twidth;
2919 20 : memcpy(p, val, bn->twidth);
2920 : }
2921 : }
2922 : break;
2923 : default:
2924 333791 : for (i = 0; i < n; i++)
2925 333635 : if (tfastins_nocheck(bn, i, v) != GDK_SUCCEED) {
2926 0 : BBPreclaim(bn);
2927 0 : return NULL;
2928 : }
2929 : break;
2930 : }
2931 66437 : bn->theap->dirty = true;
2932 66437 : bn->tnil = n >= 1 && ATOMnilptr(tailtype) && (*ATOMcompare(tailtype))(v, ATOMnilptr(tailtype)) == 0;
2933 66435 : BATsetcount(bn, n);
2934 66433 : bn->tsorted = bn->trevsorted = ATOMlinear(tailtype);
2935 66433 : bn->tnonil = !bn->tnil;
2936 66433 : bn->tkey = BATcount(bn) <= 1;
2937 : }
2938 862588 : TRC_DEBUG(ALGO, "-> " ALGOOPTBATFMT " " LLFMT "usec\n",
2939 : ALGOOPTBATPAR(bn), GDKusec() - t0);
2940 : return bn;
2941 : }
2942 :
2943 : /*
2944 : * BAT Aggregates
2945 : *
2946 : * We retain the size() and card() aggregate results in the column
2947 : * descriptor. We would like to have such functionality in an
2948 : * extensible way for many aggregates, for DD (1) we do not want to
2949 : * change the binary BAT format on disk and (2) aggr and size are the
2950 : * most relevant aggregates.
2951 : *
2952 : * It is all hacked into the aggr[3] records; three adjacent integers
2953 : * that were left over in the column record. We refer to these as if
2954 : * it where an int aggr[3] array. The below routines set and retrieve
2955 : * the aggregate values from the tail of the BAT, as many
2956 : * aggregate-manipulating BAT functions work on tail.
2957 : *
2958 : * The rules are as follows: aggr[0] contains the alignment ID of the
2959 : * column (if set i.e. nonzero). Hence, if this value is nonzero and
2960 : * equal to b->talign, the precomputed aggregate values in
2961 : * aggr[GDK_AGGR_SIZE] and aggr[GDK_AGGR_CARD] hold. However, only one
2962 : * of them may be set at the time. This is encoded by the value
2963 : * int_nil, which cannot occur in these two aggregates.
2964 : *
2965 : * This was now extended to record the property whether we know there
2966 : * is a nil value present by mis-using the highest bits of both
2967 : * GDK_AGGR_SIZE and GDK_AGGR_CARD.
2968 : */
2969 :
2970 : void
2971 31448479 : PROPdestroy_nolock(BAT *b)
2972 : {
2973 31448479 : PROPrec *p = b->tprops;
2974 31448479 : PROPrec *n;
2975 :
2976 31448479 : b->tprops = NULL;
2977 31452393 : while (p) {
2978 4225 : n = p->next;
2979 4225 : assert(p->id != (enum prop_t) 20);
2980 4225 : VALclear(&p->v);
2981 4225 : GDKfree(p);
2982 4225 : p = n;
2983 : }
2984 31448168 : }
2985 :
2986 : void
2987 416 : PROPdestroy(BAT *b)
2988 : {
2989 416 : MT_lock_set(&b->theaplock);
2990 416 : PROPdestroy_nolock(b);
2991 416 : MT_lock_unset(&b->theaplock);
2992 416 : }
2993 :
2994 : ValPtr
2995 220701852 : BATgetprop_nolock(BAT *b, enum prop_t idx)
2996 : {
2997 220701852 : PROPrec *p;
2998 :
2999 220701852 : p = b->tprops;
3000 220776181 : while (p && p->id != idx)
3001 74329 : p = p->next;
3002 220701852 : return p ? &p->v : NULL;
3003 : }
3004 :
3005 : void
3006 361208 : BATrmprop_nolock(BAT *b, enum prop_t idx)
3007 : {
3008 361208 : PROPrec *prop = b->tprops, *prev = NULL;
3009 :
3010 361435 : while (prop) {
3011 361316 : if (prop->id == idx) {
3012 361089 : if (prev)
3013 106 : prev->next = prop->next;
3014 : else
3015 360983 : b->tprops = prop->next;
3016 361089 : VALclear(&prop->v);
3017 361089 : GDKfree(prop);
3018 361089 : return;
3019 : }
3020 227 : prev = prop;
3021 227 : prop = prop->next;
3022 : }
3023 : }
3024 :
3025 : ValPtr
3026 365351 : BATsetprop_nolock(BAT *b, enum prop_t idx, int type, const void *v)
3027 : {
3028 365351 : PROPrec *p;
3029 :
3030 365351 : p = b->tprops;
3031 371607 : while (p && p->id != idx)
3032 6256 : p = p->next;
3033 365351 : if (p == NULL) {
3034 365344 : if ((p = GDKmalloc(sizeof(PROPrec))) == NULL) {
3035 : /* properties are hints, so if we can't create
3036 : * one we ignore the error */
3037 0 : GDKclrerr();
3038 0 : return NULL;
3039 : }
3040 365344 : p->id = idx;
3041 365344 : p->next = b->tprops;
3042 365344 : p->v.vtype = 0;
3043 365344 : b->tprops = p;
3044 : } else {
3045 7 : VALclear(&p->v);
3046 : }
3047 365351 : if (VALinit(&p->v, type, v) == NULL) {
3048 : /* failed to initialize, so remove property */
3049 0 : BATrmprop_nolock(b, idx);
3050 0 : GDKclrerr();
3051 0 : p = NULL;
3052 : }
3053 0 : return p ? &p->v : NULL;
3054 : }
3055 :
3056 : ValPtr
3057 2759928 : BATgetprop(BAT *b, enum prop_t idx)
3058 : {
3059 2759928 : ValPtr p;
3060 :
3061 2759928 : MT_lock_set(&b->theaplock);
3062 2759823 : p = BATgetprop_nolock(b, idx);
3063 2759868 : MT_lock_unset(&b->theaplock);
3064 2759889 : return p;
3065 : }
3066 :
3067 : ValPtr
3068 4055 : BATsetprop(BAT *b, enum prop_t idx, int type, const void *v)
3069 : {
3070 4055 : ValPtr p;
3071 4055 : MT_lock_set(&b->theaplock);
3072 4055 : p = BATsetprop_nolock(b, idx, type, v);
3073 4055 : MT_lock_unset(&b->theaplock);
3074 4055 : return p;
3075 : }
3076 :
3077 : void
3078 2 : BATrmprop(BAT *b, enum prop_t idx)
3079 : {
3080 2 : MT_lock_set(&b->theaplock);
3081 2 : BATrmprop_nolock(b, idx);
3082 2 : MT_lock_unset(&b->theaplock);
3083 2 : }
3084 :
3085 : /*
3086 : * The BATcount_no_nil function counts all BUN in a BAT that have a
3087 : * non-nil tail value.
3088 : * This function does not fail (the callers currently don't check for failure).
3089 : */
3090 : BUN
3091 2384 : BATcount_no_nil(BAT *b, BAT *s)
3092 : {
3093 2384 : BUN cnt = 0;
3094 2384 : const void *restrict p, *restrict nil;
3095 2384 : const char *restrict base;
3096 2384 : int t;
3097 2384 : int (*cmp)(const void *, const void *);
3098 2384 : struct canditer ci;
3099 2384 : oid hseq;
3100 :
3101 2384 : BATcheck(b, 0);
3102 :
3103 2384 : hseq = b->hseqbase;
3104 2384 : canditer_init(&ci, b, s);
3105 2387 : if (b->tnonil)
3106 1761 : return ci.ncand;
3107 626 : BATiter bi = bat_iterator(b);
3108 626 : p = bi.base;
3109 626 : t = ATOMbasetype(bi.type);
3110 626 : switch (t) {
3111 0 : case TYPE_void:
3112 0 : cnt = ci.ncand * BATtdensebi(&bi);
3113 0 : break;
3114 0 : case TYPE_msk:
3115 0 : cnt = ci.ncand;
3116 0 : break;
3117 37 : case TYPE_bte:
3118 23453 : CAND_LOOP(&ci)
3119 23416 : cnt += !is_bte_nil(((const bte *) p)[canditer_next(&ci) - hseq]);
3120 : break;
3121 30 : case TYPE_sht:
3122 4409 : CAND_LOOP(&ci)
3123 4379 : cnt += !is_sht_nil(((const sht *) p)[canditer_next(&ci) - hseq]);
3124 : break;
3125 332 : case TYPE_int:
3126 6451128 : CAND_LOOP(&ci)
3127 6450796 : cnt += !is_int_nil(((const int *) p)[canditer_next(&ci) - hseq]);
3128 : break;
3129 103 : case TYPE_lng:
3130 156929 : CAND_LOOP(&ci)
3131 156826 : cnt += !is_lng_nil(((const lng *) p)[canditer_next(&ci) - hseq]);
3132 : break;
3133 : #ifdef HAVE_HGE
3134 0 : case TYPE_hge:
3135 0 : CAND_LOOP(&ci)
3136 0 : cnt += !is_hge_nil(((const hge *) p)[canditer_next(&ci) - hseq]);
3137 : break;
3138 : #endif
3139 2 : case TYPE_flt:
3140 10 : CAND_LOOP(&ci)
3141 8 : cnt += !is_flt_nil(((const flt *) p)[canditer_next(&ci) - hseq]);
3142 : break;
3143 27 : case TYPE_dbl:
3144 68 : CAND_LOOP(&ci)
3145 41 : cnt += !is_dbl_nil(((const dbl *) p)[canditer_next(&ci) - hseq]);
3146 : break;
3147 1 : case TYPE_uuid:
3148 4 : CAND_LOOP(&ci)
3149 3 : cnt += !is_uuid_nil(((const uuid *) p)[canditer_next(&ci) - hseq]);
3150 : break;
3151 94 : case TYPE_str:
3152 94 : base = bi.vh->base;
3153 94 : switch (bi.width) {
3154 61 : case 1:
3155 5921 : CAND_LOOP(&ci)
3156 5860 : cnt += base[(var_t) ((const uint8_t *) p)[canditer_next(&ci) - hseq] + GDK_VAROFFSET] != '\200';
3157 : break;
3158 31 : case 2:
3159 25510 : CAND_LOOP(&ci)
3160 25479 : cnt += base[(var_t) ((const uint16_t *) p)[canditer_next(&ci) - hseq] + GDK_VAROFFSET] != '\200';
3161 : break;
3162 2 : case 4:
3163 2683 : CAND_LOOP(&ci)
3164 2681 : cnt += base[(var_t) ((const uint32_t *) p)[canditer_next(&ci) - hseq]] != '\200';
3165 : break;
3166 : #if SIZEOF_VAR_T == 8
3167 0 : case 8:
3168 0 : CAND_LOOP(&ci)
3169 0 : cnt += base[(var_t) ((const uint64_t *) p)[canditer_next(&ci) - hseq]] != '\200';
3170 : break;
3171 : #endif
3172 : default:
3173 0 : MT_UNREACHABLE();
3174 : }
3175 : break;
3176 0 : default:
3177 0 : nil = ATOMnilptr(t);
3178 0 : cmp = ATOMcompare(t);
3179 0 : if (nil == NULL) {
3180 0 : cnt = ci.ncand;
3181 0 : } else if (b->tvheap) {
3182 0 : base = b->tvheap->base;
3183 0 : CAND_LOOP(&ci)
3184 0 : cnt += (*cmp)(nil, base + ((const var_t *) p)[canditer_next(&ci) - hseq]) != 0;
3185 : } else {
3186 0 : CAND_LOOP(&ci)
3187 0 : cnt += (*cmp)(BUNtloc(bi, canditer_next(&ci) - hseq), nil) != 0;
3188 : }
3189 : break;
3190 : }
3191 626 : if (cnt == bi.count) {
3192 450 : MT_lock_set(&b->theaplock);
3193 450 : if (cnt == BATcount(b) && bi.h == b->theap) {
3194 : /* we learned something */
3195 450 : b->tnonil = true;
3196 450 : assert(!b->tnil);
3197 450 : b->tnil = false;
3198 : }
3199 450 : bat pbid = VIEWtparent(b);
3200 450 : MT_lock_unset(&b->theaplock);
3201 450 : if (pbid) {
3202 179 : BAT *pb = BATdescriptor(pbid);
3203 179 : if (pb) {
3204 179 : MT_lock_set(&pb->theaplock);
3205 179 : if (cnt == BATcount(pb) &&
3206 13 : bi.h == pb->theap &&
3207 13 : !pb->tnonil) {
3208 13 : pb->tnonil = true;
3209 13 : assert(!pb->tnil);
3210 13 : pb->tnil = false;
3211 : }
3212 179 : MT_lock_unset(&pb->theaplock);
3213 179 : BBPunfix(pb->batCacheid);
3214 : }
3215 : }
3216 : }
3217 626 : bat_iterator_end(&bi);
3218 626 : return cnt;
3219 : }
|