Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * (c) M. L. Kersten, P. Boncz, S. Manegold, N. Nes, K.S. Mullender
15 : * Common BAT Operations
16 : * We factor out all possible overhead by inlining code. This
17 : * includes the macros BUNhead and BUNtail, which do a test to see
18 : * whether the atom resides in the buns or in a variable storage
19 : * heap.
20 : */
21 : #include "monetdb_config.h"
22 : #include "gdk.h"
23 : #include "gdk_private.h"
24 :
25 : gdk_return
26 61880516 : unshare_varsized_heap(BAT *b)
27 : {
28 61880516 : if (ATOMvarsized(b->ttype) &&
29 29061893 : b->tvheap->parentid != b->batCacheid) {
30 385 : Heap *h = GDKmalloc(sizeof(Heap));
31 386 : if (h == NULL)
32 : return GDK_FAIL;
33 386 : MT_thread_setalgorithm("unshare vheap");
34 770 : *h = (Heap) {
35 384 : .parentid = b->batCacheid,
36 386 : .farmid = BBPselectfarm(b->batRole, TYPE_str, varheap),
37 : .refs = ATOMIC_VAR_INIT(1),
38 : };
39 384 : strconcat_len(h->filename, sizeof(h->filename),
40 384 : BBP_physical(b->batCacheid), ".theap", NULL);
41 385 : if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
42 0 : HEAPfree(h, true);
43 0 : GDKfree(h);
44 0 : return GDK_FAIL;
45 : }
46 384 : MT_lock_set(&b->theaplock);
47 382 : Heap *oh = b->tvheap;
48 382 : b->tvheap = h;
49 382 : MT_lock_unset(&b->theaplock);
50 386 : BBPrelease(oh->parentid);
51 385 : HEAPdecref(oh, false);
52 : }
53 : return GDK_SUCCEED;
54 : }
55 :
56 : /* We try to be clever when appending one string bat to another.
57 : * First of all, we try to actually share the string heap so that we
58 : * don't need an extra copy, and if that can't be done, we see whether
59 : * it makes sense to just quickly copy the whole string heap instead
60 : * of inserting individual strings. See the comments in the code for
61 : * more information. */
62 : static gdk_return
63 107183 : insert_string_bat(BAT *b, BATiter *ni, struct canditer *ci, bool force, bool mayshare, QryCtx *qry_ctx)
64 : {
65 107183 : size_t toff = ~(size_t) 0; /* tail offset */
66 107183 : BUN p, r; /* loop variables */
67 107183 : const void *tp = NULL; /* tail value pointer */
68 107183 : var_t v;
69 107183 : size_t off; /* offset within n's string heap */
70 107183 : BUN cnt = ci->ncand;
71 107183 : BUN oldcnt = BATcount(b);
72 :
73 107183 : assert(b->ttype == TYPE_str);
74 107183 : assert(b->tbaseoff == 0);
75 107183 : assert(b->theap->parentid == b->batCacheid);
76 : /* only transient bats can use some other bat's string heap */
77 107183 : assert(b->batRole == TRANSIENT || b->tvheap->parentid == b->batCacheid);
78 107183 : if (cnt == 0)
79 : return GDK_SUCCEED;
80 :
81 107243 : if (b->tvheap == ni->vh) {
82 : /* vheaps are already shared, continue doing so: we just
83 : * need to append the offsets */
84 43526 : toff = 0;
85 43526 : MT_thread_setalgorithm("shared vheap");
86 63717 : } else if (mayshare && b->batRole == TRANSIENT && oldcnt == 0) {
87 : /* we can share the vheaps, so we then only need to
88 : * append the offsets */
89 52525 : MT_lock_set(&b->theaplock);
90 52642 : bat bid = b->tvheap->parentid;
91 52642 : HEAPdecref(b->tvheap, bid == b->batCacheid);
92 52703 : HEAPincref(ni->vh);
93 52750 : b->tvheap = ni->vh;
94 52750 : MT_lock_unset(&b->theaplock);
95 52752 : BBPretain(ni->vh->parentid);
96 52711 : if (bid != b->batCacheid)
97 0 : BBPrelease(bid);
98 52711 : toff = 0;
99 52711 : MT_thread_setalgorithm("share vheap");
100 : } else {
101 : /* no heap sharing, so also make sure the heap isn't
102 : * shared currently (we're not allowed to write in
103 : * another bat's heap) */
104 11578 : if (b->tvheap->parentid != b->batCacheid &&
105 386 : unshare_varsized_heap(b) != GDK_SUCCEED) {
106 : return GDK_FAIL;
107 : }
108 11192 : if (oldcnt == 0 || (!GDK_ELIMDOUBLES(b->tvheap) &&
109 78 : !GDK_ELIMDOUBLES(ni->vh))) {
110 : /* we'll consider copying the string heap completely
111 : *
112 : * we first estimate how much space the string heap
113 : * should occupy, given the number of rows we need to
114 : * insert, then, if that is way smaller than the actual
115 : * space occupied, we will skip the copy and just insert
116 : * one by one */
117 : size_t len = 0;
118 5124997 : for (int i = 0; i < 1024; i++) {
119 5119965 : p = (BUN) (((double) rand() / RAND_MAX) * (cnt - 1));
120 5085424 : p = canditer_idx(ci, p) - ni->b->hseqbase;
121 5088386 : len += strlen(BUNtvar(*ni, p)) + 1;
122 : }
123 5032 : len = (len + 512) / 1024; /* rounded average length */
124 5032 : r = (GDK_ELIMLIMIT - GDK_STRHASHSIZE) / (len + 12);
125 : /* r is estimate of number of strings in
126 : * double-eliminated area */
127 5032 : if (r < ci->ncand)
128 892 : len = GDK_ELIMLIMIT + (ci->ncand - r) * len;
129 : else
130 4140 : len = GDK_STRHASHSIZE + ci->ncand * (len + 12);
131 : /* len is total estimated expected size of vheap */
132 :
133 5032 : if (len > ni->vhfree / 2) {
134 : /* we copy the string heap, perhaps appending */
135 5023 : if (oldcnt == 0) {
136 4973 : toff = 0;
137 4973 : MT_thread_setalgorithm("copy vheap");
138 : } else {
139 50 : toff = (b->tvheap->free + GDK_VARALIGN - 1) & ~(GDK_VARALIGN - 1);
140 50 : MT_thread_setalgorithm("append vheap");
141 : }
142 :
143 5023 : MT_lock_set(&b->theaplock);
144 5023 : if (HEAPgrow(&b->tvheap, toff + ni->vhfree, force) != GDK_SUCCEED) {
145 0 : MT_lock_unset(&b->theaplock);
146 0 : return GDK_FAIL;
147 : }
148 5020 : memcpy(b->tvheap->base + toff, ni->vh->base, ni->vhfree);
149 5020 : b->tvheap->free = toff + ni->vhfree;
150 5020 : b->tvheap->dirty = true;
151 5020 : MT_lock_unset(&b->theaplock);
152 : }
153 : }
154 : }
155 : /* if toff has the initial value of ~0, we insert strings
156 : * individually, otherwise we only copy (insert) offsets */
157 101257 : if (toff == ~(size_t) 0)
158 : v = GDK_VAROFFSET;
159 : else
160 101218 : v = b->tvheap->free - 1;
161 :
162 : /* make sure there is (vertical) space in the offset heap, we
163 : * may also widen thanks to v, set above */
164 107480 : if (GDKupgradevarheap(b, v, oldcnt + cnt < b->batCapacity ? b->batCapacity : oldcnt + cnt, b->batCount) != GDK_SUCCEED) {
165 : return GDK_FAIL;
166 : }
167 :
168 107398 : if (toff == 0 && ni->width == b->twidth && ci->tpe == cand_dense) {
169 : /* we don't need to do any translation of offset
170 : * values, so we can use fast memcpy */
171 100752 : MT_thread_setalgorithm("memcpy offsets");
172 100884 : memcpy(Tloc(b, BATcount(b)), (const char *) ni->base + ((ci->seq - ni->b->hseqbase) << ni->shift), cnt << ni->shift);
173 6646 : } else if (toff != ~(size_t) 0) {
174 : /* we don't need to insert any actual strings since we
175 : * have already made sure that they are all in b's
176 : * string heap at known locations (namely the offset
177 : * in n added to toff), so insert offsets from n after
178 : * adding toff into b */
179 : /* note the use of the "restrict" qualifier here: all
180 : * four pointers below point to the same value, but
181 : * only one of them will actually be used, hence we
182 : * still obey the rule for restrict-qualified
183 : * pointers */
184 366 : const uint8_t *restrict tbp = (const uint8_t *) ni->base;
185 366 : const uint16_t *restrict tsp = (const uint16_t *) ni->base;
186 366 : const uint32_t *restrict tip = (const uint32_t *) ni->base;
187 : #if SIZEOF_VAR_T == 8
188 366 : const uint64_t *restrict tlp = (const uint64_t *) ni->base;
189 : #endif
190 :
191 366 : MT_thread_setalgorithm("copy offset values");
192 367 : r = b->batCount;
193 3859007 : TIMEOUT_LOOP(cnt, qry_ctx) {
194 3858045 : p = canditer_next(ci) - ni->b->hseqbase;
195 3858045 : switch (ni->width) {
196 196 : case 1:
197 196 : v = (var_t) tbp[p] + GDK_VAROFFSET;
198 196 : break;
199 5325 : case 2:
200 5325 : v = (var_t) tsp[p] + GDK_VAROFFSET;
201 5325 : break;
202 3852571 : case 4:
203 3852571 : v = (var_t) tip[p];
204 3852571 : break;
205 : #if SIZEOF_VAR_T == 8
206 0 : case 8:
207 0 : v = (var_t) tlp[p];
208 0 : break;
209 : #endif
210 : default:
211 0 : MT_UNREACHABLE();
212 : }
213 3858045 : v = (var_t) ((size_t) v + toff);
214 3858045 : assert(v >= GDK_VAROFFSET);
215 3858045 : assert((size_t) v < b->tvheap->free);
216 3858045 : switch (b->twidth) {
217 3056 : case 1:
218 3056 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 8));
219 3056 : ((uint8_t *) b->theap->base)[r++] = (uint8_t) (v - GDK_VAROFFSET);
220 3056 : break;
221 5869 : case 2:
222 5869 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 16));
223 5869 : ((uint16_t *) b->theap->base)[r++] = (uint16_t) (v - GDK_VAROFFSET);
224 5869 : break;
225 3849120 : case 4:
226 : #if SIZEOF_VAR_T == 8
227 3849120 : assert(v < ((var_t) 1 << 32));
228 : #endif
229 3849120 : ((uint32_t *) b->theap->base)[r++] = (uint32_t) v;
230 3849120 : break;
231 : #if SIZEOF_VAR_T == 8
232 0 : case 8:
233 0 : ((uint64_t *) b->theap->base)[r++] = (uint64_t) v;
234 0 : break;
235 : #endif
236 : default:
237 3858045 : MT_UNREACHABLE();
238 : }
239 : }
240 6280 : } else if (b->tvheap->free < ni->vhfree / 2 ||
241 : GDK_ELIMDOUBLES(b->tvheap)) {
242 : /* if b's string heap is much smaller than n's string
243 : * heap, don't bother checking whether n's string
244 : * values occur in b's string heap; also, if b is
245 : * (still) fully double eliminated, we must continue
246 : * to use the double elimination mechanism */
247 6253 : r = b->batCount;
248 6253 : oid hseq = ni->b->hseqbase;
249 6253 : MT_thread_setalgorithm("insert string values");
250 7597301 : TIMEOUT_LOOP(cnt, qry_ctx) {
251 7584422 : p = canditer_next(ci) - hseq;
252 7456765 : tp = BUNtvar(*ni, p);
253 7484904 : if (tfastins_nocheckVAR(b, r, tp) != GDK_SUCCEED) {
254 : return GDK_FAIL;
255 : }
256 7584487 : r++;
257 : }
258 : } else {
259 : /* Insert values from n individually into b; however,
260 : * we check whether there is a string in b's string
261 : * heap at the same offset as the string is in n's
262 : * string heap (in case b's string heap is a copy of
263 : * n's). If this is the case, we just copy the
264 : * offset, otherwise we insert normally. */
265 27 : r = b->batCount;
266 27 : MT_thread_setalgorithm("insert string values with check");
267 27955 : TIMEOUT_LOOP(cnt, qry_ctx) {
268 27900 : p = canditer_next(ci) - ni->b->hseqbase;
269 27900 : off = BUNtvaroff(*ni, p); /* the offset */
270 27900 : tp = ni->vh->base + off; /* the string */
271 27900 : if (off < b->tvheap->free &&
272 27900 : strcmp(b->tvheap->base + off, tp) == 0) {
273 : /* we found the string at the same
274 : * offset in b's string heap as it was
275 : * in n's string heap, so we don't
276 : * have to insert a new string into b:
277 : * we can just copy the offset */
278 23511 : v = (var_t) off;
279 23511 : switch (b->twidth) {
280 0 : case 1:
281 0 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 8));
282 0 : ((uint8_t *) b->theap->base)[r] = (uint8_t) (v - GDK_VAROFFSET);
283 0 : break;
284 5 : case 2:
285 5 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 16));
286 5 : ((uint16_t *) b->theap->base)[r] = (uint16_t) (v - GDK_VAROFFSET);
287 5 : break;
288 23506 : case 4:
289 : #if SIZEOF_VAR_T == 8
290 23506 : assert(v < ((var_t) 1 << 32));
291 : #endif
292 23506 : ((uint32_t *) b->theap->base)[r] = (uint32_t) v;
293 23506 : break;
294 : #if SIZEOF_VAR_T == 8
295 0 : case 8:
296 0 : ((uint64_t *) b->theap->base)[r] = (uint64_t) v;
297 0 : break;
298 : #endif
299 : default:
300 0 : MT_UNREACHABLE();
301 : }
302 : } else {
303 4389 : if (tfastins_nocheckVAR(b, r, tp) != GDK_SUCCEED) {
304 : return GDK_FAIL;
305 : }
306 : }
307 27900 : r++;
308 : }
309 : }
310 107576 : TIMEOUT_CHECK(qry_ctx, TIMEOUT_HANDLER(GDK_FAIL, qry_ctx));
311 107228 : MT_rwlock_wrlock(&b->thashlock);
312 107614 : MT_lock_set(&b->theaplock);
313 107509 : BATsetcount(b, oldcnt + ci->ncand);
314 107538 : assert(b->batCapacity >= b->batCount);
315 107538 : MT_lock_unset(&b->theaplock);
316 : /* maintain hash */
317 107570 : for (r = oldcnt, cnt = BATcount(b); b->thash && r < cnt; r++) {
318 14 : HASHappend_locked(b, r, b->tvheap->base + VarHeapVal(Tloc(b, 0), r, b->twidth));
319 : }
320 107556 : BUN nunique = b->thash ? b->thash->nunique : 0;
321 107556 : MT_rwlock_wrunlock(&b->thashlock);
322 107591 : if (nunique != 0) {
323 6 : MT_lock_set(&b->theaplock);
324 6 : b->tunique_est = (double) nunique;
325 6 : MT_lock_unset(&b->theaplock);
326 : }
327 : return GDK_SUCCEED;
328 : }
329 :
330 : static gdk_return
331 517 : append_varsized_bat(BAT *b, BATiter *ni, struct canditer *ci, bool mayshare)
332 : {
333 517 : BUN cnt = ci->ncand, r;
334 517 : oid hseq = ni->b->hseqbase;
335 :
336 : /* only transient bats can use some other bat's vheap */
337 517 : assert(b->batRole == TRANSIENT || b->tvheap->parentid == b->batCacheid);
338 : /* make sure the bats use var_t */
339 517 : assert(b->twidth == ni->width);
340 517 : assert(b->twidth == SIZEOF_VAR_T);
341 517 : if (cnt == 0)
342 : return GDK_SUCCEED;
343 518 : if (cnt > BATcapacity(b) - BATcount(b)) {
344 : /* if needed space exceeds a normal growth extend just
345 : * with what's needed */
346 18 : BUN ncap = BATcount(b) + cnt;
347 18 : BUN grows = BATgrows(b);
348 :
349 18 : if (ncap > grows)
350 : grows = ncap;
351 18 : if (BATextend(b, grows) != GDK_SUCCEED)
352 : return GDK_FAIL;
353 : }
354 518 : if (mayshare &&
355 518 : BATcount(b) == 0 &&
356 229 : b->batRole == TRANSIENT &&
357 144 : ni->restricted == BAT_READ &&
358 144 : b->tvheap != ni->vh) {
359 : /* if b is still empty, in the transient farm, and n
360 : * is read-only, we replace b's vheap with a reference
361 : * to n's */
362 144 : MT_lock_set(&b->theaplock);
363 144 : bat bid = b->tvheap->parentid;
364 144 : HEAPdecref(b->tvheap, true);
365 144 : HEAPincref(ni->vh);
366 144 : b->tvheap = ni->vh;
367 144 : MT_lock_unset(&b->theaplock);
368 144 : BBPretain(ni->vh->parentid);
369 144 : if (bid != b->batCacheid)
370 0 : BBPrelease(bid);
371 : }
372 518 : if (b->tvheap == ni->vh) {
373 : /* if b and n use the same vheap, we only need to copy
374 : * the offsets from n to b */
375 330 : if (ci->tpe == cand_dense) {
376 : /* fast memcpy since we copy a consecutive
377 : * chunk of memory */
378 330 : memcpy(Tloc(b, BATcount(b)),
379 330 : (const var_t *) ni->base + (ci->seq - hseq),
380 330 : cnt << b->tshift);
381 : } else {
382 0 : var_t *restrict dst = (var_t *) Tloc(b, BATcount(b));
383 0 : const var_t *restrict src = (const var_t *) ni->base;
384 0 : while (cnt > 0) {
385 0 : cnt--;
386 0 : *dst++ = src[canditer_next(ci) - hseq];
387 : }
388 : }
389 330 : MT_rwlock_wrlock(&b->thashlock);
390 330 : MT_lock_set(&b->theaplock);
391 330 : BATsetcount(b, BATcount(b) + ci->ncand);
392 330 : MT_lock_unset(&b->theaplock);
393 : /* maintain hash table */
394 330 : for (BUN i = BATcount(b) - ci->ncand;
395 330 : b->thash && i < BATcount(b);
396 0 : i++) {
397 0 : HASHappend_locked(b, i, b->tvheap->base + *(var_t *) Tloc(b, i));
398 : }
399 330 : BUN nunique = b->thash ? b->thash->nunique : 0;
400 330 : MT_rwlock_wrunlock(&b->thashlock);
401 330 : if (nunique != 0) {
402 0 : MT_lock_set(&b->theaplock);
403 0 : b->tunique_est = (double) nunique;
404 0 : MT_lock_unset(&b->theaplock);
405 : }
406 330 : return GDK_SUCCEED;
407 : }
408 : /* b and n do not share their vheap, so we need to copy data */
409 188 : if (b->tvheap->parentid != b->batCacheid) {
410 : /* if b shares its vheap with some other bat, unshare it */
411 12 : Heap *h = GDKmalloc(sizeof(Heap));
412 12 : if (h == NULL) {
413 : return GDK_FAIL;
414 : }
415 24 : *h = (Heap) {
416 12 : .parentid = b->batCacheid,
417 12 : .farmid = BBPselectfarm(b->batRole, b->ttype, varheap),
418 : .refs = ATOMIC_VAR_INIT(1),
419 : };
420 12 : strconcat_len(h->filename, sizeof(h->filename),
421 12 : BBP_physical(b->batCacheid), ".theap", NULL);
422 12 : if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
423 0 : HEAPfree(h, true);
424 0 : GDKfree(h);
425 0 : return GDK_FAIL;
426 : }
427 12 : MT_lock_set(&b->theaplock);
428 12 : Heap *oh = b->tvheap;
429 12 : b->tvheap = h;
430 12 : MT_lock_unset(&b->theaplock);
431 12 : if (oh->parentid != b->batCacheid)
432 12 : BBPrelease(oh->parentid);
433 12 : HEAPdecref(oh, false);
434 : }
435 188 : if (BATcount(b) == 0 &&
436 85 : ci->tpe == cand_dense && ci->ncand == ni->count) {
437 : /* just copy the heaps */
438 85 : MT_lock_set(&b->theaplock);
439 85 : if (HEAPgrow(&b->tvheap, ni->vhfree, false) != GDK_SUCCEED) {
440 0 : MT_lock_unset(&b->theaplock);
441 0 : return GDK_FAIL;
442 : }
443 83 : memcpy(b->theap->base, ni->base, ni->hfree);
444 83 : memcpy(b->tvheap->base, ni->vh->base, ni->vhfree);
445 83 : b->theap->free = ni->hfree;
446 83 : b->theap->dirty = true;
447 83 : b->tvheap->free = ni->vhfree;
448 83 : b->tvheap->dirty = true;
449 83 : BATsetcount(b, ni->count);
450 84 : b->tnil = ni->nil;
451 84 : b->tnonil = ni->nonil;
452 84 : b->tsorted = ni->sorted;
453 84 : b->tnosorted = ni->nosorted;
454 84 : b->trevsorted = ni->revsorted;
455 84 : b->tnorevsorted = ni->norevsorted;
456 84 : b->tkey = ni->key;
457 84 : b->tnokey[0] = ni->nokey[0];
458 84 : b->tnokey[1] = ni->nokey[1];
459 84 : b->tminpos = ni->minpos;
460 84 : b->tmaxpos = ni->maxpos;
461 84 : b->tunique_est = ni->unique_est;
462 84 : MT_lock_unset(&b->theaplock);
463 84 : return GDK_SUCCEED;
464 : }
465 : /* copy data from n to b */
466 : r = BATcount(b);
467 380626 : for (BUN i = 0; i < cnt; i++) {
468 380523 : BUN p = canditer_next(ci) - hseq;
469 380521 : const void *t = BUNtvar(*ni, p);
470 380521 : if (tfastins_nocheckVAR(b, r, t) != GDK_SUCCEED) {
471 : return GDK_FAIL;
472 : }
473 380523 : r++;
474 : }
475 103 : MT_rwlock_wrlock(&b->thashlock);
476 103 : if (b->thash) {
477 0 : r -= cnt;
478 0 : BATiter bi = bat_iterator_nolock(b);
479 0 : for (BUN i = 0; i < cnt; i++) {
480 0 : const void *t = BUNtvar(bi, r);
481 0 : HASHappend_locked(b, r, t);
482 0 : r++;
483 : }
484 : }
485 103 : BUN nunique = b->thash ? b->thash->nunique : 0;
486 103 : MT_lock_set(&b->theaplock);
487 105 : BATsetcount(b, r);
488 104 : if (nunique != 0)
489 0 : b->tunique_est = (double) nunique;
490 104 : MT_lock_unset(&b->theaplock);
491 105 : MT_rwlock_wrunlock(&b->thashlock);
492 105 : return GDK_SUCCEED;
493 : }
494 :
495 : static gdk_return
496 377 : append_msk_bat(BAT *b, BATiter *ni, struct canditer *ci)
497 : {
498 377 : if (ci->ncand == 0)
499 : return GDK_SUCCEED;
500 377 : if (BATextend(b, BATcount(b) + ci->ncand) != GDK_SUCCEED)
501 : return GDK_FAIL;
502 :
503 377 : MT_lock_set(&b->theaplock);
504 :
505 377 : uint32_t boff = b->batCount % 32;
506 377 : uint32_t *bp = (uint32_t *) b->theap->base + b->batCount / 32;
507 377 : b->batCount += ci->ncand;
508 377 : b->theap->dirty = true;
509 377 : b->theap->free = ((b->batCount + 31) / 32) * 4;
510 377 : if (ci->tpe == cand_dense) {
511 377 : const uint32_t *np;
512 377 : uint32_t noff, mask;
513 377 : BUN cnt;
514 377 : noff = (ci->seq - ni->b->hseqbase) % 32;
515 377 : cnt = ci->ncand;
516 377 : np = (const uint32_t *) ni->base + (ci->seq - ni->b->hseqbase) / 32;
517 377 : if (boff == noff) {
518 : /* words of b and n are aligned, so we don't
519 : * need to shift bits around */
520 77 : if (boff + cnt <= 32) {
521 : /* all new bits within one word */
522 68 : if (cnt == 32) {
523 0 : *bp = *np;
524 : } else {
525 68 : mask = ((1U << cnt) - 1) << boff;
526 68 : *bp &= ~mask;
527 68 : *bp |= *np & mask;
528 : }
529 : } else {
530 : /* multiple words of b are affected */
531 9 : if (boff != 0) {
532 : /* first fill up the rest of the first
533 : * word */
534 0 : mask = ~0U << boff;
535 0 : *bp &= ~mask;
536 0 : *bp++ |= *np++ & mask;
537 0 : cnt -= 32 - boff;
538 : }
539 9 : if (cnt >= 32) {
540 : /* copy an integral number of words fast */
541 9 : BUN nw = cnt / 32;
542 9 : memcpy(bp, np, nw*sizeof(int));
543 9 : bp += nw;
544 9 : np += nw;
545 9 : cnt %= 32;
546 : }
547 9 : if (cnt > 0) {
548 : /* do the left over bits */
549 9 : mask = (1U << cnt) - 1;
550 9 : *bp = *np & mask;
551 : }
552 : }
553 300 : } else if (boff > noff) {
554 300 : if (boff + cnt <= 32) {
555 : /* we only need to copy bits from a
556 : * single word of n to a single word
557 : * of b */
558 : /* boff > 0, so cnt < 32, hence the
559 : * shift is ok */
560 268 : mask = (1U << cnt) - 1;
561 268 : *bp &= ~(mask << boff);
562 268 : *bp |= (*np & (mask << noff)) << (boff - noff);
563 : } else {
564 : /* first fill the rest of the last partial
565 : * word of b, so that's 32-boff bits */
566 32 : mask = (1U << (32 - boff)) - 1;
567 32 : *bp &= ~(mask << boff);
568 32 : *bp++ |= (*np & (mask << noff)) << (boff - noff);
569 32 : cnt -= 32 - boff;
570 :
571 : /* set boff and noff to the amount we need to
572 : * shift bits in consecutive words of n around
573 : * to fit into the next word of b; set mask to
574 : * the mask of the bottom bits of n that fit
575 : * in a word of b (and the complement are the
576 : * top bits that go to another word of b) */
577 32 : boff -= noff;
578 32 : noff = 32 - boff;
579 32 : mask = (1U << noff) - 1;
580 249 : while (cnt >= 32) {
581 217 : *bp = (*np++ & ~mask) >> noff;
582 217 : *bp++ |= (*np & mask) << boff;
583 217 : cnt -= 32;
584 : }
585 32 : if (cnt > noff) {
586 : /* the last bits come from two words
587 : * in n */
588 15 : *bp = (*np++ & ~mask) >> noff;
589 15 : cnt -= noff;
590 15 : mask = (1U << cnt) - 1;
591 15 : *bp++ |= (*np & mask) << boff;
592 17 : } else if (cnt > 0) {
593 : /* the last bits come from a single
594 : * word in n */
595 14 : mask = ((1U << cnt) - 1) << noff;
596 14 : *bp = (*np & mask) >> noff;
597 : }
598 : }
599 : } else {
600 : /* boff < noff */
601 0 : if (noff + cnt <= 32) {
602 : /* only need part of the first word of n */
603 0 : assert(cnt < 32); /* noff > 0, so cnt < 32 */
604 0 : mask = (1U << cnt) - 1;
605 0 : *bp &= ~(mask << boff);
606 0 : *bp |= (*np & (mask << noff)) >> (noff - boff);
607 0 : } else if (boff + cnt <= 32) {
608 : /* only need to fill a single word of
609 : * b, but from two of n */
610 0 : if (cnt < 32)
611 0 : *bp &= ~(((1U << cnt) - 1) << boff);
612 : else
613 0 : *bp = 0;
614 0 : mask = ~((1U << noff) - 1);
615 0 : *bp |= (*np++ & mask) >> (noff - boff);
616 0 : cnt -= 32 - noff;
617 0 : mask = (1U << cnt) - 1;
618 0 : *bp |= (*np & mask) << (32 - noff);
619 : } else {
620 0 : if (boff > 0) {
621 : /* fill the rest of the first word of b */
622 0 : cnt -= 32 - boff;
623 0 : *bp &= (1U << boff) - 1;
624 0 : mask = ~((1U << noff) - 1);
625 0 : noff -= boff;
626 0 : boff = 32 - noff;
627 0 : *bp |= (*np++ & mask) >> noff;
628 0 : *bp |= (*np & ((1U << noff) - 1)) << boff;
629 : } else {
630 0 : boff = 32 - noff;
631 : }
632 0 : mask = (1U << noff) - 1;
633 0 : while (cnt >= 32) {
634 0 : *bp = (*np++ & ~mask) >> noff;
635 0 : *bp++ |= (*np & mask) << boff;
636 0 : cnt -= 32;
637 : }
638 0 : if (cnt > 0) {
639 0 : *bp = (*np++ & ~mask) >> noff;
640 0 : if (cnt > noff)
641 0 : *bp++ |= (*np & mask) << boff;
642 : }
643 : }
644 : }
645 : } else {
646 0 : oid o;
647 0 : uint32_t v = boff > 0 ? *bp & ((1U << boff) - 1) : 0;
648 0 : do {
649 0 : for (uint32_t i = boff; i < 32; i++) {
650 0 : o = canditer_next(ci);
651 0 : if (is_oid_nil(o))
652 : break;
653 0 : o -= ni->b->hseqbase;
654 0 : v |= (uint32_t) Tmskval(ni, o - ni->b->hseqbase) << i;
655 : }
656 0 : *bp++ = v;
657 0 : v = 0;
658 0 : boff = 0;
659 0 : } while (!is_oid_nil(o));
660 : }
661 377 : MT_lock_unset(&b->theaplock);
662 377 : return GDK_SUCCEED;
663 : }
664 :
665 : /* Append the contents of BAT n (subject to the optional candidate
666 : * list s) to BAT b. If b is empty, b will get the seqbase of s if it
667 : * was passed in, and else the seqbase of n. */
668 : static gdk_return
669 2176608 : BATappend2(BAT *b, BAT *n, BAT *s, bool force, bool mayshare)
670 : {
671 2176608 : struct canditer ci;
672 2176608 : BUN r;
673 2176608 : oid hseq = n->hseqbase;
674 2176608 : char buf[64];
675 2176608 : lng t0 = 0;
676 2176608 : const ValRecord *prop = NULL;
677 2176608 : ValRecord minprop, maxprop;
678 2176608 : const void *minbound = NULL, *maxbound = NULL;
679 2176608 : int (*atomcmp) (const void *, const void *) = ATOMcompare(b->ttype);
680 2176608 : bool hlocked = false;
681 :
682 2176608 : if (b == NULL || n == NULL || BATcount(n) == 0) {
683 : return GDK_SUCCEED;
684 : }
685 1284838 : assert(b->theap->parentid == b->batCacheid);
686 :
687 1284838 : TRC_DEBUG_IF(ALGO) {
688 0 : t0 = GDKusec();
689 0 : snprintf(buf, sizeof(buf), ALGOBATFMT, ALGOBATPAR(b));
690 : }
691 :
692 1284838 : ALIGNapp(b, force, GDK_FAIL);
693 :
694 3817572 : if (ATOMstorage(ATOMtype(b->ttype)) != ATOMstorage(ATOMtype(n->ttype))) {
695 0 : GDKerror("Incompatible operands ("ALGOBATFMT" vs. "ALGOBATFMT").\n", ALGOBATPAR(b), ALGOBATPAR(n));
696 0 : return GDK_FAIL;
697 : }
698 :
699 1287643 : if (BATttype(b) != BATttype(n) &&
700 : ATOMtype(b->ttype) != ATOMtype(n->ttype)) {
701 0 : TRC_DEBUG(CHECK_, "Interpreting %s as %s.\n",
702 : ATOMname(BATttype(n)), ATOMname(BATttype(b)));
703 : }
704 :
705 1287643 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
706 :
707 1287236 : BATiter ni = bat_iterator(n);
708 :
709 1287550 : canditer_init(&ci, n, s);
710 1282908 : if (ci.ncand == 0) {
711 0 : goto doreturn;
712 : }
713 :
714 1282908 : if (BATcount(b) + ci.ncand > BUN_MAX) {
715 0 : bat_iterator_end(&ni);
716 0 : GDKerror("combined BATs too large\n");
717 0 : return GDK_FAIL;
718 : }
719 :
720 1282908 : if (b->hseqbase + BATcount(b) + ci.ncand >= GDK_oid_max) {
721 0 : bat_iterator_end(&ni);
722 0 : GDKerror("overflow of head value\n");
723 0 : return GDK_FAIL;
724 : }
725 :
726 1282908 : IMPSdestroy(b); /* imprints do not support updates yet */
727 1278753 : OIDXdestroy(b);
728 1278725 : STRMPdestroy(b); /* TODO: use STRMPappendBitString */
729 1278602 : RTREEdestroy(b);
730 :
731 1279166 : MT_lock_set(&b->theaplock);
732 1286476 : const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
733 1284866 : if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL &&
734 48 : VALcopy(&minprop, prop) != NULL) {
735 48 : minbound = VALptr(&minprop);
736 48 : if (ci.ncand == BATcount(n) &&
737 62 : ni.minpos != BUN_NONE &&
738 14 : atomcmp(BUNtail(ni, ni.minpos), minbound) < 0) {
739 0 : assert(0);
740 : GDKerror("value out of bounds\n");
741 : MT_lock_unset(&b->theaplock);
742 : goto bailout;
743 : }
744 : }
745 1284410 : if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL &&
746 40 : VALcopy(&maxprop, prop) != NULL) {
747 40 : maxbound = VALptr(&maxprop);
748 40 : if (ci.ncand == BATcount(n) &&
749 52 : ni.maxpos != BUN_NONE &&
750 12 : atomcmp(BUNtail(ni, ni.maxpos), maxbound) >= 0) {
751 0 : assert(0);
752 : GDKerror("value out of bounds\n");
753 : MT_lock_unset(&b->theaplock);
754 : goto bailout;
755 : }
756 : }
757 :
758 1284159 : if (BATcount(b) == 0 || b->tmaxpos != BUN_NONE) {
759 389155 : if (ni.maxpos != BUN_NONE) {
760 150112 : BATiter bi = bat_iterator_nolock(b);
761 150112 : if (BATcount(b) == 0 || atomcmp(BUNtail(bi, b->tmaxpos), BUNtail(ni, ni.maxpos)) < 0) {
762 146333 : if (s == NULL) {
763 146211 : b->tmaxpos = BATcount(b) + ni.maxpos;
764 : } else {
765 122 : b->tmaxpos = BUN_NONE;
766 : }
767 : }
768 : } else {
769 239043 : b->tmaxpos = BUN_NONE;
770 : }
771 : }
772 1284164 : if (BATcount(b) == 0 || b->tminpos != BUN_NONE) {
773 388854 : if (ni.minpos != BUN_NONE) {
774 150187 : BATiter bi = bat_iterator_nolock(b);
775 150187 : if (BATcount(b) == 0 || atomcmp(BUNtail(bi, b->tminpos), BUNtail(ni, ni.minpos)) > 0) {
776 145292 : if (s == NULL) {
777 145173 : b->tminpos = BATcount(b) + ni.minpos;
778 : } else {
779 119 : b->tminpos = BUN_NONE;
780 : }
781 : }
782 : } else {
783 238667 : b->tminpos = BUN_NONE;
784 : }
785 : }
786 1284158 : if (ci.ncand > BATcount(b) / gdk_unique_estimate_keep_fraction) {
787 1283235 : b->tunique_est = 0;
788 : }
789 1284158 : MT_lock_unset(&b->theaplock);
790 : /* load hash so that we can maintain it */
791 1288618 : (void) BATcheckhash(b);
792 :
793 1286315 : if (b->ttype == TYPE_void) {
794 : /* b does not have storage, keep it that way if we can */
795 22397 : HASHdestroy(b); /* we're not maintaining the hash here */
796 22398 : MT_lock_set(&b->theaplock);
797 22399 : if (BATtdensebi(&ni) && ci.tpe == cand_dense &&
798 21740 : (BATcount(b) == 0 ||
799 18393 : (BATtdense(b) &&
800 18393 : b->tseqbase + BATcount(b) == n->tseqbase + ci.seq - hseq))) {
801 : /* n is also dense and consecutive with b */
802 21685 : if (BATcount(b) == 0) {
803 3347 : if (minbound && n->tseqbase + ci.seq - hseq < *(const oid *)minbound) {
804 0 : assert(0);
805 : GDKerror("value not within bounds\n");
806 : MT_lock_unset(&b->theaplock);
807 : goto bailout;
808 : }
809 3347 : BATtseqbase(b, n->tseqbase + ci.seq - hseq);
810 : }
811 21684 : if (maxbound && b->tseqbase + BATcount(b) + ci.ncand >= *(const oid *)maxbound) {
812 0 : assert(0);
813 : GDKerror("value not within bounds\n");
814 : MT_lock_unset(&b->theaplock);
815 : goto bailout;
816 : }
817 21684 : BATsetcount(b, BATcount(b) + ci.ncand);
818 21681 : MT_lock_unset(&b->theaplock);
819 21684 : goto doreturn;
820 : }
821 714 : if ((BATcount(b) == 0 || is_oid_nil(b->tseqbase)) &&
822 20 : ni.type == TYPE_void && is_oid_nil(n->tseqbase)) {
823 : /* both b and n are void/nil */
824 0 : if (notnull) {
825 0 : assert(0);
826 : GDKerror("NULL value not within bounds\n");
827 : MT_lock_unset(&b->theaplock);
828 : goto bailout;
829 : }
830 0 : BATtseqbase(b, oid_nil);
831 0 : BATsetcount(b, BATcount(b) + ci.ncand);
832 0 : MT_lock_unset(&b->theaplock);
833 0 : goto doreturn;
834 : }
835 : /* we need to materialize b; allocate enough capacity */
836 714 : MT_lock_unset(&b->theaplock);
837 714 : if (BATmaterialize(b, BATcount(b) + ci.ncand) != GDK_SUCCEED) {
838 0 : goto bailout;
839 : }
840 : }
841 :
842 : /* property setting */
843 1264632 : MT_lock_set(&b->theaplock);
844 1265785 : r = BATcount(b);
845 :
846 1265785 : if (BATcount(b) == 0) {
847 380355 : b->tsorted = ni.sorted;
848 380355 : b->trevsorted = ni.revsorted;
849 380355 : b->tseqbase = oid_nil;
850 380355 : b->tnonil = ni.nonil;
851 380355 : b->tnil = ni.nil && ci.ncand == BATcount(n);
852 380355 : if (ci.tpe == cand_dense) {
853 380215 : b->tnosorted = ci.seq - hseq <= ni.nosorted && ni.nosorted < ci.seq + ci.ncand - hseq ? ni.nosorted + hseq - ci.seq : 0;
854 380215 : b->tnorevsorted = ci.seq - hseq <= ni.norevsorted && ni.norevsorted < ci.seq + ci.ncand - hseq ? ni.norevsorted + hseq - ci.seq : 0;
855 380215 : if (BATtdensebi(&ni)) {
856 2012 : b->tseqbase = n->tseqbase + ci.seq - hseq;
857 : }
858 : } else {
859 140 : b->tnosorted = 0;
860 140 : b->tnorevsorted = 0;
861 : }
862 380355 : b->tkey = ni.key;
863 380355 : if (ci.ncand == BATcount(n)) {
864 379063 : b->tnokey[0] = ni.nokey[0];
865 379063 : b->tnokey[1] = ni.nokey[1];
866 : } else {
867 1292 : b->tnokey[0] = b->tnokey[1] = 0;
868 : }
869 : } else {
870 885430 : BUN last = r - 1;
871 885430 : BATiter bi = bat_iterator_nolock(b);
872 885741 : int xx = ATOMcmp(b->ttype,
873 : BUNtail(ni, ci.seq - hseq),
874 : BUNtail(bi, last));
875 883127 : if (b->tsorted && (!ni.sorted || xx < 0)) {
876 19565 : b->tsorted = false;
877 19565 : b->tnosorted = 0;
878 19565 : b->tseqbase = oid_nil;
879 : }
880 883127 : if (b->trevsorted &&
881 80335 : (!ni.revsorted || xx > 0)) {
882 16537 : b->trevsorted = false;
883 16537 : b->tnorevsorted = 0;
884 : }
885 883127 : if (b->tkey &&
886 64661 : (!(b->tsorted || b->trevsorted) ||
887 51203 : !ni.key || xx == 0)) {
888 18563 : BATkey(b, false);
889 : }
890 883066 : if (b->ttype != TYPE_void && b->tsorted && BATtdense(b) &&
891 804 : (!BATtdensebi(&ni) ||
892 946 : ci.tpe != cand_dense ||
893 473 : 1 + *(oid *) BUNtloc(bi, last) != BUNtoid(n, ci.seq - hseq))) {
894 350 : b->tseqbase = oid_nil;
895 : }
896 883066 : b->tnonil &= ni.nonil;
897 1754953 : b->tnil |= ni.nil && ci.ncand == ni.count;
898 : }
899 1263421 : MT_lock_unset(&b->theaplock);
900 1266737 : if (b->ttype == TYPE_str) {
901 107509 : if (insert_string_bat(b, &ni, &ci, force, mayshare, qry_ctx) != GDK_SUCCEED) {
902 0 : goto bailout;
903 : }
904 1159228 : } else if (ATOMvarsized(b->ttype)) {
905 520 : if (append_varsized_bat(b, &ni, &ci, mayshare) != GDK_SUCCEED) {
906 0 : goto bailout;
907 : }
908 1158708 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
909 : /* no bounds and NOT_NULL property on MSK bats */
910 377 : assert(minbound == NULL && maxbound == NULL && !notnull);
911 377 : if (append_msk_bat(b, &ni, &ci) != GDK_SUCCEED) {
912 0 : goto bailout;
913 : }
914 : } else {
915 1158331 : if (ci.ncand > BATcapacity(b) - BATcount(b)) {
916 : /* if needed space exceeds a normal growth
917 : * extend just with what's needed */
918 10441 : BUN ncap = BATcount(b) + ci.ncand;
919 10441 : BUN grows = BATgrows(b);
920 :
921 10407 : if (ncap > grows)
922 : grows = ncap;
923 10407 : if (BATextend(b, grows) != GDK_SUCCEED) {
924 0 : goto bailout;
925 : }
926 : }
927 1158425 : MT_rwlock_wrlock(&b->thashlock);
928 1158825 : hlocked = true;
929 1158825 : if (b->ttype != TYPE_void &&
930 1158295 : ni.type != TYPE_void &&
931 1156189 : ci.tpe == cand_dense) {
932 : /* use fast memcpy if we can */
933 1156211 : memcpy(Tloc(b, BATcount(b)),
934 1156211 : (const char *) ni.base + ((ci.seq - hseq) << ni.shift),
935 1156211 : ci.ncand << ni.shift);
936 1156477 : for (BUN i = 0; b->thash && i < ci.ncand; i++) {
937 5788 : HASHappend_locked(b, r, Tloc(b, r));
938 266 : r++;
939 : }
940 : } else {
941 2614 : const void *atomnil = ATOMnilptr(b->ttype);
942 735782 : TIMEOUT_LOOP(ci.ncand, qry_ctx) {
943 730535 : BUN p = canditer_next(&ci) - hseq;
944 730498 : const void *t = BUNtail(ni, p);
945 730726 : bool isnil = atomcmp(t, atomnil) == 0;
946 730344 : if (notnull && isnil) {
947 0 : assert(0);
948 : GDKerror("NULL value not within bounds\n");
949 : goto bailout;
950 730344 : } else if (minbound &&
951 730344 : !isnil &&
952 0 : atomcmp(t, minbound) < 0) {
953 0 : assert(0);
954 : GDKerror("value not within bounds\n");
955 : goto bailout;
956 730344 : } else if (maxbound &&
957 0 : !isnil &&
958 0 : atomcmp(t, maxbound) >= 0) {
959 0 : assert(0);
960 : GDKerror("value not within bounds\n");
961 : goto bailout;
962 730344 : } else if (tfastins_nocheck(b, r, t) != GDK_SUCCEED) {
963 0 : goto bailout;
964 : }
965 730522 : if (b->thash)
966 0 : HASHappend_locked(b, r, t);
967 730535 : r++;
968 : }
969 2614 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
970 : }
971 1153303 : BUN nunique;
972 1153303 : nunique = b->thash ? b->thash->nunique : 0;
973 1153303 : MT_lock_set(&b->theaplock);
974 1157024 : BATsetcount(b, b->batCount + ci.ncand);
975 1156958 : if (nunique != 0)
976 44 : b->tunique_est = (double) nunique;
977 1156958 : MT_lock_unset(&b->theaplock);
978 1157624 : assert(hlocked);
979 1157624 : MT_rwlock_wrunlock(&b->thashlock);
980 1157624 : hlocked = false;
981 : }
982 :
983 1287825 : doreturn:
984 1287825 : bat_iterator_end(&ni);
985 1284289 : if (minbound)
986 48 : VALclear(&minprop);
987 1285300 : if (maxbound)
988 40 : VALclear(&maxprop);
989 1285300 : TRC_DEBUG(ALGO, "b=%s,n=" ALGOBATFMT ",s=" ALGOOPTBATFMT
990 : " -> " ALGOBATFMT " (" LLFMT " usec)\n",
991 : buf, ALGOBATPAR(n), ALGOOPTBATPAR(s), ALGOBATPAR(b),
992 : GDKusec() - t0);
993 :
994 : return GDK_SUCCEED;
995 : bailout:
996 0 : if (hlocked)
997 0 : MT_rwlock_wrunlock(&b->thashlock);
998 0 : if (minbound)
999 0 : VALclear(&minprop);
1000 0 : if (maxbound)
1001 0 : VALclear(&maxprop);
1002 0 : bat_iterator_end(&ni);
1003 0 : return GDK_FAIL;
1004 : }
1005 :
1006 : gdk_return
1007 2184224 : BATappend(BAT *b, BAT *n, BAT *s, bool force)
1008 : {
1009 2184224 : return BATappend2(b, n, s, force, true);
1010 : }
1011 :
1012 : gdk_return
1013 4 : BATdel(BAT *b, BAT *d)
1014 : {
1015 4 : void (*atmdel) (Heap *, var_t *) = BATatoms[b->ttype].atomDel;
1016 4 : MT_lock_set(&b->theaplock);
1017 4 : BATiter bi = bat_iterator_nolock(b);
1018 4 : MT_lock_unset(&b->theaplock);
1019 :
1020 4 : assert(ATOMtype(d->ttype) == TYPE_oid);
1021 4 : assert(d->tsorted);
1022 4 : assert(d->tkey);
1023 4 : if (BATcount(d) == 0)
1024 : return GDK_SUCCEED;
1025 4 : IMPSdestroy(b);
1026 4 : OIDXdestroy(b);
1027 4 : HASHdestroy(b);
1028 4 : PROPdestroy(b);
1029 4 : STRMPdestroy(b);
1030 4 : RTREEdestroy(b);
1031 4 : if (BATtdense(d)) {
1032 2 : oid o = d->tseqbase;
1033 2 : BUN c = BATcount(d);
1034 :
1035 2 : if (o + c <= b->hseqbase)
1036 : return GDK_SUCCEED;
1037 2 : if (o < b->hseqbase) {
1038 0 : c -= b->hseqbase - o;
1039 0 : o = b->hseqbase;
1040 : }
1041 2 : if (o - b->hseqbase < b->batInserted) {
1042 0 : GDKerror("cannot delete committed values\n");
1043 0 : return GDK_FAIL;
1044 : }
1045 2 : if (o + c > b->hseqbase + BATcount(b))
1046 0 : c = b->hseqbase + BATcount(b) - o;
1047 2 : if (c == 0)
1048 : return GDK_SUCCEED;
1049 2 : if (atmdel) {
1050 0 : BUN p = o - b->hseqbase;
1051 0 : BUN q = p + c;
1052 0 : while (p < q) {
1053 0 : (*atmdel)(b->tvheap, (var_t *) BUNtloc(bi, p));
1054 0 : p++;
1055 : }
1056 : }
1057 2 : if (BATtdense(b) && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED)
1058 : return GDK_FAIL;
1059 2 : MT_lock_set(&b->theaplock);
1060 2 : if (o + c < b->hseqbase + BATcount(b)) {
1061 0 : o -= b->hseqbase;
1062 0 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1063 0 : BUN n = BATcount(b) - (o + c);
1064 : /* not very efficient, but first see
1065 : * how much this is used */
1066 0 : for (BUN i = 0; i < n; i++)
1067 0 : mskSetVal(b, o + i,
1068 0 : mskGetVal(b, o + c + i));
1069 : } else {
1070 0 : memmove(Tloc(b, o),
1071 0 : Tloc(b, o + c),
1072 0 : b->twidth * (BATcount(b) - (o + c)));
1073 : }
1074 0 : b->theap->dirty = true;
1075 : // o += b->hseqbase; // if this were to be used again
1076 : }
1077 2 : b->batCount -= c;
1078 : } else {
1079 2 : BATiter di = bat_iterator(d);
1080 2 : const oid *o = (const oid *) di.base;
1081 2 : const oid *s;
1082 2 : BUN c = di.count;
1083 2 : BUN nd = 0;
1084 2 : BUN pos;
1085 2 : char *p = NULL;
1086 :
1087 2 : if (o[c - 1] <= b->hseqbase) {
1088 0 : bat_iterator_end(&di);
1089 0 : return GDK_SUCCEED;
1090 : }
1091 2 : while (*o < b->hseqbase) {
1092 0 : o++;
1093 0 : c--;
1094 : }
1095 2 : if (*o - b->hseqbase < b->batInserted) {
1096 0 : bat_iterator_end(&di);
1097 0 : GDKerror("cannot delete committed values\n");
1098 0 : return GDK_FAIL;
1099 : }
1100 2 : if (BATtdense(b) && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED) {
1101 0 : bat_iterator_end(&di);
1102 0 : return GDK_FAIL;
1103 : }
1104 2 : s = o;
1105 2 : pos = *o - b->hseqbase;
1106 2 : if (ATOMstorage(b->ttype) != TYPE_msk)
1107 2 : p = Tloc(b, pos);
1108 6 : while (c > 0 && *o < b->hseqbase + BATcount(b)) {
1109 4 : size_t n;
1110 4 : if (atmdel)
1111 0 : (*atmdel)(b->tvheap, (var_t *) BUNtloc(bi, *o - b->hseqbase));
1112 4 : o++;
1113 4 : c--;
1114 4 : nd++;
1115 4 : if (c == 0 || *o - b->hseqbase >= BATcount(b))
1116 2 : n = b->hseqbase + BATcount(b) - o[-1] - 1;
1117 2 : else if ((oid) (o - s) < *o - *s)
1118 2 : n = o[0] - o[-1] - 1;
1119 : else
1120 : n = 0;
1121 4 : if (n > 0) {
1122 2 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1123 0 : BUN opos = o[-1] + 1 - b->hseqbase;
1124 : /* not very efficient, but
1125 : * first see how much this is
1126 : * used */
1127 0 : for (BUN i = 0; i < n; i++) {
1128 0 : mskSetVal(b, pos + i,
1129 0 : mskGetVal(b, opos + i));
1130 : }
1131 0 : pos += n;
1132 : } else {
1133 2 : n *= b->twidth;
1134 2 : memmove(p,
1135 2 : Tloc(b, o[-1] + 1 - b->hseqbase),
1136 : n);
1137 2 : p += n;
1138 : }
1139 : s = o;
1140 : }
1141 : }
1142 2 : bat_iterator_end(&di);
1143 2 : MT_lock_set(&b->theaplock);
1144 2 : b->theap->dirty = true;
1145 2 : b->batCount -= nd;
1146 : }
1147 4 : if (b->batCount <= 1) {
1148 : /* some trivial properties */
1149 2 : b->tkey = true;
1150 2 : b->tsorted = b->trevsorted = true;
1151 2 : if (b->batCount == 0) {
1152 2 : b->tnil = false;
1153 2 : b->tnonil = true;
1154 : }
1155 : }
1156 : /* not sure about these anymore */
1157 4 : b->tnosorted = b->tnorevsorted = 0;
1158 4 : b->tnokey[0] = b->tnokey[1] = 0;
1159 4 : b->tminpos = BUN_NONE;
1160 4 : b->tmaxpos = BUN_NONE;
1161 4 : b->tunique_est = 0.0;
1162 4 : MT_lock_unset(&b->theaplock);
1163 :
1164 4 : return GDK_SUCCEED;
1165 : }
1166 :
1167 : /*
1168 : * Replace all values in b with values from n whose location is given by
1169 : * the oid in either p or positions.
1170 : * If positions is used, autoincr specifies whether it is the first of a
1171 : * dense range of positions or whether it is a full-blown array of
1172 : * position.
1173 : * If mayappend is set, the position in p/positions may refer to
1174 : * locations beyond the end of b.
1175 : */
1176 : static gdk_return
1177 91782 : BATappend_or_update(BAT *b, BAT *p, const oid *positions, BAT *n,
1178 : bool mayappend, bool autoincr, bool force)
1179 : {
1180 91782 : lng t0 = GDKusec();
1181 91775 : oid pos = oid_nil;
1182 91775 : BUN nunique = 0;
1183 :
1184 91775 : if (b == NULL || b->ttype == TYPE_void || n == NULL) {
1185 : return GDK_SUCCEED;
1186 : }
1187 : /* either p or positions */
1188 91775 : assert((p == NULL) != (positions == NULL));
1189 91775 : if (p != NULL) {
1190 91599 : if (BATcount(p) != BATcount(n)) {
1191 0 : GDKerror("update BATs not the same size\n");
1192 0 : return GDK_FAIL;
1193 : }
1194 91599 : if (ATOMtype(p->ttype) != TYPE_oid) {
1195 0 : GDKerror("positions BAT not type OID\n");
1196 0 : return GDK_FAIL;
1197 : }
1198 91599 : if (BATtdense(p)) {
1199 83153 : pos = p->tseqbase;
1200 83153 : positions = &pos;
1201 83153 : autoincr = true;
1202 83153 : p = NULL;
1203 8446 : } else if (p->ttype != TYPE_void) {
1204 8444 : positions = (const oid *) Tloc(p, 0);
1205 8444 : autoincr = false;
1206 : } else {
1207 : autoincr = false;
1208 : }
1209 176 : } else if (autoincr) {
1210 176 : pos = *positions;
1211 : }
1212 91775 : if (BATcount(n) == 0) {
1213 : return GDK_SUCCEED;
1214 : }
1215 :
1216 26778 : BATiter ni = bat_iterator(n);
1217 :
1218 26793 : OIDXdestroy(b);
1219 26775 : IMPSdestroy(b);
1220 26773 : STRMPdestroy(b);
1221 26772 : RTREEdestroy(b);
1222 : /* load hash so that we can maintain it */
1223 26771 : (void) BATcheckhash(b);
1224 :
1225 26780 : MT_lock_set(&b->theaplock);
1226 26788 : if (!force && (b->batRestricted != BAT_WRITE ||
1227 15 : (ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1)) {
1228 0 : MT_lock_unset(&b->theaplock);
1229 0 : bat_iterator_end(&ni);
1230 0 : GDKerror("access denied to %s, aborting.\n", BATgetId(b));
1231 0 : return GDK_FAIL;
1232 : }
1233 26788 : BATiter bi = bat_iterator_nolock(b);
1234 26788 : if (ni.count > BATcount(b) / gdk_unique_estimate_keep_fraction) {
1235 26214 : b->tunique_est = 0;
1236 : }
1237 :
1238 26788 : b->tsorted = b->trevsorted = false;
1239 26788 : b->tnosorted = b->tnorevsorted = 0;
1240 26788 : b->tseqbase = oid_nil;
1241 26788 : b->tkey = false;
1242 26788 : b->tnokey[0] = b->tnokey[1] = 0;
1243 :
1244 26788 : int (*atomcmp)(const void *, const void *) = ATOMcompare(b->ttype);
1245 26788 : const void *nil = ATOMnilptr(b->ttype);
1246 26788 : oid hseqend = b->hseqbase + BATcount(b);
1247 :
1248 26788 : MT_lock_unset(&b->theaplock);
1249 :
1250 29983 : bool anynil = false;
1251 29983 : bool locked = false;
1252 :
1253 29983 : if (b->tvheap) {
1254 1149288 : for (BUN i = 0; i < ni.count; i++) {
1255 1146746 : oid updid;
1256 1146746 : if (positions) {
1257 1145733 : updid = autoincr ? pos++ : *positions++;
1258 : } else {
1259 1013 : updid = BUNtoid(p, i);
1260 : }
1261 :
1262 1146746 : if (updid < b->hseqbase ||
1263 1146746 : (!mayappend && updid >= hseqend)) {
1264 0 : GDKerror("id out of range\n");
1265 0 : goto bailout;
1266 : }
1267 1146746 : updid -= b->hseqbase;
1268 1146746 : if (!force && updid < b->batInserted) {
1269 0 : GDKerror("updating committed value\n");
1270 0 : goto bailout;
1271 : }
1272 :
1273 1146746 : const void *new = BUNtvar(ni, i);
1274 :
1275 1138734 : if (updid >= BATcount(b)) {
1276 31043 : assert(mayappend);
1277 31043 : if (locked) {
1278 16 : MT_rwlock_wrunlock(&b->thashlock);
1279 16 : locked = false;
1280 : }
1281 31043 : if (b->tminpos != bi.minpos ||
1282 31042 : b->tmaxpos != bi.maxpos) {
1283 1 : MT_lock_set(&b->theaplock);
1284 1 : b->tminpos = bi.minpos;
1285 1 : b->tmaxpos = bi.maxpos;
1286 1 : MT_lock_unset(&b->theaplock);
1287 : }
1288 31051 : if (BATcount(b) < updid &&
1289 8 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1290 0 : bat_iterator_end(&ni);
1291 0 : return GDK_FAIL;
1292 : }
1293 31043 : if (BUNappend(b, new, force) != GDK_SUCCEED) {
1294 0 : bat_iterator_end(&ni);
1295 0 : return GDK_FAIL;
1296 : }
1297 31043 : bi = bat_iterator_nolock(b);
1298 84329 : continue;
1299 : }
1300 :
1301 : /* it is possible that a previous run was killed
1302 : * after an update (with a mmapped tail file)
1303 : * but before that was committed, then the
1304 : * offset may point outside of the vheap */
1305 1107691 : const void *old = BUNtvaroff(bi, updid) < bi.vhfree ? BUNtvar(bi, updid) : NULL;
1306 :
1307 1107730 : if (old && atomcmp(old, new) == 0) {
1308 : /* replacing with the same value:
1309 : * nothing to do */
1310 53286 : continue;
1311 : }
1312 :
1313 1054258 : bool isnil = atomcmp(new, nil) == 0;
1314 1058945 : anynil |= isnil;
1315 1058945 : MT_lock_set(&b->theaplock);
1316 1059039 : if (old == NULL ||
1317 1059039 : (b->tnil &&
1318 582 : !anynil &&
1319 581 : atomcmp(old, nil) == 0)) {
1320 : /* if old value is nil and no new
1321 : * value is, we're not sure anymore
1322 : * about the nil property, so we must
1323 : * clear it */
1324 582 : b->tnil = false;
1325 : }
1326 1059040 : b->tnonil &= !isnil;
1327 1059040 : b->tnil |= isnil;
1328 1059040 : MT_lock_unset(&b->theaplock);
1329 1059279 : if (bi.maxpos != BUN_NONE) {
1330 9694 : if (!isnil &&
1331 4847 : atomcmp(BUNtvar(bi, bi.maxpos), new) < 0) {
1332 : /* new value is larger than
1333 : * previous largest */
1334 22 : bi.maxpos = updid;
1335 9650 : } else if (old == NULL ||
1336 4837 : (atomcmp(BUNtvar(bi, bi.maxpos), old) == 0 &&
1337 12 : atomcmp(new, old) != 0)) {
1338 : /* old value is equal to
1339 : * largest and new value is
1340 : * smaller, so we don't know
1341 : * anymore which is the
1342 : * largest */
1343 12 : bi.maxpos = BUN_NONE;
1344 : }
1345 : }
1346 1059279 : if (bi.minpos != BUN_NONE) {
1347 9688 : if (!isnil &&
1348 4844 : atomcmp(BUNtvar(bi, bi.minpos), new) > 0) {
1349 : /* new value is smaller than
1350 : * previous smallest */
1351 14 : bi.minpos = updid;
1352 9660 : } else if (old == NULL ||
1353 4852 : (atomcmp(BUNtvar(bi, bi.minpos), old) == 0 &&
1354 22 : atomcmp(new, old) != 0)) {
1355 : /* old value is equal to
1356 : * smallest and new value is
1357 : * larger, so we don't know
1358 : * anymore which is the
1359 : * smallest */
1360 22 : bi.minpos = BUN_NONE;
1361 : }
1362 : }
1363 1059279 : if (!locked) {
1364 2088 : MT_rwlock_wrlock(&b->thashlock);
1365 2088 : locked = true;
1366 : }
1367 1059280 : if (old)
1368 1059280 : HASHdelete_locked(&bi, updid, old);
1369 0 : else if (b->thash) {
1370 0 : doHASHdestroy(b, b->thash);
1371 0 : b->thash = NULL;
1372 : }
1373 :
1374 1059257 : var_t d;
1375 1059257 : switch (b->twidth) {
1376 1039433 : case 1:
1377 1039433 : d = (var_t) ((uint8_t *) b->theap->base)[updid] + GDK_VAROFFSET;
1378 1039433 : break;
1379 16779 : case 2:
1380 16779 : d = (var_t) ((uint16_t *) b->theap->base)[updid] + GDK_VAROFFSET;
1381 16779 : break;
1382 3010 : case 4:
1383 3010 : d = (var_t) ((uint32_t *) b->theap->base)[updid];
1384 3010 : break;
1385 : #if SIZEOF_VAR_T == 8
1386 35 : case 8:
1387 35 : d = (var_t) ((uint64_t *) b->theap->base)[updid];
1388 35 : break;
1389 : #endif
1390 : default:
1391 0 : MT_UNREACHABLE();
1392 : }
1393 1059257 : MT_lock_set(&b->theaplock);
1394 1059218 : gdk_return rc = ATOMreplaceVAR(b, &d, new);
1395 1059202 : MT_lock_unset(&b->theaplock);
1396 1059228 : if (rc != GDK_SUCCEED) {
1397 0 : goto bailout;
1398 : }
1399 1059228 : if (b->twidth < SIZEOF_VAR_T &&
1400 1059207 : (b->twidth <= 2 ? d - GDK_VAROFFSET : d) >= ((size_t) 1 << (8 << b->tshift))) {
1401 : /* doesn't fit in current heap, upgrade it */
1402 20 : if (GDKupgradevarheap(b, d, 0, MAX(updid, b->batCount)) != GDK_SUCCEED) {
1403 0 : goto bailout;
1404 : }
1405 : }
1406 : /* in case ATOMreplaceVAR and/or
1407 : * GDKupgradevarheap replaces a heap, we need to
1408 : * reinitialize the iterator */
1409 : {
1410 : /* save and restore minpos/maxpos */
1411 1059228 : BUN minpos = bi.minpos;
1412 1059228 : BUN maxpos = bi.maxpos;
1413 1059228 : bi = bat_iterator_nolock(b);
1414 1059228 : bi.minpos = minpos;
1415 1059228 : bi.maxpos = maxpos;
1416 : }
1417 1059228 : switch (b->twidth) {
1418 1039386 : case 1:
1419 1039386 : ((uint8_t *) b->theap->base)[updid] = (uint8_t) (d - GDK_VAROFFSET);
1420 1039386 : break;
1421 16795 : case 2:
1422 16795 : ((uint16_t *) b->theap->base)[updid] = (uint16_t) (d - GDK_VAROFFSET);
1423 16795 : break;
1424 3012 : case 4:
1425 3012 : ((uint32_t *) b->theap->base)[updid] = (uint32_t) d;
1426 3012 : break;
1427 : #if SIZEOF_VAR_T == 8
1428 35 : case 8:
1429 35 : ((uint64_t *) b->theap->base)[updid] = (uint64_t) d;
1430 35 : break;
1431 : #endif
1432 : default:
1433 0 : MT_UNREACHABLE();
1434 : }
1435 1059228 : HASHinsert_locked(&bi, updid, new);
1436 :
1437 : }
1438 2542 : if (locked) {
1439 2071 : if (b->thash)
1440 2 : nunique = b->thash->nunique;
1441 2071 : MT_rwlock_wrunlock(&b->thashlock);
1442 2071 : locked = false;
1443 : }
1444 2544 : MT_lock_set(&b->theaplock);
1445 2544 : b->tvheap->dirty = true;
1446 2544 : MT_lock_unset(&b->theaplock);
1447 24236 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
1448 0 : assert(b->thash == NULL);
1449 0 : HASHdestroy(b); /* hash doesn't make sense for msk */
1450 0 : for (BUN i = 0; i < ni.count; i++) {
1451 0 : oid updid;
1452 0 : if (positions) {
1453 0 : updid = autoincr ? pos++ : *positions++;
1454 : } else {
1455 0 : updid = BUNtoid(p, i);
1456 : }
1457 :
1458 0 : if (updid < b->hseqbase ||
1459 0 : (!mayappend && updid >= hseqend)) {
1460 0 : GDKerror("id out of range\n");
1461 0 : bat_iterator_end(&ni);
1462 0 : return GDK_FAIL;
1463 : }
1464 0 : updid -= b->hseqbase;
1465 0 : if (!force && updid < b->batInserted) {
1466 0 : GDKerror("updating committed value\n");
1467 0 : bat_iterator_end(&ni);
1468 0 : return GDK_FAIL;
1469 : }
1470 0 : if (updid >= BATcount(b)) {
1471 0 : assert(mayappend);
1472 0 : if (BATcount(b) < updid &&
1473 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1474 0 : bat_iterator_end(&ni);
1475 0 : return GDK_FAIL;
1476 : }
1477 0 : if (BUNappend(b, BUNtmsk(ni, i), force) != GDK_SUCCEED) {
1478 0 : bat_iterator_end(&ni);
1479 0 : return GDK_FAIL;
1480 : }
1481 0 : continue;
1482 : }
1483 0 : mskSetVal(b, updid, Tmskval(&ni, i));
1484 : }
1485 0 : bi = bat_iterator_nolock(b);
1486 24236 : } else if (autoincr) {
1487 16119 : if (pos < b->hseqbase ||
1488 15197 : (!mayappend && pos + ni.count > hseqend)) {
1489 0 : GDKerror("id out of range\n");
1490 0 : bat_iterator_end(&ni);
1491 0 : return GDK_FAIL;
1492 : }
1493 16119 : pos -= b->hseqbase;
1494 16119 : if (!force && pos < b->batInserted) {
1495 0 : GDKerror("updating committed value\n");
1496 0 : bat_iterator_end(&ni);
1497 0 : return GDK_FAIL;
1498 : }
1499 :
1500 16119 : if (pos >= BATcount(b)) {
1501 501 : assert(mayappend);
1502 501 : bat_iterator_end(&ni);
1503 525 : if (BATcount(b) < pos &&
1504 24 : BUNappendmulti(b, NULL, (BUN) (pos - BATcount(b)), force) != GDK_SUCCEED) {
1505 : return GDK_FAIL;
1506 : }
1507 501 : return BATappend(b, n, NULL, force);
1508 : }
1509 15624 : if (pos + ni.count > BATcount(b) &&
1510 6 : BUNappendmulti(b, NULL, (BUN) (pos + ni.count - BATcount(b)), force) != GDK_SUCCEED) {
1511 0 : bat_iterator_end(&ni);
1512 0 : return GDK_FAIL;
1513 : }
1514 15618 : bi = bat_iterator_nolock(b);
1515 :
1516 : /* we copy all of n, so if there are nils in n we get
1517 : * nils in b (and else we don't know) */
1518 15618 : b->tnil = ni.nil;
1519 : /* we may not copy over all of b, so we only know that
1520 : * there are no nils in b afterward if there weren't
1521 : * any in either b or n to begin with */
1522 15618 : b->tnonil &= ni.nonil;
1523 : /* if there is no hash, we don't start the loop, if
1524 : * there is only a persisted hash, it will get destroyed
1525 : * in the first iteration, after which there is no hash
1526 : * and the loop ends */
1527 15618 : MT_rwlock_wrlock(&b->thashlock);
1528 15626 : locked = true;
1529 15626 : for (BUN i = pos, j = pos + ni.count; i < j && b->thash; i++)
1530 17 : HASHdelete_locked(&bi, i, Tloc(b, i));
1531 15609 : if (ni.type == TYPE_void) {
1532 0 : assert(b->ttype == TYPE_oid);
1533 0 : oid *o = Tloc(b, pos);
1534 0 : if (is_oid_nil(ni.tseq)) {
1535 : /* we may or may not overwrite the old
1536 : * min/max values */
1537 0 : bi.minpos = BUN_NONE;
1538 0 : bi.maxpos = BUN_NONE;
1539 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1540 0 : o[i] = oid_nil;
1541 0 : b->tnil = true;
1542 : } else {
1543 0 : oid v = ni.tseq;
1544 : /* we know min/max of n, so we know
1545 : * the new min/max of b if those of n
1546 : * are smaller/larger than the old */
1547 0 : if (bi.minpos != BUN_NONE) {
1548 0 : if (v <= BUNtoid(b, bi.minpos))
1549 0 : bi.minpos = pos;
1550 0 : else if (pos <= bi.minpos && bi.minpos < pos + ni.count)
1551 0 : bi.minpos = BUN_NONE;
1552 : }
1553 0 : if (complex_cand(n)) {
1554 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1555 0 : o[i] = *(oid *)Tpos(&ni, i);
1556 : /* last value */
1557 0 : v = o[ni.count - 1];
1558 : } else {
1559 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1560 0 : o[i] = v++;
1561 : /* last value added (not one beyond) */
1562 0 : v--;
1563 : }
1564 0 : if (bi.maxpos != BUN_NONE) {
1565 0 : if (v >= BUNtoid(b, bi.maxpos))
1566 0 : bi.maxpos = pos + ni.count - 1;
1567 0 : else if (pos <= bi.maxpos && bi.maxpos < pos + ni.count)
1568 0 : bi.maxpos = BUN_NONE;
1569 : }
1570 : }
1571 : } else {
1572 : /* if the extremes of n are at least as
1573 : * extreme as those of b, we can replace b's
1574 : * min/max, else we don't know what b's new
1575 : * min/max are*/
1576 15902 : if (bi.minpos != BUN_NONE && ni.minpos != BUN_NONE &&
1577 293 : atomcmp(BUNtloc(bi, bi.minpos), BUNtail(ni, ni.minpos)) >= 0) {
1578 184 : bi.minpos = pos + ni.minpos;
1579 : } else {
1580 15425 : bi.minpos = BUN_NONE;
1581 : }
1582 15889 : if (bi.maxpos != BUN_NONE && ni.maxpos != BUN_NONE &&
1583 280 : atomcmp(BUNtloc(bi, bi.maxpos), BUNtail(ni, ni.maxpos)) <= 0) {
1584 191 : bi.maxpos = pos + ni.maxpos;
1585 : } else {
1586 15418 : bi.maxpos = BUN_NONE;
1587 : }
1588 15609 : memcpy(Tloc(b, pos), ni.base,
1589 15609 : ni.count << b->tshift);
1590 : }
1591 : /* either we have a hash that was updated above, or we
1592 : * have no hash; we cannot have the case where there is
1593 : * only a persisted (unloaded) hash since it would have
1594 : * been destroyed above */
1595 15609 : if (b->thash != NULL) {
1596 0 : for (BUN i = pos, j = pos + ni.count; i < j; i++)
1597 0 : HASHinsert_locked(&bi, i, Tloc(b, i));
1598 0 : if (b->thash)
1599 0 : nunique = b->thash->nunique;
1600 : }
1601 15619 : MT_rwlock_wrunlock(&b->thashlock);
1602 15627 : locked = false;
1603 15627 : if (ni.count == BATcount(b)) {
1604 : /* if we replaced all values of b by values
1605 : * from n, we can also copy the min/max
1606 : * properties */
1607 7444 : bi.minpos = ni.minpos;
1608 7444 : bi.maxpos = ni.maxpos;
1609 7444 : if (BATtdensebi(&ni)) {
1610 : /* replaced all of b with a dense sequence */
1611 47 : MT_lock_set(&b->theaplock);
1612 47 : BATtseqbase(b, ni.tseq);
1613 47 : MT_lock_unset(&b->theaplock);
1614 : }
1615 : }
1616 : } else {
1617 160332383 : for (BUN i = 0; i < ni.count; i++) {
1618 160324261 : oid updid;
1619 160324261 : if (positions) {
1620 : /* assert(!autoincr) */
1621 160324261 : updid = *positions++;
1622 : } else {
1623 0 : updid = BUNtoid(p, i);
1624 : }
1625 :
1626 160324261 : if (updid < b->hseqbase ||
1627 160324261 : (!mayappend && updid >= hseqend)) {
1628 0 : GDKerror("id out of range\n");
1629 0 : goto bailout;
1630 : }
1631 160324261 : updid -= b->hseqbase;
1632 160324261 : if (!force && updid < b->batInserted) {
1633 0 : GDKerror("updating committed value\n");
1634 0 : goto bailout;
1635 : }
1636 :
1637 160324261 : const void *new = BUNtloc(ni, i);
1638 :
1639 160324261 : if (updid >= BATcount(b)) {
1640 31287 : assert(mayappend);
1641 31287 : if (locked) {
1642 46 : MT_rwlock_wrunlock(&b->thashlock);
1643 46 : locked = false;
1644 : }
1645 31287 : if (b->tminpos != bi.minpos ||
1646 31285 : b->tmaxpos != bi.maxpos) {
1647 11 : MT_lock_set(&b->theaplock);
1648 11 : b->tminpos = bi.minpos;
1649 11 : b->tmaxpos = bi.maxpos;
1650 11 : MT_lock_unset(&b->theaplock);
1651 : }
1652 31311 : if (BATcount(b) < updid &&
1653 24 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1654 0 : goto bailout;
1655 : }
1656 31287 : if (BUNappend(b, new, force) != GDK_SUCCEED) {
1657 0 : bat_iterator_end(&ni);
1658 0 : return GDK_FAIL;
1659 : }
1660 31287 : bi = bat_iterator_nolock(b);
1661 31287 : continue;
1662 : }
1663 :
1664 160292974 : const void *old = BUNtloc(bi, updid);
1665 160292974 : bool isnil = atomcmp(new, nil) == 0;
1666 156091906 : anynil |= isnil;
1667 156091906 : if (b->tnil &&
1668 1821 : !anynil &&
1669 1820 : atomcmp(old, nil) == 0) {
1670 : /* if old value is nil and no new
1671 : * value is, we're not sure anymore
1672 : * about the nil property, so we must
1673 : * clear it */
1674 1821 : b->tnil = false;
1675 : }
1676 156091907 : b->tnonil &= !isnil;
1677 156091907 : b->tnil |= isnil;
1678 156091907 : if (bi.maxpos != BUN_NONE) {
1679 23084 : if (!isnil &&
1680 11542 : atomcmp(BUNtloc(bi, bi.maxpos), new) < 0) {
1681 : /* new value is larger than
1682 : * previous largest */
1683 141 : bi.maxpos = updid;
1684 11440 : } else if (atomcmp(BUNtloc(bi, bi.maxpos), old) == 0 &&
1685 39 : atomcmp(new, old) != 0) {
1686 : /* old value is equal to
1687 : * largest and new value is
1688 : * smaller, so we don't know
1689 : * anymore which is the
1690 : * largest */
1691 3 : bi.maxpos = BUN_NONE;
1692 : }
1693 : }
1694 156091907 : if (bi.minpos != BUN_NONE) {
1695 23088 : if (!isnil &&
1696 11544 : atomcmp(BUNtloc(bi, bi.minpos), new) > 0) {
1697 : /* new value is smaller than
1698 : * previous smallest */
1699 6 : bi.minpos = updid;
1700 11564 : } else if (atomcmp(BUNtloc(bi, bi.minpos), old) == 0 &&
1701 26 : atomcmp(new, old) != 0) {
1702 : /* old value is equal to
1703 : * smallest and new value is
1704 : * larger, so we don't know
1705 : * anymore which is the
1706 : * smallest */
1707 2 : bi.minpos = BUN_NONE;
1708 : }
1709 : }
1710 :
1711 156091907 : if (!locked) {
1712 8103 : MT_rwlock_wrlock(&b->thashlock);
1713 8103 : locked = true;
1714 : }
1715 156091912 : HASHdelete_locked(&bi, updid, old);
1716 156191820 : switch (b->twidth) {
1717 27620798 : case 1:
1718 27620798 : ((bte *) b->theap->base)[updid] = * (bte *) new;
1719 27620798 : break;
1720 526089 : case 2:
1721 526089 : ((sht *) b->theap->base)[updid] = * (sht *) new;
1722 526089 : break;
1723 23978619 : case 4:
1724 23978619 : ((int *) b->theap->base)[updid] = * (int *) new;
1725 23978619 : break;
1726 98877839 : case 8:
1727 98877839 : ((lng *) b->theap->base)[updid] = * (lng *) new;
1728 98877839 : break;
1729 5188475 : case 16:
1730 : #ifdef HAVE_HGE
1731 5188475 : ((hge *) b->theap->base)[updid] = * (hge *) new;
1732 : #else
1733 : ((uuid *) b->theap->base)[updid] = * (uuid *) new;
1734 : #endif
1735 5188475 : break;
1736 0 : default:
1737 0 : memcpy(BUNtloc(bi, updid), new, ATOMsize(b->ttype));
1738 0 : break;
1739 : }
1740 156191820 : HASHinsert_locked(&bi, updid, new);
1741 : }
1742 8122 : if (locked) {
1743 8064 : if (b->thash)
1744 5 : nunique = b->thash->nunique;
1745 8064 : MT_rwlock_wrunlock(&b->thashlock);
1746 8064 : locked = false;
1747 : }
1748 : }
1749 26292 : bat_iterator_end(&ni);
1750 26269 : MT_lock_set(&b->theaplock);
1751 26286 : if (nunique != 0)
1752 7 : b->tunique_est = (double) nunique;
1753 26286 : b->tminpos = bi.minpos;
1754 26286 : b->tmaxpos = bi.maxpos;
1755 26286 : b->theap->dirty = true;
1756 26286 : MT_lock_unset(&b->theaplock);
1757 26293 : TRC_DEBUG(ALGO,
1758 : "BATreplace(" ALGOBATFMT "," ALGOOPTBATFMT "," ALGOBATFMT ") " LLFMT " usec\n",
1759 : ALGOBATPAR(b), ALGOOPTBATPAR(p), ALGOBATPAR(n),
1760 : GDKusec() - t0);
1761 : return GDK_SUCCEED;
1762 :
1763 0 : bailout:
1764 0 : bat_iterator_end(&ni);
1765 0 : if (locked) {
1766 0 : Hash *h = b->thash;
1767 0 : b->thash = NULL;
1768 0 : MT_rwlock_wrunlock(&b->thashlock);
1769 0 : doHASHdestroy(b, h);
1770 : }
1771 : return GDK_FAIL;
1772 : }
1773 :
1774 : /* replace values from b at locations specified in p with values in n */
1775 : gdk_return
1776 90458 : BATreplace(BAT *b, BAT *p, BAT *n, bool force)
1777 : {
1778 90458 : return BATappend_or_update(b, p, NULL, n, false, false, force);
1779 : }
1780 :
1781 : /* like BATreplace, but p may specify locations beyond the end of b */
1782 : gdk_return
1783 1217 : BATupdate(BAT *b, BAT *p, BAT *n, bool force)
1784 : {
1785 1217 : return BATappend_or_update(b, p, NULL, n, true, false, force);
1786 : }
1787 :
1788 : #if 0 /* not used */
1789 : /* like BATreplace, but the positions are given by an array of oid values */
1790 : gdk_return
1791 : BATreplacepos(BAT *b, const oid *positions, BAT *n, bool autoincr, bool force)
1792 : {
1793 : return BATappend_or_update(b, NULL, positions, n, false, autoincr, force);
1794 : }
1795 : #endif
1796 :
1797 : /* like BATreplace, but the positions are given by an array of oid
1798 : * values, and they may specify locations beyond the end of b */
1799 : gdk_return
1800 176 : BATupdatepos(BAT *b, const oid *positions, BAT *n, bool autoincr, bool force)
1801 : {
1802 176 : return BATappend_or_update(b, NULL, positions, n, true, autoincr, force);
1803 : }
1804 :
1805 : /*
1806 : * BAT Selections
1807 : * The BAT selectors are among the most heavily used operators.
1808 : * Their efficient implementation is therefore mandatory.
1809 : *
1810 : * BAT slice
1811 : * This function returns a horizontal slice from a BAT. It optimizes
1812 : * execution by avoiding to copy when the BAT is memory mapped (in
1813 : * this case, an independent submap is created) or else when it is
1814 : * read-only, then a VIEW bat is created as a result.
1815 : *
1816 : * If a new copy has to be created, this function takes care to
1817 : * preserve void-columns (in this case, the seqbase has to be
1818 : * recomputed in the result).
1819 : *
1820 : * The selected range is excluding the high value.
1821 : */
1822 : BAT *
1823 11394581 : BATslice(BAT *b, BUN l, BUN h)
1824 : {
1825 11394581 : BUN low = l;
1826 11394581 : BAT *bn = NULL;
1827 :
1828 11394581 : BATcheck(b, NULL);
1829 11394581 : BATiter bi = bat_iterator(b);
1830 11406451 : if (l > bi.count)
1831 : l = bi.count;
1832 11406451 : if (h > bi.count)
1833 : h = bi.count;
1834 11406451 : if (h < l)
1835 : h = l;
1836 :
1837 11406451 : if (complex_cand(b)) {
1838 : /* slicing a candidate list with exceptions */
1839 75 : struct canditer ci;
1840 75 : canditer_init(&ci, NULL, b);
1841 75 : if (b->hseqbase + l >= ci.hseq) {
1842 75 : l = b->hseqbase + l - ci.hseq;
1843 75 : h = b->hseqbase + h - ci.hseq;
1844 : } else {
1845 0 : l = 0;
1846 0 : if (b->hseqbase + h >= ci.hseq)
1847 0 : h = b->hseqbase + h - ci.hseq;
1848 : else
1849 : h = 0;
1850 : }
1851 75 : bn = canditer_slice(&ci, l, h);
1852 75 : goto doreturn;
1853 : }
1854 : /* If the source BAT is readonly, then we can obtain a VIEW
1855 : * that just reuses the memory of the source. */
1856 11406376 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1857 : /* forget about slices for bit masks: we can't deal
1858 : * with difference in alignment, so we'll just make a
1859 : * copy */
1860 0 : bn = COLnew((oid) (b->hseqbase + low), b->ttype, h - l, TRANSIENT);
1861 : /* we use BATappend with a candidate list to easily
1862 : * copy the part of b that we need */
1863 0 : BAT *s = BATdense(0, (oid) (b->hseqbase + low), h - l);
1864 0 : if (bn == NULL ||
1865 0 : s == NULL ||
1866 0 : BATappend(bn, b, s, false) != GDK_SUCCEED) {
1867 0 : BBPreclaim(bn);
1868 0 : BBPreclaim(s);
1869 0 : bn = NULL;
1870 0 : goto doreturn;
1871 : }
1872 0 : BBPunfix(s->batCacheid);
1873 0 : goto doreturn;
1874 : }
1875 11406376 : restrict_t prestricted;
1876 13272738 : if (bi.restricted == BAT_READ && VIEWtparent(b)) {
1877 1857956 : BAT *pb = BBP_desc(VIEWtparent(b));
1878 1857956 : MT_lock_set(&pb->theaplock);
1879 1861506 : prestricted = pb->batRestricted;
1880 1861506 : MT_lock_unset(&pb->theaplock);
1881 : } else {
1882 : prestricted = BAT_WRITE; /* just initialize with anything */
1883 : }
1884 11414782 : if (bi.restricted == BAT_READ &&
1885 11360374 : (!VIEWtparent(b) || prestricted == BAT_READ)) {
1886 11360299 : bn = VIEWcreate(b->hseqbase + low, b, l, h);
1887 11360299 : if (bn == NULL)
1888 : goto doreturn;
1889 : } else {
1890 : /* create a new BAT and put everything into it */
1891 54483 : BUN p = l;
1892 54483 : BUN q = h;
1893 :
1894 54483 : bn = COLnew((oid) (b->hseqbase + low), BATtdensebi(&bi) || (b->ttype == TYPE_oid && h == l) ? TYPE_void : b->ttype, h - l, TRANSIENT);
1895 54537 : if (bn == NULL)
1896 0 : goto doreturn;
1897 :
1898 54537 : if (bn->ttype == TYPE_void) {
1899 32997 : BATsetcount(bn, h - l);
1900 32946 : BATtseqbase(bn, is_oid_nil(bi.tseq) ? oid_nil : h == l ? 0 : (oid) (bi.tseq + low));
1901 21540 : } else if (bn->tvheap == NULL) {
1902 14074 : assert(BATatoms[bn->ttype].atomPut == NULL);
1903 14074 : memcpy(Tloc(bn, 0), (const char *) bi.base + (p << bi.shift),
1904 14074 : (q - p) << bn->tshift);
1905 14074 : bn->theap->dirty = true;
1906 14074 : BATsetcount(bn, h - l);
1907 : } else {
1908 1721867 : for (; p < q; p++) {
1909 1714440 : if (bunfastapp(bn, BUNtail(bi, p)) != GDK_SUCCEED) {
1910 0 : BBPreclaim(bn);
1911 0 : bn = NULL;
1912 0 : goto doreturn;
1913 : }
1914 : }
1915 : }
1916 54359 : bn->theap->dirty = true;
1917 54359 : bn->tsorted = bi.sorted || bn->batCount <= 1;
1918 54359 : bn->trevsorted = bi.revsorted || bn->batCount <= 1;
1919 54359 : bn->tkey = bi.key || bn->batCount <= 1;
1920 54359 : bn->tnonil = bi.nonil;
1921 54359 : bn->tnil = false; /* we don't know */
1922 54359 : if (bi.nosorted > l && bi.nosorted < h && !bn->tsorted)
1923 5520 : bn->tnosorted = bi.nosorted - l;
1924 : else
1925 48839 : bn->tnosorted = 0;
1926 54359 : if (bi.norevsorted > l && bi.norevsorted < h && !bn->trevsorted)
1927 6956 : bn->tnorevsorted = bi.norevsorted - l;
1928 : else
1929 47403 : bn->tnorevsorted = 0;
1930 54359 : if (bi.nokey[0] >= l && bi.nokey[0] < h &&
1931 44037 : bi.nokey[1] >= l && bi.nokey[1] < h &&
1932 674 : bi.nokey[0] != bi.nokey[1] &&
1933 : !bn->tkey) {
1934 674 : bn->tnokey[0] = bi.nokey[0] - l;
1935 674 : bn->tnokey[1] = bi.nokey[1] - l;
1936 : } else {
1937 53685 : bn->tnokey[0] = bn->tnokey[1] = 0;
1938 : }
1939 : }
1940 11417599 : doreturn:
1941 11417599 : bat_iterator_end(&bi);
1942 11419115 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",lo=" BUNFMT ",hi=" BUNFMT " -> "
1943 : ALGOOPTBATFMT "\n",
1944 : ALGOBATPAR(b), l, h, ALGOOPTBATPAR(bn));
1945 : return bn;
1946 : }
1947 :
1948 : #define BAT_ORDERED(TPE) \
1949 : do { \
1950 : const TPE *restrict vals = Tloc(b, 0); \
1951 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
1952 : if (vals[p - 1] > vals[p]) { \
1953 : b->tnosorted = p; \
1954 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
1955 : goto doreturn; \
1956 : } else if (vals[p - 1] < vals[p]) { \
1957 : if (!b->trevsorted && b->tnorevsorted == 0) { \
1958 : b->tnorevsorted = p; \
1959 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b)); \
1960 : } \
1961 : } else if (!b->tkey && b->tnokey[1] == 0) { \
1962 : b->tnokey[0] = p - 1; \
1963 : b->tnokey[1] = p; \
1964 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b)); \
1965 : } \
1966 : } \
1967 : } while (0)
1968 :
1969 : #define BAT_ORDERED_FP(TPE) \
1970 : do { \
1971 : const TPE *restrict vals = Tloc(b, 0); \
1972 : TPE prev = vals[0]; \
1973 : bool prevnil = is_##TPE##_nil(prev); \
1974 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
1975 : TPE next = vals[p]; \
1976 : int cmp = prevnil ? -!(prevnil = is_##TPE##_nil(next)) : (prevnil = is_##TPE##_nil(next)) ? 1 : (prev > next) - (prev < next); \
1977 : prev = next; \
1978 : if (cmp > 0) { \
1979 : b->tnosorted = bi.nosorted = p; \
1980 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
1981 : goto doreturn; \
1982 : } else if (cmp < 0) { \
1983 : if (!b->trevsorted && b->tnorevsorted == 0) { \
1984 : b->tnorevsorted = bi.norevsorted = p; \
1985 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b)); \
1986 : } \
1987 : } else if (!b->tkey && b->tnokey[1] == 0) { \
1988 : b->tnokey[0] = bi.nokey[0] = p - 1; \
1989 : b->tnokey[1] = bi.nokey[1] = p; \
1990 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b)); \
1991 : } \
1992 : } \
1993 : } while (0)
1994 :
1995 : /* Return whether the BAT is ordered or not. If we don't know, invest
1996 : * in a scan and record the results in the bat descriptor. If during
1997 : * the scan we happen to find evidence that the BAT is not reverse
1998 : * sorted, we record the location. */
1999 : bool
2000 3041238 : BATordered(BAT *b)
2001 : {
2002 3041238 : lng t0 = GDKusec();
2003 3042439 : bool sorted;
2004 :
2005 3042439 : MT_lock_set(&b->theaplock);
2006 3043427 : if (b->ttype == TYPE_void || b->tsorted || BATcount(b) == 0) {
2007 513481 : MT_lock_unset(&b->theaplock);
2008 513534 : return true;
2009 : }
2010 2529946 : if (b->tnosorted > 0 || !ATOMlinear(b->ttype)) {
2011 2072759 : MT_lock_unset(&b->theaplock);
2012 2072760 : return false;
2013 : }
2014 :
2015 : /* There are a few reasons why we need a lock here. It may be
2016 : * that multiple threads call this functions at the same time
2017 : * (happens a lot with mitosis/mergetable), but we only need to
2018 : * scan the bat in one thread: the others can reap the rewards
2019 : * when that one thread is done. Also, we need the heap to
2020 : * remain accessible (could have used bat_iterator for that),
2021 : * and, and this is the killer argument, we may need to make
2022 : * changes to the bat descriptor. */
2023 457187 : BATiter bi = bat_iterator_nolock(b);
2024 457187 : if (!b->tsorted && b->tnosorted == 0) {
2025 824345 : switch (ATOMbasetype(b->ttype)) {
2026 86538 : case TYPE_bte:
2027 130764458 : BAT_ORDERED(bte);
2028 : break;
2029 9050 : case TYPE_sht:
2030 1596224 : BAT_ORDERED(sht);
2031 : break;
2032 306450 : case TYPE_int:
2033 117491001 : BAT_ORDERED(int);
2034 : break;
2035 8398 : case TYPE_lng:
2036 63742620 : BAT_ORDERED(lng);
2037 : break;
2038 : #ifdef HAVE_HGE
2039 421 : case TYPE_hge:
2040 8002845 : BAT_ORDERED(hge);
2041 : break;
2042 : #endif
2043 970 : case TYPE_flt:
2044 8007227 : BAT_ORDERED_FP(flt);
2045 : break;
2046 764 : case TYPE_dbl:
2047 8350595 : BAT_ORDERED_FP(dbl);
2048 : break;
2049 : case TYPE_str:
2050 21020325 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2051 21007580 : int c;
2052 21007580 : const char *p1 = BUNtvar(bi, p - 1);
2053 21007911 : const char *p2 = BUNtvar(bi, p);
2054 21007589 : if (p1 == p2)
2055 : c = 0;
2056 2240519 : else if (p1[0] == '\200') {
2057 1561 : if (p2[0] == '\200')
2058 : c = 0;
2059 : else
2060 : c = -1;
2061 2238958 : } else if (p2[0] == '\200')
2062 : c = 1;
2063 : else
2064 2237932 : c = strcmp(p1, p2);
2065 2237932 : if (c > 0) {
2066 31677 : b->tnosorted = bi.nosorted = p;
2067 31677 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2068 31677 : goto doreturn;
2069 20975912 : } else if (c < 0) {
2070 2208807 : assert(!b->trevsorted);
2071 2208807 : if (b->tnorevsorted == 0) {
2072 15983 : b->tnorevsorted = bi.norevsorted = p;
2073 15983 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b));
2074 : }
2075 18767105 : } else if (b->tnokey[1] == 0) {
2076 4276 : assert(!b->tkey);
2077 4276 : b->tnokey[0] = bi.nokey[0] = p - 1;
2078 4276 : b->tnokey[1] = bi.nokey[1] = p;
2079 20975912 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b));
2080 : }
2081 : }
2082 : break;
2083 183 : default: {
2084 183 : int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
2085 4462 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2086 4420 : int c;
2087 4420 : if ((c = cmpf(BUNtail(bi, p - 1), BUNtail(bi, p))) > 0) {
2088 141 : b->tnosorted = bi.nosorted = p;
2089 141 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2090 141 : goto doreturn;
2091 4279 : } else if (c < 0) {
2092 4186 : if (!b->trevsorted && b->tnorevsorted == 0) {
2093 84 : b->tnorevsorted = bi.norevsorted = p;
2094 84 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b));
2095 : }
2096 93 : } else if (!b->tkey && b->tnokey[1] == 0) {
2097 8 : b->tnokey[0] = bi.nokey[0] = p - 1;
2098 8 : b->tnokey[1] = bi.nokey[1] = p;
2099 4279 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b));
2100 : }
2101 : }
2102 : break;
2103 : }
2104 : }
2105 : /* we only get here if we completed the scan; note that
2106 : * if we didn't record evidence about *reverse*
2107 : * sortedness, we know that the BAT is also reverse
2108 : * sorted; similarly, if we didn't record evidence about
2109 : * keyness, we know the BAT is key */
2110 140753 : b->tsorted = bi.sorted = true;
2111 140753 : TRC_DEBUG(ALGO, "Fixed sorted for " ALGOBATFMT " (" LLFMT " usec)\n", ALGOBATPAR(b), GDKusec() - t0);
2112 141380 : if (!b->trevsorted && b->tnorevsorted == 0) {
2113 57106 : b->trevsorted = bi.revsorted = true;
2114 57106 : TRC_DEBUG(ALGO, "Fixed revsorted for " ALGOBATFMT "\n", ALGOBATPAR(b));
2115 : }
2116 141380 : if (!b->tkey && b->tnokey[1] == 0) {
2117 49427 : b->tkey = bi.key = true;
2118 49427 : TRC_DEBUG(ALGO, "Fixed key for " ALGOBATFMT "\n", ALGOBATPAR(b));
2119 : }
2120 : }
2121 141380 : doreturn:
2122 457823 : sorted = b->tsorted;
2123 457823 : bat pbid = VIEWtparent(b);
2124 457823 : MT_lock_unset(&b->theaplock);
2125 458363 : if (pbid) {
2126 256011 : BAT *pb = BBP_desc(pbid);
2127 256011 : MT_lock_set(&pb->theaplock);
2128 256009 : if (bi.count == BATcount(pb) &&
2129 195055 : bi.h == pb->theap &&
2130 195055 : bi.type == pb->ttype) {
2131 : /* add to knowledge in parent bat */
2132 195050 : pb->tsorted |= bi.sorted;
2133 195050 : if (pb->tnosorted == 0)
2134 195050 : pb->tnosorted = bi.nosorted;
2135 195050 : pb->trevsorted |= bi.revsorted;
2136 195050 : if (pb->tnorevsorted == 0)
2137 30120 : pb->tnorevsorted = bi.norevsorted;
2138 195050 : pb->tkey |= bi.key;
2139 195050 : if (pb->tnokey[1] == 0) {
2140 172429 : pb->tnokey[0] = bi.nokey[0];
2141 172429 : pb->tnokey[1] = bi.nokey[1];
2142 : }
2143 : }
2144 256009 : MT_lock_unset(&pb->theaplock);
2145 : }
2146 : return sorted;
2147 : }
2148 :
2149 : #define BAT_REVORDERED(TPE) \
2150 : do { \
2151 : const TPE *restrict vals = Tloc(b, 0); \
2152 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
2153 : if (vals[p - 1] < vals[p]) { \
2154 : b->tnorevsorted = p; \
2155 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
2156 : goto doreturn; \
2157 : } \
2158 : } \
2159 : } while (0)
2160 :
2161 : #define BAT_REVORDERED_FP(TPE) \
2162 : do { \
2163 : const TPE *restrict vals = Tloc(b, 0); \
2164 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
2165 : TPE prev = vals[p - 1], next = vals[p]; \
2166 : int cmp = is_flt_nil(prev) ? -!is_flt_nil(next) : is_flt_nil(next) ? 1 : (prev > next) - (prev < next); \
2167 : if (cmp < 0) { \
2168 : b->tnorevsorted = bi.norevsorted = p; \
2169 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
2170 : goto doreturn; \
2171 : } \
2172 : } \
2173 : } while (0)
2174 :
2175 : /* Return whether the BAT is reverse ordered or not. If we don't
2176 : * know, invest in a scan and record the results in the bat
2177 : * descriptor. */
2178 : bool
2179 2926381 : BATordered_rev(BAT *b)
2180 : {
2181 2926381 : lng t0 = GDKusec();
2182 2927478 : bool revsorted;
2183 :
2184 2927478 : if (b == NULL || !ATOMlinear(b->ttype))
2185 : return false;
2186 2927471 : MT_lock_set(&b->theaplock);
2187 2927471 : if (BATcount(b) <= 1 || b->trevsorted) {
2188 340334 : MT_lock_unset(&b->theaplock);
2189 340343 : return true;
2190 : }
2191 2587137 : if (b->ttype == TYPE_void) {
2192 23241 : MT_lock_unset(&b->theaplock);
2193 23243 : return is_oid_nil(b->tseqbase);
2194 : }
2195 2563896 : if (BATtdense(b) || b->tnorevsorted > 0) {
2196 2471255 : MT_lock_unset(&b->theaplock);
2197 2471308 : return false;
2198 : }
2199 92641 : BATiter bi = bat_iterator_nolock(b);
2200 92641 : if (!b->trevsorted && b->tnorevsorted == 0) {
2201 148858 : switch (ATOMbasetype(b->ttype)) {
2202 34480 : case TYPE_bte:
2203 10849858 : BAT_REVORDERED(bte);
2204 : break;
2205 3875 : case TYPE_sht:
2206 3144338 : BAT_REVORDERED(sht);
2207 : break;
2208 32722 : case TYPE_int:
2209 1088162 : BAT_REVORDERED(int);
2210 : break;
2211 4499 : case TYPE_lng:
2212 2619754 : BAT_REVORDERED(lng);
2213 : break;
2214 : #ifdef HAVE_HGE
2215 129 : case TYPE_hge:
2216 996 : BAT_REVORDERED(hge);
2217 : break;
2218 : #endif
2219 512 : case TYPE_flt:
2220 1999 : BAT_REVORDERED_FP(flt);
2221 : break;
2222 329 : case TYPE_dbl:
2223 485331 : BAT_REVORDERED_FP(dbl);
2224 : break;
2225 16095 : default: {
2226 16095 : int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
2227 864155 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2228 861571 : if (cmpf(BUNtail(bi, p - 1), BUNtail(bi, p)) < 0) {
2229 13506 : b->tnorevsorted = p;
2230 13506 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2231 13506 : goto doreturn;
2232 : }
2233 : }
2234 : break;
2235 : }
2236 : }
2237 25048 : b->trevsorted = bi.revsorted = true;
2238 25048 : TRC_DEBUG(ALGO, "Fixed revsorted for " ALGOBATFMT " (" LLFMT " usec)\n", ALGOBATPAR(b), GDKusec() - t0);
2239 : }
2240 25048 : doreturn:
2241 92636 : revsorted = b->trevsorted;
2242 92636 : bat pbid = VIEWtparent(b);
2243 92636 : MT_lock_unset(&b->theaplock);
2244 93010 : if (pbid) {
2245 30078 : BAT *pb = BBP_desc(pbid);
2246 30078 : MT_lock_set(&pb->theaplock);
2247 30078 : if (bi.count == BATcount(pb) &&
2248 4158 : bi.h == pb->theap &&
2249 4158 : bi.type == pb->ttype) {
2250 : /* add to knowledge in parent bat */
2251 4158 : pb->trevsorted |= bi.revsorted;
2252 4158 : if (pb->tnorevsorted == 0)
2253 4157 : pb->tnorevsorted = bi.norevsorted;
2254 : }
2255 30078 : MT_lock_unset(&pb->theaplock);
2256 : }
2257 : return revsorted;
2258 : }
2259 :
2260 : /* figure out which sort function is to be called
2261 : * stable sort can produce an error (not enough memory available),
2262 : * "quick" sort does not produce errors */
2263 : static gdk_return
2264 3567145 : do_sort(void *restrict h, void *restrict t, const void *restrict base,
2265 : size_t n, int hs, int ts, int tpe, bool reverse, bool nilslast,
2266 : bool stable)
2267 : {
2268 3567145 : if (n <= 1) /* trivially sorted */
2269 : return GDK_SUCCEED;
2270 2746141 : if (stable) {
2271 185 : if (reverse)
2272 1 : return GDKssort_rev(h, t, base, n, hs, ts, tpe);
2273 : else
2274 184 : return GDKssort(h, t, base, n, hs, ts, tpe);
2275 : } else {
2276 2745956 : GDKqsort(h, t, base, n, hs, ts, tpe, reverse, nilslast);
2277 : }
2278 2745956 : return GDK_SUCCEED;
2279 : }
2280 :
2281 : /* Sort the bat b according to both o and g. The stable and reverse
2282 : * parameters indicate whether the sort should be stable or descending
2283 : * respectively. The parameter b is required, o and g are optional
2284 : * (i.e., they may be NULL).
2285 : *
2286 : * A sorted copy is returned through the sorted parameter, the new
2287 : * ordering is returned through the order parameter, group information
2288 : * is returned through the groups parameter. All three output
2289 : * parameters may be NULL. If they're all NULL, this function does
2290 : * nothing.
2291 : *
2292 : * If o is specified, it is used to first rearrange b according to the
2293 : * order specified in o, after which b is sorted taking g into
2294 : * account.
2295 : *
2296 : * If g is specified, it indicates groups which should be individually
2297 : * ordered. Each row of consecutive equal values in g indicates a
2298 : * group which is sorted according to stable and reverse. g is used
2299 : * after the order in b was rearranged according to o.
2300 : *
2301 : * The outputs order and groups can be used in subsequent calls to
2302 : * this function. This can be used if multiple BATs need to be sorted
2303 : * together. The BATs should then be sorted in order of significance,
2304 : * and each following call should use the original unordered BAT plus
2305 : * the order and groups bat from the previous call. In this case, the
2306 : * sorted BATs are not of much use, so the sorted output parameter
2307 : * does not need to be specified.
2308 : * Apart from error checking and maintaining reference counts, sorting
2309 : * three columns (col1, col2, col3) could look like this with the
2310 : * sorted results in (col1s, col2s, col3s):
2311 : * BATsort(&col1s, &ord1, &grp1, col1, NULL, NULL, false, false, false);
2312 : * BATsort(&col2s, &ord2, &grp2, col2, ord1, grp1, false, false, false);
2313 : * BATsort(&col3s, NULL, NULL, col3, ord2, grp2, false, false, false);
2314 : * Note that the "reverse" parameter can be different for each call.
2315 : */
2316 : gdk_return
2317 28897 : BATsort(BAT **sorted, BAT **order, BAT **groups,
2318 : BAT *b, BAT *o, BAT *g, bool reverse, bool nilslast, bool stable)
2319 : {
2320 28897 : BAT *bn = NULL, *on = NULL, *gn = NULL, *pb = NULL;
2321 28897 : BATiter pbi;
2322 28897 : oid *restrict grps, *restrict ords, prev;
2323 28897 : BUN p, q, r;
2324 28897 : lng t0 = GDKusec();
2325 28898 : bool mkorderidx, orderidxlock = false;
2326 28898 : Heap *oidxh = NULL;
2327 :
2328 : /* we haven't implemented NILs as largest value for stable
2329 : * sort, so NILs come first for ascending and last for
2330 : * descending */
2331 28898 : assert(!stable || reverse == nilslast);
2332 :
2333 28898 : if (b == NULL) {
2334 0 : GDKerror("b must exist\n");
2335 0 : return GDK_FAIL;
2336 : }
2337 28898 : if (stable && reverse != nilslast) {
2338 0 : GDKerror("stable sort cannot have reverse != nilslast\n");
2339 0 : return GDK_FAIL;
2340 : }
2341 28898 : if (!ATOMlinear(b->ttype)) {
2342 0 : GDKerror("type %s cannot be sorted\n", ATOMname(b->ttype));
2343 0 : return GDK_FAIL;
2344 : }
2345 28898 : MT_lock_set(&b->theaplock);
2346 28897 : if (b->ttype == TYPE_void) {
2347 170 : b->tsorted = true;
2348 267 : if (b->trevsorted != (is_oid_nil(b->tseqbase) || b->batCount <= 1)) {
2349 0 : b->trevsorted = !b->trevsorted;
2350 : }
2351 340 : if (b->tkey != (!is_oid_nil(b->tseqbase) || b->batCount <= 1)) {
2352 0 : b->tkey = !b->tkey;
2353 : }
2354 28727 : } else if (b->batCount <= 1) {
2355 8927 : if (!b->tsorted || !b->trevsorted) {
2356 31 : b->tsorted = b->trevsorted = true;
2357 : }
2358 : }
2359 28897 : MT_lock_unset(&b->theaplock);
2360 28897 : if (o != NULL &&
2361 15349 : (ATOMtype(o->ttype) != TYPE_oid || /* oid tail */
2362 15349 : BATcount(o) != BATcount(b) || /* same size as b */
2363 5945 : (o->ttype == TYPE_void && /* no nil tail */
2364 2349 : BATcount(o) != 0 &&
2365 2349 : is_oid_nil(o->tseqbase)))) {
2366 0 : GDKerror("o must have type oid and same size as b\n");
2367 0 : return GDK_FAIL;
2368 : }
2369 28897 : if (g != NULL &&
2370 15349 : (ATOMtype(g->ttype) != TYPE_oid || /* oid tail */
2371 15349 : !g->tsorted || /* sorted */
2372 15349 : BATcount(g) != BATcount(b) || /* same size as b */
2373 6255 : (g->ttype == TYPE_void && /* no nil tail */
2374 2659 : BATcount(g) != 0 &&
2375 2659 : is_oid_nil(g->tseqbase)))) {
2376 0 : GDKerror("g must have type oid, sorted on the tail, "
2377 : "and same size as b\n");
2378 0 : return GDK_FAIL;
2379 : }
2380 28897 : if (sorted == NULL && order == NULL) {
2381 : /* no place to put result, so we're done quickly */
2382 0 : GDKerror("no place to put the result.\n");
2383 0 : return GDK_FAIL;
2384 : }
2385 28897 : if (g == NULL && !stable) {
2386 : /* pre-ordering doesn't make sense if we're not
2387 : * subsorting and the sort is not stable */
2388 13265 : o = NULL;
2389 : }
2390 28897 : if (b->tnonil) {
2391 : /* if there are no nils, placement of nils doesn't
2392 : * matter, so set nilslast such that ordered bits can
2393 : * be used */
2394 12786 : nilslast = reverse;
2395 : }
2396 28897 : pbi = bat_iterator(NULL);
2397 29955 : if (BATcount(b) <= 1 ||
2398 19761 : (reverse == nilslast &&
2399 : (reverse ? b->trevsorted : b->tsorted) &&
2400 5214 : o == NULL && g == NULL &&
2401 2500 : (groups == NULL || BATtkey(b) ||
2402 : (reverse ? b->tsorted : b->trevsorted)))) {
2403 : /* trivially (sub)sorted, and either we don't need to
2404 : * return group information, or we can trivially
2405 : * deduce the groups */
2406 10873 : if (sorted) {
2407 9378 : bn = COLcopy(b, b->ttype, false, TRANSIENT);
2408 9378 : if (bn == NULL)
2409 0 : goto error;
2410 9378 : *sorted = bn;
2411 : }
2412 10873 : if (order) {
2413 9975 : on = BATdense(b->hseqbase, b->hseqbase, BATcount(b));
2414 9977 : if (on == NULL)
2415 0 : goto error;
2416 9977 : *order = on;
2417 : }
2418 10875 : if (groups) {
2419 5588 : if (BATtkey(b)) {
2420 : /* singleton groups */
2421 5160 : gn = BATdense(0, 0, BATcount(b));
2422 5160 : if (gn == NULL)
2423 0 : goto error;
2424 : } else {
2425 : /* single group */
2426 428 : const oid *o = 0;
2427 428 : assert(BATcount(b) == 1 ||
2428 : (b->tsorted && b->trevsorted));
2429 428 : gn = BATconstant(0, TYPE_oid, &o, BATcount(b), TRANSIENT);
2430 428 : if (gn == NULL)
2431 0 : goto error;
2432 : }
2433 5588 : *groups = gn;
2434 : }
2435 10875 : bat_iterator_end(&pbi);
2436 10876 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o="
2437 : ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
2438 : ",reverse=%d,nilslast=%d,stable=%d) = ("
2439 : ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2440 : ALGOOPTBATFMT " -- trivial (" LLFMT
2441 : " usec)\n",
2442 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2443 : ALGOOPTBATPAR(g), reverse, nilslast, stable,
2444 : ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
2445 : ALGOOPTBATPAR(on), GDKusec() - t0);
2446 10876 : return GDK_SUCCEED;
2447 : }
2448 18019 : if (VIEWtparent(b)) {
2449 3808 : pb = BATdescriptor(VIEWtparent(b));
2450 3809 : if (pb != NULL &&
2451 3809 : (b->tbaseoff != pb->tbaseoff ||
2452 2945 : BATcount(b) != BATcount(pb) ||
2453 2629 : b->hseqbase != pb->hseqbase ||
2454 2618 : BATatoms[b->ttype].atomCmp != BATatoms[pb->ttype].atomCmp)) {
2455 1191 : BBPunfix(pb->batCacheid);
2456 1191 : pb = NULL;
2457 : }
2458 : } else {
2459 : pb = b;
2460 : }
2461 18020 : bat_iterator_end(&pbi);
2462 18020 : pbi = bat_iterator(pb);
2463 : /* when we will create an order index if it doesn't already exist */
2464 18022 : mkorderidx = (g == NULL && !reverse && !nilslast && pb != NULL && (order || !pbi.transient));
2465 18022 : if (g == NULL && !reverse && !nilslast && pb != NULL) {
2466 6099 : (void) BATcheckorderidx(pb);
2467 6098 : MT_lock_set(&pb->batIdxLock);
2468 6099 : if (pb->torderidx) {
2469 57 : if (!stable || ((oid *) pb->torderidx->base)[2]) {
2470 : /* we can use the order index */
2471 57 : oidxh = pb->torderidx;
2472 57 : HEAPincref(oidxh);
2473 : }
2474 : mkorderidx = false;
2475 6042 : } else if (b != pb) {
2476 : /* don't build orderidx on parent bat */
2477 : mkorderidx = false;
2478 5121 : } else if (mkorderidx) {
2479 : /* keep lock when going to create */
2480 4425 : orderidxlock = true;
2481 : }
2482 6099 : if (!orderidxlock)
2483 1674 : MT_lock_unset(&pb->batIdxLock);
2484 : }
2485 18022 : if (g == NULL && o == NULL && !reverse && !nilslast && oidxh != NULL) {
2486 : /* there is an order index that we can use */
2487 57 : on = COLnew(pb->hseqbase, TYPE_oid, pbi.count, TRANSIENT);
2488 57 : if (on == NULL)
2489 0 : goto error;
2490 57 : memcpy(Tloc(on, 0), (oid *) oidxh->base + ORDERIDXOFF, pbi.count * sizeof(oid));
2491 57 : BATsetcount(on, BATcount(b));
2492 57 : HEAPdecref(oidxh, false);
2493 57 : oidxh = NULL;
2494 57 : on->tkey = true;
2495 57 : on->tnil = false;
2496 57 : on->tnonil = true;
2497 57 : on->tsorted = on->trevsorted = false;
2498 57 : on->tseqbase = oid_nil;
2499 57 : if (sorted || groups) {
2500 57 : bn = BATproject(on, b);
2501 57 : if (bn == NULL)
2502 0 : goto error;
2503 57 : bn->tsorted = true;
2504 57 : if (groups) {
2505 4 : if (BATgroup_internal(groups, NULL, NULL, bn, NULL, g, NULL, NULL, true) != GDK_SUCCEED)
2506 0 : goto error;
2507 4 : if (sorted &&
2508 4 : (*groups)->tkey &&
2509 : g == NULL) {
2510 : /* if new groups bat is key
2511 : * and since there is no input
2512 : * groups bat, we know the
2513 : * result bat is key */
2514 4 : bn->tkey = true;
2515 : }
2516 : }
2517 57 : if (sorted)
2518 57 : *sorted = bn;
2519 : else {
2520 0 : BBPunfix(bn->batCacheid);
2521 0 : bn = NULL;
2522 : }
2523 : }
2524 57 : if (order)
2525 7 : *order = on;
2526 : else {
2527 50 : BBPunfix(on->batCacheid);
2528 50 : on = NULL;
2529 : }
2530 57 : bat_iterator_end(&pbi);
2531 57 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o="
2532 : ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
2533 : ",reverse=%d,nilslast=%d,stable=%d) = ("
2534 : ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2535 : ALGOOPTBATFMT " -- orderidx (" LLFMT
2536 : " usec)\n",
2537 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2538 : ALGOOPTBATPAR(g), reverse, nilslast, stable,
2539 : ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
2540 : ALGOOPTBATPAR(on), GDKusec() - t0);
2541 57 : if (pb != NULL && pb != b)
2542 53 : BBPunfix(pb->batCacheid);
2543 57 : return GDK_SUCCEED;
2544 11288 : } else if (oidxh) {
2545 0 : HEAPdecref(oidxh, false);
2546 0 : oidxh = NULL;
2547 : }
2548 17964 : if (o) {
2549 10728 : bn = BATproject(o, b);
2550 10728 : if (bn == NULL)
2551 0 : goto error;
2552 10728 : if (bn->ttype == TYPE_void || isVIEW(bn)) {
2553 4394 : BAT *b2 = COLcopy(bn, ATOMtype(bn->ttype), true, TRANSIENT);
2554 2197 : BBPunfix(bn->batCacheid);
2555 2197 : bn = b2;
2556 : }
2557 10728 : if (pb) {
2558 10192 : bat_iterator_end(&pbi);
2559 10192 : if (pb != b)
2560 1304 : BBPunfix(pb->batCacheid);
2561 10192 : pbi = bat_iterator(NULL);
2562 10192 : pb = NULL;
2563 : }
2564 : } else {
2565 7236 : bn = COLcopy(b, b->ttype, true, TRANSIENT);
2566 : }
2567 17960 : if (bn == NULL)
2568 0 : goto error;
2569 17960 : if (order) {
2570 : /* prepare order bat */
2571 15079 : if (o) {
2572 : /* make copy of input so that we can refine it;
2573 : * copy can be read-only if we take the shortcut
2574 : * below in the case g is "key" */
2575 9105 : on = COLcopy(o, TYPE_oid,
2576 9105 : g == NULL ||
2577 9105 : !(g->tkey || g->ttype == TYPE_void),
2578 : TRANSIENT);
2579 9105 : if (on == NULL)
2580 0 : goto error;
2581 9105 : BAThseqbase(on, b->hseqbase);
2582 9105 : on->tminpos = BUN_NONE;
2583 9105 : on->tmaxpos = BUN_NONE;
2584 : } else {
2585 : /* create new order */
2586 5974 : on = COLnew(b->hseqbase, TYPE_oid, BATcount(bn), TRANSIENT);
2587 5977 : if (on == NULL)
2588 0 : goto error;
2589 5977 : ords = (oid *) Tloc(on, 0);
2590 31086905 : for (p = 0, q = BATcount(bn); p < q; p++)
2591 31080928 : ords[p] = p + b->hseqbase;
2592 5977 : BATsetcount(on, BATcount(bn));
2593 5977 : on->tkey = true;
2594 5977 : on->tnil = false;
2595 5977 : on->tnonil = true;
2596 : }
2597 : /* COLcopy above can create TYPE_void */
2598 15082 : if (on->ttype != TYPE_void) {
2599 14366 : on->tsorted = on->trevsorted = false; /* it won't be sorted */
2600 14366 : on->tseqbase = oid_nil; /* and hence not dense */
2601 14366 : on->tnosorted = on->tnorevsorted = 0;
2602 : }
2603 15082 : *order = on;
2604 15082 : ords = (oid *) Tloc(on, 0);
2605 : } else {
2606 : ords = NULL;
2607 : }
2608 17963 : if (g) {
2609 10728 : if (g->tkey || g->ttype == TYPE_void) {
2610 : /* if g is "key", all groups are size 1, so no
2611 : * subsorting needed */
2612 4892 : if (sorted) {
2613 4474 : *sorted = bn;
2614 : } else {
2615 418 : BBPunfix(bn->batCacheid);
2616 418 : bn = NULL;
2617 : }
2618 4892 : if (order) {
2619 3737 : *order = on;
2620 3737 : if (o) {
2621 : /* we can inherit sortedness
2622 : * after all */
2623 3737 : on->tsorted = o->tsorted;
2624 3737 : on->trevsorted = o->trevsorted;
2625 3737 : if (o->tnosorted)
2626 56 : on->tnosorted = o->tnosorted;
2627 3737 : if (o->tnorevsorted)
2628 74 : on->tnorevsorted = o->tnorevsorted;
2629 : } else {
2630 : /* we didn't rearrange, so
2631 : * still sorted */
2632 0 : on->tsorted = true;
2633 0 : on->trevsorted = false;
2634 : }
2635 3737 : if (BATcount(on) <= 1) {
2636 0 : on->tsorted = true;
2637 0 : on->trevsorted = true;
2638 : }
2639 : }
2640 4892 : if (groups) {
2641 2807 : gn = COLcopy(g, g->ttype, false, TRANSIENT);
2642 2807 : if (gn == NULL)
2643 0 : goto error;
2644 2807 : *groups = gn;
2645 : }
2646 4892 : bat_iterator_end(&pbi);
2647 4892 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
2648 : ",o=" ALGOOPTBATFMT ",g=" ALGOBATFMT
2649 : ",reverse=%d,nilslast=%d,stable=%d"
2650 : ") = (" ALGOOPTBATFMT ","
2651 : ALGOOPTBATFMT "," ALGOOPTBATFMT
2652 : " -- key group (" LLFMT " usec)\n",
2653 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2654 : ALGOBATPAR(g), reverse, nilslast,
2655 : stable, ALGOOPTBATPAR(bn),
2656 : ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on),
2657 : GDKusec() - t0);
2658 4892 : if (pb != NULL && pb != b)
2659 0 : BBPunfix(pb->batCacheid);
2660 4892 : return GDK_SUCCEED;
2661 : }
2662 5836 : assert(g->ttype == TYPE_oid);
2663 5836 : grps = (oid *) Tloc(g, 0);
2664 5836 : prev = grps[0];
2665 5836 : if (BATmaterialize(bn, BUN_NONE) != GDK_SUCCEED)
2666 0 : goto error;
2667 44939108 : for (r = 0, p = 1, q = BATcount(g); p < q; p++) {
2668 44933272 : if (grps[p] != prev) {
2669 : /* sub sort [r,p) */
2670 6809405 : if (do_sort(Tloc(bn, r),
2671 3254662 : ords ? ords + r : NULL,
2672 3554743 : bn->tvheap ? bn->tvheap->base : NULL,
2673 3554743 : p - r, bn->twidth, ords ? sizeof(oid) : 0,
2674 3554743 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)
2675 0 : goto error;
2676 3554743 : r = p;
2677 3554743 : prev = grps[p];
2678 : }
2679 : }
2680 : /* sub sort [r,q) */
2681 11204 : if (do_sort(Tloc(bn, r),
2682 5368 : ords ? ords + r : NULL,
2683 5836 : bn->tvheap ? bn->tvheap->base : NULL,
2684 5836 : p - r, bn->twidth, ords ? sizeof(oid) : 0,
2685 5836 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)
2686 0 : goto error;
2687 : /* if single group (r==0) the result is (rev)sorted,
2688 : * otherwise (maybe) not */
2689 5836 : bn->tsorted = r == 0 && !reverse && !nilslast;
2690 11638 : bn->trevsorted = r == 0 && reverse && nilslast;
2691 : } else {
2692 7235 : Heap *m = NULL;
2693 : /* only invest in creating an order index if the BAT
2694 : * is persistent */
2695 7235 : if (mkorderidx) {
2696 4424 : assert(orderidxlock);
2697 4424 : if ((m = createOIDXheap(pb, stable)) != NULL &&
2698 : ords == NULL) {
2699 0 : ords = (oid *) m->base + ORDERIDXOFF;
2700 0 : if (o && o->ttype != TYPE_void)
2701 0 : memcpy(ords, Tloc(o, 0), BATcount(o) * sizeof(oid));
2702 0 : else if (o)
2703 0 : for (p = 0, q = BATcount(o); p < q; p++)
2704 0 : ords[p] = p + o->tseqbase;
2705 : else
2706 0 : for (p = 0, q = BATcount(b); p < q; p++)
2707 0 : ords[p] = p + b->hseqbase;
2708 : }
2709 : }
2710 7236 : if ((reverse != nilslast ||
2711 13708 : (reverse ? !bn->trevsorted : !bn->tsorted)) &&
2712 13140 : (BATmaterialize(bn, BUN_NONE) != GDK_SUCCEED ||
2713 6569 : do_sort(Tloc(bn, 0),
2714 : ords,
2715 6569 : bn->tvheap ? bn->tvheap->base : NULL,
2716 6569 : BATcount(bn), bn->twidth, ords ? sizeof(oid) : 0,
2717 6569 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)) {
2718 0 : if (m != NULL) {
2719 0 : HEAPfree(m, true);
2720 0 : GDKfree(m);
2721 : }
2722 0 : goto error;
2723 : }
2724 7235 : bn->tsorted = !reverse && !nilslast;
2725 7235 : bn->trevsorted = reverse && nilslast;
2726 7235 : if (m != NULL) {
2727 4423 : assert(orderidxlock);
2728 4423 : if (pb->torderidx == NULL) {
2729 4423 : if (ords != (oid *) m->base + ORDERIDXOFF) {
2730 4423 : memcpy((oid *) m->base + ORDERIDXOFF,
2731 : ords,
2732 4423 : pbi.count * sizeof(oid));
2733 : }
2734 4423 : pb->torderidx = m;
2735 4423 : persistOIDX(pb);
2736 : } else {
2737 0 : HEAPfree(m, true);
2738 0 : GDKfree(m);
2739 : }
2740 : }
2741 : }
2742 13071 : if (orderidxlock) {
2743 4423 : MT_lock_unset(&pb->batIdxLock);
2744 4423 : orderidxlock = false;
2745 : }
2746 13072 : bn->theap->dirty = true;
2747 13072 : bn->tnosorted = 0;
2748 13072 : bn->tnorevsorted = 0;
2749 13072 : bn->tnokey[0] = bn->tnokey[1] = 0;
2750 13072 : bn->tminpos = BUN_NONE;
2751 13072 : bn->tmaxpos = BUN_NONE;
2752 13072 : if (groups) {
2753 7754 : if (BATgroup_internal(groups, NULL, NULL, bn, NULL, g, NULL, NULL, true) != GDK_SUCCEED)
2754 0 : goto error;
2755 7754 : if ((*groups)->tkey &&
2756 926 : (g == NULL || (g->tsorted && g->trevsorted))) {
2757 : /* if new groups bat is key and the input
2758 : * group bat has a single value (both sorted
2759 : * and revsorted), we know the result bat is
2760 : * key */
2761 1128 : bn->tkey = true;
2762 : }
2763 : }
2764 :
2765 13072 : bat_iterator_end(&pbi);
2766 13071 : if (sorted)
2767 9409 : *sorted = bn;
2768 : else {
2769 3662 : BBPunfix(bn->batCacheid);
2770 3662 : bn = NULL;
2771 : }
2772 :
2773 13071 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o=" ALGOOPTBATFMT
2774 : ",g=" ALGOOPTBATFMT ",reverse=%d,nilslast=%d,"
2775 : "stable=%d) = (" ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2776 : ALGOOPTBATFMT " -- %ssort (" LLFMT " usec)\n",
2777 : ALGOBATPAR(b), ALGOOPTBATPAR(o), ALGOOPTBATPAR(g),
2778 : reverse, nilslast, stable, ALGOOPTBATPAR(bn),
2779 : ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on),
2780 : g ? "grouped " : "", GDKusec() - t0);
2781 13071 : if (pb && pb != b)
2782 1260 : BBPunfix(pb->batCacheid);
2783 : return GDK_SUCCEED;
2784 :
2785 0 : error:
2786 0 : bat_iterator_end(&pbi);
2787 0 : if (orderidxlock)
2788 0 : MT_lock_unset(&pb->batIdxLock);
2789 0 : if (oidxh)
2790 0 : HEAPdecref(oidxh, false);
2791 0 : BBPreclaim(bn);
2792 0 : if (pb && pb != b)
2793 0 : BBPunfix(pb->batCacheid);
2794 0 : BBPreclaim(on);
2795 0 : if (sorted)
2796 0 : *sorted = NULL;
2797 0 : if (order)
2798 0 : *order = NULL;
2799 0 : if (groups)
2800 0 : *groups = NULL;
2801 : return GDK_FAIL;
2802 : }
2803 :
2804 : /* return a new BAT of length n with seqbase hseq, and the constant v
2805 : * in the tail */
2806 : BAT *
2807 1483950 : BATconstant(oid hseq, int tailtype, const void *v, BUN n, role_t role)
2808 : {
2809 1483950 : BAT *bn;
2810 1483950 : void *restrict p;
2811 1483950 : BUN i;
2812 1483950 : lng t0 = 0;
2813 :
2814 1483950 : TRC_DEBUG_IF(ALGO) t0 = GDKusec();
2815 1483950 : if (v == NULL)
2816 : return NULL;
2817 1483950 : bn = COLnew(hseq, tailtype, n, role);
2818 1485864 : if (bn != NULL && n > 0) {
2819 77891 : p = Tloc(bn, 0);
2820 77891 : switch (ATOMstorage(tailtype)) {
2821 6 : case TYPE_void:
2822 6 : v = &oid_nil;
2823 6 : BATtseqbase(bn, oid_nil);
2824 6 : break;
2825 0 : case TYPE_msk:
2826 0 : if (*(msk*)v) {
2827 0 : memset(p, 0xFF, 4 * ((n + 31) / 32));
2828 0 : if (n & 31) {
2829 0 : uint32_t *m = p;
2830 0 : m[n / 32] &= (1U << (n % 32)) - 1;
2831 : }
2832 : } else
2833 0 : memset(p, 0x00, 4 * ((n + 31) / 32));
2834 : break;
2835 9484 : case TYPE_bte:
2836 9484 : memset(p, *(bte*)v, n);
2837 9484 : break;
2838 : case TYPE_sht:
2839 6163444 : for (i = 0; i < n; i++)
2840 6145104 : ((sht *) p)[i] = *(sht *) v;
2841 : break;
2842 : case TYPE_int:
2843 : case TYPE_flt:
2844 : assert(sizeof(int) == sizeof(flt));
2845 201471531 : for (i = 0; i < n; i++)
2846 201466541 : ((int *) p)[i] = *(int *) v;
2847 : break;
2848 : case TYPE_lng:
2849 : case TYPE_dbl:
2850 : assert(sizeof(lng) == sizeof(dbl));
2851 234337980 : for (i = 0; i < n; i++)
2852 234303350 : ((lng *) p)[i] = *(lng *) v;
2853 : break;
2854 : #ifdef HAVE_HGE
2855 : case TYPE_hge:
2856 23636861 : for (i = 0; i < n; i++)
2857 23635388 : ((hge *) p)[i] = *(hge *) v;
2858 : break;
2859 : #endif
2860 : case TYPE_uuid:
2861 200047 : for (i = 0; i < n; i++)
2862 200038 : ((uuid *) p)[i] = *(uuid *) v;
2863 : break;
2864 8786 : case TYPE_str:
2865 : /* insert the first value, then just copy the
2866 : * offset lots of times */
2867 8786 : if (tfastins_nocheck(bn, 0, v) != GDK_SUCCEED) {
2868 0 : BBPreclaim(bn);
2869 0 : return NULL;
2870 : }
2871 8804 : char val[sizeof(var_t)];
2872 8804 : memcpy(val, Tloc(bn, 0), bn->twidth);
2873 8804 : if (bn->twidth == 1 && n > 1) {
2874 : /* single byte value: we have a
2875 : * function for that */
2876 5013 : memset(Tloc(bn, 1), val[0], n - 1);
2877 : } else {
2878 3791 : char *p = Tloc(bn, 0);
2879 3811 : for (i = 1; i < n; i++) {
2880 20 : p += bn->twidth;
2881 20 : memcpy(p, val, bn->twidth);
2882 : }
2883 : }
2884 : break;
2885 : default:
2886 333808 : for (i = 0; i < n; i++)
2887 333637 : if (tfastins_nocheck(bn, i, v) != GDK_SUCCEED) {
2888 0 : BBPreclaim(bn);
2889 0 : return NULL;
2890 : }
2891 : break;
2892 : }
2893 77907 : bn->theap->dirty = true;
2894 77907 : bn->tnil = n >= 1 && ATOMnilptr(tailtype) && (*ATOMcompare(tailtype))(v, ATOMnilptr(tailtype)) == 0;
2895 77927 : BATsetcount(bn, n);
2896 78160 : bn->tsorted = bn->trevsorted = ATOMlinear(tailtype);
2897 78160 : bn->tnonil = !bn->tnil;
2898 78160 : bn->tkey = BATcount(bn) <= 1;
2899 : }
2900 1486133 : TRC_DEBUG(ALGO, "-> " ALGOOPTBATFMT " " LLFMT "usec\n",
2901 : ALGOOPTBATPAR(bn), GDKusec() - t0);
2902 : return bn;
2903 : }
2904 :
2905 : /*
2906 : * BAT Aggregates
2907 : *
2908 : * We retain the size() and card() aggregate results in the column
2909 : * descriptor. We would like to have such functionality in an
2910 : * extensible way for many aggregates, for DD (1) we do not want to
2911 : * change the binary BAT format on disk and (2) aggr and size are the
2912 : * most relevant aggregates.
2913 : *
2914 : * It is all hacked into the aggr[3] records; three adjacent integers
2915 : * that were left over in the column record. We refer to these as if
2916 : * it where an int aggr[3] array. The below routines set and retrieve
2917 : * the aggregate values from the tail of the BAT, as many
2918 : * aggregate-manipulating BAT functions work on tail.
2919 : *
2920 : * The rules are as follows: aggr[0] contains the alignment ID of the
2921 : * column (if set i.e. nonzero). Hence, if this value is nonzero and
2922 : * equal to b->talign, the precomputed aggregate values in
2923 : * aggr[GDK_AGGR_SIZE] and aggr[GDK_AGGR_CARD] hold. However, only one
2924 : * of them may be set at the time. This is encoded by the value
2925 : * int_nil, which cannot occur in these two aggregates.
2926 : *
2927 : * This was now extended to record the property whether we know there
2928 : * is a nil value present by mis-using the highest bits of both
2929 : * GDK_AGGR_SIZE and GDK_AGGR_CARD.
2930 : */
2931 :
2932 : void
2933 43677754 : PROPdestroy_nolock(BAT *b)
2934 : {
2935 43677754 : PROPrec *p = b->tprops;
2936 43677754 : PROPrec *n;
2937 :
2938 43677754 : b->tprops = NULL;
2939 43701338 : while (p) {
2940 4473 : n = p->next;
2941 4473 : assert(p->id != (enum prop_t) 20);
2942 4473 : VALclear(&p->v);
2943 4473 : GDKfree(p);
2944 4473 : p = n;
2945 : }
2946 43696865 : }
2947 :
2948 : void
2949 397 : PROPdestroy(BAT *b)
2950 : {
2951 397 : MT_lock_set(&b->theaplock);
2952 397 : PROPdestroy_nolock(b);
2953 397 : MT_lock_unset(&b->theaplock);
2954 397 : }
2955 :
2956 : ValPtr
2957 223631819 : BATgetprop_nolock(BAT *b, enum prop_t idx)
2958 : {
2959 223631819 : PROPrec *p;
2960 :
2961 223631819 : p = b->tprops;
2962 223708066 : while (p && p->id != idx)
2963 76247 : p = p->next;
2964 223631819 : return p ? &p->v : NULL;
2965 : }
2966 :
2967 : void
2968 346011 : BATrmprop_nolock(BAT *b, enum prop_t idx)
2969 : {
2970 346011 : PROPrec *prop = b->tprops, *prev = NULL;
2971 :
2972 346246 : while (prop) {
2973 346119 : if (prop->id == idx) {
2974 345884 : if (prev)
2975 106 : prev->next = prop->next;
2976 : else
2977 345778 : b->tprops = prop->next;
2978 345884 : VALclear(&prop->v);
2979 345884 : GDKfree(prop);
2980 345884 : return;
2981 : }
2982 235 : prev = prop;
2983 235 : prop = prop->next;
2984 : }
2985 : }
2986 :
2987 : ValPtr
2988 350394 : BATsetprop_nolock(BAT *b, enum prop_t idx, int type, const void *v)
2989 : {
2990 350394 : PROPrec *p;
2991 :
2992 350394 : p = b->tprops;
2993 357976 : while (p && p->id != idx)
2994 7582 : p = p->next;
2995 350394 : if (p == NULL) {
2996 350387 : if ((p = GDKmalloc(sizeof(PROPrec))) == NULL) {
2997 : /* properties are hints, so if we can't create
2998 : * one we ignore the error */
2999 0 : GDKclrerr();
3000 0 : return NULL;
3001 : }
3002 350387 : p->id = idx;
3003 350387 : p->next = b->tprops;
3004 350387 : p->v.vtype = 0;
3005 350387 : b->tprops = p;
3006 : } else {
3007 7 : VALclear(&p->v);
3008 : }
3009 350394 : if (VALinit(&p->v, type, v) == NULL) {
3010 : /* failed to initialize, so remove property */
3011 0 : BATrmprop_nolock(b, idx);
3012 0 : GDKclrerr();
3013 0 : p = NULL;
3014 : }
3015 0 : return p ? &p->v : NULL;
3016 : }
3017 :
3018 : ValPtr
3019 3143637 : BATgetprop(BAT *b, enum prop_t idx)
3020 : {
3021 3143637 : ValPtr p;
3022 :
3023 3143637 : MT_lock_set(&b->theaplock);
3024 3147147 : p = BATgetprop_nolock(b, idx);
3025 3146944 : MT_lock_unset(&b->theaplock);
3026 3147902 : return p;
3027 : }
3028 :
3029 : ValPtr
3030 4281 : BATsetprop(BAT *b, enum prop_t idx, int type, const void *v)
3031 : {
3032 4281 : ValPtr p;
3033 4281 : MT_lock_set(&b->theaplock);
3034 4281 : p = BATsetprop_nolock(b, idx, type, v);
3035 4281 : MT_lock_unset(&b->theaplock);
3036 4281 : return p;
3037 : }
3038 :
3039 : void
3040 2 : BATrmprop(BAT *b, enum prop_t idx)
3041 : {
3042 2 : MT_lock_set(&b->theaplock);
3043 2 : BATrmprop_nolock(b, idx);
3044 2 : MT_lock_unset(&b->theaplock);
3045 2 : }
3046 :
3047 : /*
3048 : * The BATcount_no_nil function counts all BUN in a BAT that have a
3049 : * non-nil tail value.
3050 : * This function does not fail (the callers currently don't check for failure).
3051 : */
3052 : BUN
3053 2389 : BATcount_no_nil(BAT *b, BAT *s)
3054 : {
3055 2389 : BUN cnt = 0;
3056 2389 : const void *restrict p, *restrict nil;
3057 2389 : const char *restrict base;
3058 2389 : int t;
3059 2389 : int (*cmp)(const void *, const void *);
3060 2389 : struct canditer ci;
3061 2389 : oid hseq;
3062 :
3063 2389 : BATcheck(b, 0);
3064 :
3065 2389 : hseq = b->hseqbase;
3066 2389 : canditer_init(&ci, b, s);
3067 2392 : if (b->tnonil)
3068 1735 : return ci.ncand;
3069 657 : BATiter bi = bat_iterator(b);
3070 656 : p = bi.base;
3071 656 : t = ATOMbasetype(bi.type);
3072 656 : switch (t) {
3073 0 : case TYPE_void:
3074 0 : cnt = ci.ncand * BATtdensebi(&bi);
3075 0 : break;
3076 0 : case TYPE_msk:
3077 0 : cnt = ci.ncand;
3078 0 : break;
3079 38 : case TYPE_bte:
3080 21878 : CAND_LOOP(&ci)
3081 21840 : cnt += !is_bte_nil(((const bte *) p)[canditer_next(&ci) - hseq]);
3082 : break;
3083 31 : case TYPE_sht:
3084 4403 : CAND_LOOP(&ci)
3085 4372 : cnt += !is_sht_nil(((const sht *) p)[canditer_next(&ci) - hseq]);
3086 : break;
3087 372 : case TYPE_int:
3088 5852451 : CAND_LOOP(&ci)
3089 5852078 : cnt += !is_int_nil(((const int *) p)[canditer_next(&ci) - hseq]);
3090 : break;
3091 97 : case TYPE_lng:
3092 156901 : CAND_LOOP(&ci)
3093 156804 : cnt += !is_lng_nil(((const lng *) p)[canditer_next(&ci) - hseq]);
3094 : break;
3095 : #ifdef HAVE_HGE
3096 0 : case TYPE_hge:
3097 0 : CAND_LOOP(&ci)
3098 0 : cnt += !is_hge_nil(((const hge *) p)[canditer_next(&ci) - hseq]);
3099 : break;
3100 : #endif
3101 0 : case TYPE_flt:
3102 0 : CAND_LOOP(&ci)
3103 0 : cnt += !is_flt_nil(((const flt *) p)[canditer_next(&ci) - hseq]);
3104 : break;
3105 27 : case TYPE_dbl:
3106 68 : CAND_LOOP(&ci)
3107 41 : cnt += !is_dbl_nil(((const dbl *) p)[canditer_next(&ci) - hseq]);
3108 : break;
3109 1 : case TYPE_uuid:
3110 4 : CAND_LOOP(&ci)
3111 3 : cnt += !is_uuid_nil(((const uuid *) p)[canditer_next(&ci) - hseq]);
3112 : break;
3113 90 : case TYPE_str:
3114 90 : base = bi.vh->base;
3115 90 : switch (bi.width) {
3116 58 : case 1:
3117 7072 : CAND_LOOP(&ci)
3118 7014 : cnt += base[(var_t) ((const uint8_t *) p)[canditer_next(&ci) - hseq] + GDK_VAROFFSET] != '\200';
3119 : break;
3120 30 : case 2:
3121 22755 : CAND_LOOP(&ci)
3122 22725 : cnt += base[(var_t) ((const uint16_t *) p)[canditer_next(&ci) - hseq] + GDK_VAROFFSET] != '\200';
3123 : break;
3124 2 : case 4:
3125 2485 : CAND_LOOP(&ci)
3126 2483 : cnt += base[(var_t) ((const uint32_t *) p)[canditer_next(&ci) - hseq]] != '\200';
3127 : break;
3128 : #if SIZEOF_VAR_T == 8
3129 0 : case 8:
3130 0 : CAND_LOOP(&ci)
3131 0 : cnt += base[(var_t) ((const uint64_t *) p)[canditer_next(&ci) - hseq]] != '\200';
3132 : break;
3133 : #endif
3134 : default:
3135 0 : MT_UNREACHABLE();
3136 : }
3137 : break;
3138 0 : default:
3139 0 : nil = ATOMnilptr(t);
3140 0 : cmp = ATOMcompare(t);
3141 0 : if (nil == NULL) {
3142 0 : cnt = ci.ncand;
3143 0 : } else if (b->tvheap) {
3144 0 : base = b->tvheap->base;
3145 0 : CAND_LOOP(&ci)
3146 0 : cnt += (*cmp)(nil, base + ((const var_t *) p)[canditer_next(&ci) - hseq]) != 0;
3147 : } else {
3148 0 : CAND_LOOP(&ci)
3149 0 : cnt += (*cmp)(BUNtloc(bi, canditer_next(&ci) - hseq), nil) != 0;
3150 : }
3151 : break;
3152 : }
3153 657 : if (cnt == bi.count) {
3154 458 : MT_lock_set(&b->theaplock);
3155 458 : if (cnt == BATcount(b) && bi.h == b->theap) {
3156 : /* we learned something */
3157 458 : b->tnonil = true;
3158 458 : assert(!b->tnil);
3159 458 : b->tnil = false;
3160 : }
3161 458 : bat pbid = VIEWtparent(b);
3162 458 : MT_lock_unset(&b->theaplock);
3163 458 : if (pbid) {
3164 186 : BAT *pb = BATdescriptor(pbid);
3165 187 : if (pb) {
3166 187 : MT_lock_set(&pb->theaplock);
3167 187 : if (cnt == BATcount(pb) &&
3168 11 : bi.h == pb->theap &&
3169 11 : !pb->tnonil) {
3170 11 : pb->tnonil = true;
3171 11 : assert(!pb->tnil);
3172 11 : pb->tnil = false;
3173 : }
3174 187 : MT_lock_unset(&pb->theaplock);
3175 187 : BBPunfix(pb->batCacheid);
3176 : }
3177 : }
3178 : }
3179 658 : bat_iterator_end(&bi);
3180 658 : return cnt;
3181 : }
|