Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024, 2025 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * (c) M. L. Kersten, P. Boncz, S. Manegold, N. Nes, K.S. Mullender
15 : * Common BAT Operations
16 : * We factor out all possible overhead by inlining code. This
17 : * includes the macros BUNhead and BUNtail, which do a test to see
18 : * whether the atom resides in the buns or in a variable storage
19 : * heap.
20 : */
21 : #include "monetdb_config.h"
22 : #include "gdk.h"
23 : #include "gdk_private.h"
24 :
25 : gdk_return
26 56916157 : unshare_varsized_heap(BAT *b)
27 : {
28 56916157 : if (ATOMvarsized(b->ttype) &&
29 22237427 : b->tvheap->parentid != b->batCacheid) {
30 1029 : Heap *h = GDKmalloc(sizeof(Heap));
31 1035 : if (h == NULL)
32 : return GDK_FAIL;
33 1035 : MT_thread_setalgorithm("unshare vheap");
34 2063 : *h = (Heap) {
35 1029 : .parentid = b->batCacheid,
36 1034 : .farmid = BBPselectfarm(b->batRole, TYPE_str, varheap),
37 : .refs = ATOMIC_VAR_INIT(1),
38 : };
39 1029 : strconcat_len(h->filename, sizeof(h->filename),
40 1029 : BBP_physical(b->batCacheid), ".theap", NULL);
41 1035 : if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
42 0 : HEAPfree(h, true);
43 0 : GDKfree(h);
44 0 : return GDK_FAIL;
45 : }
46 1033 : MT_lock_set(&b->theaplock);
47 1034 : Heap *oh = b->tvheap;
48 1034 : b->tvheap = h;
49 1034 : MT_lock_unset(&b->theaplock);
50 1035 : BBPrelease(oh->parentid);
51 1035 : HEAPdecref(oh, false);
52 : }
53 : return GDK_SUCCEED;
54 : }
55 :
56 : /* We try to be clever when appending one string bat to another.
57 : * First of all, we try to actually share the string heap so that we
58 : * don't need an extra copy, and if that can't be done, we see whether
59 : * it makes sense to just quickly copy the whole string heap instead
60 : * of inserting individual strings. See the comments in the code for
61 : * more information. */
62 : static gdk_return
63 86010 : insert_string_bat(BAT *b, BATiter *ni, struct canditer *ci, bool force, bool mayshare, QryCtx *qry_ctx)
64 : {
65 86010 : size_t toff = ~(size_t) 0; /* tail offset */
66 86010 : BUN p, r; /* loop variables */
67 86010 : const void *tp = NULL; /* tail value pointer */
68 86010 : var_t v;
69 86010 : size_t off; /* offset within n's string heap */
70 86010 : BUN cnt = ci->ncand;
71 86010 : BUN oldcnt = BATcount(b);
72 :
73 86010 : assert(b->ttype == TYPE_str);
74 86010 : assert(b->tbaseoff == 0);
75 86010 : assert(b->theap->parentid == b->batCacheid);
76 : /* only transient bats can use some other bat's string heap */
77 86010 : assert(b->batRole == TRANSIENT || b->tvheap->parentid == b->batCacheid);
78 86010 : if (cnt == 0)
79 : return GDK_SUCCEED;
80 :
81 86004 : if (b->tvheap == ni->vh) {
82 : /* vheaps are already shared, continue doing so: we just
83 : * need to append the offsets */
84 20517 : toff = 0;
85 20517 : MT_thread_setalgorithm("shared vheap");
86 65487 : } else if (mayshare && b->batRole == TRANSIENT && oldcnt == 0) {
87 : /* we can share the vheaps, so we then only need to
88 : * append the offsets */
89 51148 : MT_lock_set(&b->theaplock);
90 51307 : bat bid = b->tvheap->parentid;
91 51307 : HEAPdecref(b->tvheap, bid == b->batCacheid);
92 51436 : HEAPincref(ni->vh);
93 51424 : b->tvheap = ni->vh;
94 51424 : b->tascii = ni->ascii;
95 51424 : MT_lock_unset(&b->theaplock);
96 51448 : BBPretain(ni->vh->parentid);
97 51418 : if (bid != b->batCacheid)
98 0 : BBPrelease(bid);
99 51418 : toff = 0;
100 51418 : MT_thread_setalgorithm("share vheap");
101 : } else {
102 : /* no heap sharing, so also make sure the heap isn't
103 : * shared currently (we're not allowed to write in
104 : * another bat's heap) */
105 15373 : if (b->tvheap->parentid != b->batCacheid &&
106 1031 : unshare_varsized_heap(b) != GDK_SUCCEED) {
107 : return GDK_FAIL;
108 : }
109 14342 : if (oldcnt == 0 || (!GDK_ELIMDOUBLES(b->tvheap) &&
110 91 : !GDK_ELIMDOUBLES(ni->vh))) {
111 : /* we'll consider copying the string heap completely
112 : *
113 : * we first estimate how much space the string heap
114 : * should occupy, given the number of rows we need to
115 : * insert, then, if that is way smaller than the actual
116 : * space occupied, we will skip the copy and just insert
117 : * one by one */
118 : size_t len = 0;
119 5924950 : for (int i = 0; i < 1024; i++) {
120 5919104 : p = (BUN) (((double) rand() / RAND_MAX) * (cnt - 1));
121 5919198 : p = canditer_idx(ci, p) - ni->b->hseqbase;
122 5919198 : len += strlen(BUNtvar(*ni, p)) + 1;
123 : }
124 5846 : len = (len + 512) / 1024; /* rounded average length */
125 5846 : r = (GDK_ELIMLIMIT - GDK_STRHASHSIZE) / (len + 12);
126 : /* r is estimate of number of strings in
127 : * double-eliminated area */
128 5846 : BUN ecnt = ci->ncand;
129 5846 : if (ni->b->tunique_est > 0 && ecnt > ni->b->tunique_est)
130 71 : ecnt = (BUN)ni->b->tunique_est;
131 5846 : if (r < ecnt)
132 933 : len = GDK_ELIMLIMIT + (ecnt - r) * len;
133 : else
134 4913 : len = GDK_STRHASHSIZE + ecnt * (len + 12);
135 : /* len is total estimated expected size of vheap */
136 :
137 5846 : if (len > ni->vhfree / 2) {
138 : /* we copy the string heap, perhaps appending */
139 5840 : if (oldcnt == 0) {
140 5801 : toff = 0;
141 5801 : MT_thread_setalgorithm("copy vheap");
142 : } else {
143 39 : toff = (b->tvheap->free + GDK_VARALIGN - 1) & ~(GDK_VARALIGN - 1);
144 39 : MT_thread_setalgorithm("append vheap");
145 : }
146 :
147 5839 : MT_lock_set(&b->theaplock);
148 5839 : if (HEAPgrow(&b->tvheap, toff + ni->vhfree, force) != GDK_SUCCEED) {
149 0 : MT_lock_unset(&b->theaplock);
150 0 : return GDK_FAIL;
151 : }
152 5833 : memcpy(b->tvheap->base + toff, ni->vh->base, ni->vhfree);
153 5833 : b->tvheap->free = toff + ni->vhfree;
154 5833 : b->tvheap->dirty = true;
155 5833 : b->tascii &= ni->ascii;
156 5833 : MT_lock_unset(&b->theaplock);
157 : }
158 : }
159 : }
160 : /* if toff has the initial value of ~0, we insert strings
161 : * individually, otherwise we only copy (insert) offsets */
162 77772 : if (toff == ~(size_t) 0)
163 : v = GDK_VAROFFSET;
164 : else
165 77715 : v = b->tvheap->free - 1;
166 :
167 : /* make sure there is (vertical) space in the offset heap, we
168 : * may also widen thanks to v, set above */
169 86311 : if (GDKupgradevarheap(b, v, oldcnt + cnt < b->batCapacity ? b->batCapacity : oldcnt + cnt, b->batCount) != GDK_SUCCEED) {
170 : return GDK_FAIL;
171 : }
172 :
173 86209 : if (toff == 0 && ni->width == b->twidth && ci->tpe == cand_dense) {
174 : /* we don't need to do any translation of offset
175 : * values, so we can use fast memcpy */
176 77436 : MT_thread_setalgorithm("memcpy offsets");
177 77584 : memcpy(Tloc(b, BATcount(b)), (const char *) ni->base + ((ci->seq - ni->b->hseqbase) << ni->shift), cnt << ni->shift);
178 8773 : } else if (toff != ~(size_t) 0) {
179 : /* we don't need to insert any actual strings since we
180 : * have already made sure that they are all in b's
181 : * string heap at known locations (namely the offset
182 : * in n added to toff), so insert offsets from n after
183 : * adding toff into b */
184 : /* note the use of the "restrict" qualifier here: all
185 : * four pointers below point to the same value, but
186 : * only one of them will actually be used, hence we
187 : * still obey the rule for restrict-qualified
188 : * pointers */
189 185 : const uint8_t *restrict tbp = (const uint8_t *) ni->base;
190 185 : const uint16_t *restrict tsp = (const uint16_t *) ni->base;
191 185 : const uint32_t *restrict tip = (const uint32_t *) ni->base;
192 : #if SIZEOF_VAR_T == 8
193 185 : const uint64_t *restrict tlp = (const uint64_t *) ni->base;
194 : #endif
195 :
196 185 : MT_thread_setalgorithm("copy offset values");
197 185 : r = b->batCount;
198 3529010 : TIMEOUT_LOOP(cnt, qry_ctx) {
199 3528443 : p = canditer_next(ci) - ni->b->hseqbase;
200 3528444 : switch (ni->width) {
201 106 : case 1:
202 106 : v = (var_t) tbp[p] + GDK_VAROFFSET;
203 106 : break;
204 4658 : case 2:
205 4658 : v = (var_t) tsp[p] + GDK_VAROFFSET;
206 4658 : break;
207 3523679 : case 4:
208 3523679 : v = (var_t) tip[p];
209 3523679 : break;
210 : #if SIZEOF_VAR_T == 8
211 1 : case 8:
212 1 : v = (var_t) tlp[p];
213 1 : break;
214 : #endif
215 : default:
216 0 : MT_UNREACHABLE();
217 : }
218 3528444 : v = (var_t) ((size_t) v + toff);
219 3528444 : assert(v >= GDK_VAROFFSET);
220 3528444 : assert((size_t) v < b->tvheap->free);
221 3528444 : switch (b->twidth) {
222 4661 : case 1:
223 4661 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 8));
224 4661 : ((uint8_t *) b->theap->base)[r++] = (uint8_t) (v - GDK_VAROFFSET);
225 4661 : break;
226 761 : case 2:
227 761 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 16));
228 761 : ((uint16_t *) b->theap->base)[r++] = (uint16_t) (v - GDK_VAROFFSET);
229 761 : break;
230 3523022 : case 4:
231 : #if SIZEOF_VAR_T == 8
232 3523022 : assert(v < ((var_t) 1 << 32));
233 : #endif
234 3523022 : ((uint32_t *) b->theap->base)[r++] = (uint32_t) v;
235 3523022 : break;
236 : #if SIZEOF_VAR_T == 8
237 0 : case 8:
238 0 : ((uint64_t *) b->theap->base)[r++] = (uint64_t) v;
239 0 : break;
240 : #endif
241 : default:
242 3528444 : MT_UNREACHABLE();
243 : }
244 : }
245 8588 : } else if (b->tvheap->free < ni->vhfree / 2 ||
246 : GDK_ELIMDOUBLES(b->tvheap)) {
247 : /* if b's string heap is much smaller than n's string
248 : * heap, don't bother checking whether n's string
249 : * values occur in b's string heap; also, if b is
250 : * (still) fully double eliminated, we must continue
251 : * to use the double elimination mechanism */
252 8537 : r = b->batCount;
253 8537 : oid hseq = ni->b->hseqbase;
254 8537 : MT_thread_setalgorithm("insert string values");
255 11752776 : TIMEOUT_LOOP(cnt, qry_ctx) {
256 11735192 : p = canditer_next(ci) - hseq;
257 11626911 : tp = BUNtvar(*ni, p);
258 11626911 : if (tfastins_nocheckVAR(b, r, tp) != GDK_SUCCEED) {
259 : return GDK_FAIL;
260 : }
261 11735246 : r++;
262 : }
263 : } else {
264 : /* Insert values from n individually into b; however,
265 : * we check whether there is a string in b's string
266 : * heap at the same offset as the string is in n's
267 : * string heap (in case b's string heap is a copy of
268 : * n's). If this is the case, we just copy the
269 : * offset, otherwise we insert normally. */
270 51 : r = b->batCount;
271 51 : MT_thread_setalgorithm("insert string values with check");
272 4655 : TIMEOUT_LOOP(cnt, qry_ctx) {
273 4553 : p = canditer_next(ci) - ni->b->hseqbase;
274 4553 : off = BUNtvaroff(*ni, p); /* the offset */
275 4553 : tp = ni->vh->base + off; /* the string */
276 4553 : if (off < b->tvheap->free &&
277 4553 : strcmp(b->tvheap->base + off, tp) == 0) {
278 : /* we found the string at the same
279 : * offset in b's string heap as it was
280 : * in n's string heap, so we don't
281 : * have to insert a new string into b:
282 : * we can just copy the offset */
283 686 : v = (var_t) off;
284 686 : switch (b->twidth) {
285 0 : case 1:
286 0 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 8));
287 0 : ((uint8_t *) b->theap->base)[r] = (uint8_t) (v - GDK_VAROFFSET);
288 0 : break;
289 4 : case 2:
290 4 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 16));
291 4 : ((uint16_t *) b->theap->base)[r] = (uint16_t) (v - GDK_VAROFFSET);
292 4 : break;
293 682 : case 4:
294 : #if SIZEOF_VAR_T == 8
295 682 : assert(v < ((var_t) 1 << 32));
296 : #endif
297 682 : ((uint32_t *) b->theap->base)[r] = (uint32_t) v;
298 682 : break;
299 : #if SIZEOF_VAR_T == 8
300 0 : case 8:
301 0 : ((uint64_t *) b->theap->base)[r] = (uint64_t) v;
302 0 : break;
303 : #endif
304 : default:
305 0 : MT_UNREACHABLE();
306 : }
307 : } else {
308 3867 : if (tfastins_nocheckVAR(b, r, tp) != GDK_SUCCEED) {
309 : return GDK_FAIL;
310 : }
311 : }
312 4553 : r++;
313 : }
314 : }
315 86412 : TIMEOUT_CHECK(qry_ctx, TIMEOUT_HANDLER(GDK_FAIL, qry_ctx));
316 86059 : MT_rwlock_wrlock(&b->thashlock);
317 86462 : MT_lock_set(&b->theaplock);
318 86339 : BATsetcount(b, oldcnt + ci->ncand);
319 86389 : assert(b->batCapacity >= b->batCount);
320 86389 : MT_lock_unset(&b->theaplock);
321 : /* maintain hash */
322 89438 : for (r = oldcnt, cnt = BATcount(b); b->thash && r < cnt; r++) {
323 3010 : HASHappend_locked(b, r, b->tvheap->base + VarHeapVal(Tloc(b, 0), r, b->twidth));
324 : }
325 86428 : BUN nunique = b->thash ? b->thash->nunique : 0;
326 86428 : MT_rwlock_wrunlock(&b->thashlock);
327 86430 : if (nunique != 0) {
328 9 : MT_lock_set(&b->theaplock);
329 9 : b->tunique_est = (double) nunique;
330 9 : MT_lock_unset(&b->theaplock);
331 : }
332 : return GDK_SUCCEED;
333 : }
334 :
335 : static gdk_return
336 510 : append_varsized_bat(BAT *b, BATiter *ni, struct canditer *ci, bool mayshare)
337 : {
338 510 : BUN cnt = ci->ncand, r;
339 510 : oid hseq = ni->b->hseqbase;
340 :
341 : /* only transient bats can use some other bat's vheap */
342 510 : assert(b->batRole == TRANSIENT || b->tvheap->parentid == b->batCacheid);
343 : /* make sure the bats use var_t */
344 510 : assert(b->twidth == ni->width);
345 510 : assert(b->twidth == SIZEOF_VAR_T);
346 510 : if (cnt == 0)
347 : return GDK_SUCCEED;
348 511 : if (cnt > BATcapacity(b) - BATcount(b)) {
349 : /* if needed space exceeds a normal growth extend just
350 : * with what's needed */
351 18 : BUN ncap = BATcount(b) + cnt;
352 18 : BUN grows = BATgrows(b);
353 :
354 18 : if (ncap > grows)
355 : grows = ncap;
356 18 : if (BATextend(b, grows) != GDK_SUCCEED)
357 : return GDK_FAIL;
358 : }
359 511 : if (mayshare) {
360 513 : MT_lock_set(&b->theaplock);
361 516 : if (BATcount(b) == 0 &&
362 232 : b->batRole == TRANSIENT &&
363 148 : ni->restricted == BAT_READ &&
364 148 : b->tvheap != ni->vh) {
365 : /* if b is still empty, in the transient farm,
366 : * and n is read-only, we replace b's vheap with
367 : * a reference to n's */
368 148 : bat bid = b->tvheap->parentid;
369 148 : HEAPdecref(b->tvheap, true);
370 148 : HEAPincref(ni->vh);
371 148 : b->tvheap = ni->vh;
372 148 : BBPretain(ni->vh->parentid);
373 148 : if (bid != b->batCacheid)
374 0 : BBPrelease(bid);
375 : }
376 516 : MT_lock_unset(&b->theaplock);
377 : }
378 516 : if (b->tvheap == ni->vh) {
379 : /* if b and n use the same vheap, we only need to copy
380 : * the offsets from n to b */
381 342 : if (ci->tpe == cand_dense) {
382 : /* fast memcpy since we copy a consecutive
383 : * chunk of memory */
384 342 : memcpy(Tloc(b, BATcount(b)),
385 342 : (const var_t *) ni->base + (ci->seq - hseq),
386 342 : cnt << b->tshift);
387 : } else {
388 0 : var_t *restrict dst = (var_t *) Tloc(b, BATcount(b));
389 0 : const var_t *restrict src = (const var_t *) ni->base;
390 0 : while (cnt > 0) {
391 0 : cnt--;
392 0 : *dst++ = src[canditer_next(ci) - hseq];
393 : }
394 : }
395 342 : MT_rwlock_wrlock(&b->thashlock);
396 342 : MT_lock_set(&b->theaplock);
397 342 : BATsetcount(b, BATcount(b) + ci->ncand);
398 342 : MT_lock_unset(&b->theaplock);
399 : /* maintain hash table */
400 342 : for (BUN i = BATcount(b) - ci->ncand;
401 342 : b->thash && i < BATcount(b);
402 0 : i++) {
403 0 : HASHappend_locked(b, i, b->tvheap->base + *(var_t *) Tloc(b, i));
404 : }
405 342 : BUN nunique = b->thash ? b->thash->nunique : 0;
406 342 : MT_rwlock_wrunlock(&b->thashlock);
407 342 : if (nunique != 0) {
408 0 : MT_lock_set(&b->theaplock);
409 0 : b->tunique_est = (double) nunique;
410 0 : MT_lock_unset(&b->theaplock);
411 : }
412 342 : return GDK_SUCCEED;
413 : }
414 : /* b and n do not share their vheap, so we need to copy data */
415 174 : if (b->tvheap->parentid != b->batCacheid) {
416 : /* if b shares its vheap with some other bat, unshare it */
417 18 : Heap *h = GDKmalloc(sizeof(Heap));
418 18 : if (h == NULL) {
419 : return GDK_FAIL;
420 : }
421 36 : *h = (Heap) {
422 18 : .parentid = b->batCacheid,
423 18 : .farmid = BBPselectfarm(b->batRole, b->ttype, varheap),
424 : .refs = ATOMIC_VAR_INIT(1),
425 : };
426 18 : strconcat_len(h->filename, sizeof(h->filename),
427 18 : BBP_physical(b->batCacheid), ".theap", NULL);
428 18 : if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
429 0 : HEAPfree(h, true);
430 0 : GDKfree(h);
431 0 : return GDK_FAIL;
432 : }
433 18 : MT_lock_set(&b->theaplock);
434 18 : Heap *oh = b->tvheap;
435 18 : b->tvheap = h;
436 18 : MT_lock_unset(&b->theaplock);
437 18 : if (oh->parentid != b->batCacheid)
438 18 : BBPrelease(oh->parentid);
439 18 : HEAPdecref(oh, false);
440 : }
441 174 : if (BATcount(b) == 0 &&
442 84 : ci->tpe == cand_dense && ci->ncand == ni->count) {
443 : /* just copy the heaps */
444 86 : MT_lock_set(&b->theaplock);
445 85 : if (HEAPgrow(&b->tvheap, ni->vhfree, false) != GDK_SUCCEED) {
446 0 : MT_lock_unset(&b->theaplock);
447 0 : return GDK_FAIL;
448 : }
449 85 : memcpy(b->theap->base, ni->base, ni->hfree);
450 85 : memcpy(b->tvheap->base, ni->vh->base, ni->vhfree);
451 85 : b->theap->free = ni->hfree;
452 85 : b->theap->dirty = true;
453 85 : b->tvheap->free = ni->vhfree;
454 85 : b->tvheap->dirty = true;
455 85 : BATsetcount(b, ni->count);
456 86 : b->tnil = ni->nil;
457 86 : b->tnonil = ni->nonil;
458 86 : b->tsorted = ni->sorted;
459 86 : b->tnosorted = ni->nosorted;
460 86 : b->trevsorted = ni->revsorted;
461 86 : b->tnorevsorted = ni->norevsorted;
462 86 : b->tkey = ni->key;
463 86 : b->tnokey[0] = ni->nokey[0];
464 86 : b->tnokey[1] = ni->nokey[1];
465 86 : b->tminpos = ni->minpos;
466 86 : b->tmaxpos = ni->maxpos;
467 86 : b->tunique_est = ni->unique_est;
468 86 : MT_lock_unset(&b->theaplock);
469 86 : return GDK_SUCCEED;
470 : }
471 : /* copy data from n to b */
472 : r = BATcount(b);
473 380591 : for (BUN i = 0; i < cnt; i++) {
474 380502 : BUN p = canditer_next(ci) - hseq;
475 380501 : const void *t = BUNtvar(*ni, p);
476 380501 : if (tfastins_nocheckVAR(b, r, t) != GDK_SUCCEED) {
477 : return GDK_FAIL;
478 : }
479 380503 : r++;
480 : }
481 89 : MT_rwlock_wrlock(&b->thashlock);
482 89 : if (b->thash) {
483 0 : r -= cnt;
484 0 : BATiter bi = bat_iterator_nolock(b);
485 0 : for (BUN i = 0; i < cnt; i++) {
486 0 : const void *t = BUNtvar(bi, r);
487 0 : HASHappend_locked(b, r, t);
488 0 : r++;
489 : }
490 : }
491 89 : BUN nunique = b->thash ? b->thash->nunique : 0;
492 89 : MT_lock_set(&b->theaplock);
493 90 : BATsetcount(b, r);
494 90 : if (nunique != 0)
495 0 : b->tunique_est = (double) nunique;
496 90 : MT_lock_unset(&b->theaplock);
497 90 : MT_rwlock_wrunlock(&b->thashlock);
498 90 : return GDK_SUCCEED;
499 : }
500 :
501 : static gdk_return
502 138 : append_msk_bat(BAT *b, BATiter *ni, struct canditer *ci)
503 : {
504 138 : if (ci->ncand == 0)
505 : return GDK_SUCCEED;
506 138 : if (BATextend(b, BATcount(b) + ci->ncand) != GDK_SUCCEED)
507 : return GDK_FAIL;
508 :
509 138 : MT_lock_set(&b->theaplock);
510 :
511 138 : uint32_t boff = b->batCount % 32;
512 138 : uint32_t *bp = (uint32_t *) b->theap->base + b->batCount / 32;
513 138 : b->batCount += ci->ncand;
514 138 : if (ci->tpe == cand_dense) {
515 138 : const uint32_t *np;
516 138 : uint32_t noff, mask;
517 138 : BUN cnt;
518 138 : noff = (ci->seq - ni->b->hseqbase) % 32;
519 138 : cnt = ci->ncand;
520 138 : np = (const uint32_t *) ni->base + (ci->seq - ni->b->hseqbase) / 32;
521 138 : if (boff == noff) {
522 : /* words of b and n are aligned, so we don't
523 : * need to shift bits around */
524 55 : if (boff + cnt <= 32) {
525 : /* all new bits within one word */
526 49 : if (cnt == 32) {
527 0 : *bp = *np;
528 : } else {
529 49 : mask = ((1U << cnt) - 1) << boff;
530 49 : *bp &= ~mask;
531 49 : *bp |= *np & mask;
532 : }
533 : } else {
534 : /* multiple words of b are affected */
535 6 : if (boff != 0) {
536 : /* first fill up the rest of the first
537 : * word */
538 0 : mask = ~0U << boff;
539 0 : *bp &= ~mask;
540 0 : *bp++ |= *np++ & mask;
541 0 : cnt -= 32 - boff;
542 : }
543 6 : if (cnt >= 32) {
544 : /* copy an integral number of words fast */
545 6 : BUN nw = cnt / 32;
546 6 : memcpy(bp, np, nw*sizeof(int));
547 6 : bp += nw;
548 6 : np += nw;
549 6 : cnt %= 32;
550 : }
551 6 : if (cnt > 0) {
552 : /* do the left over bits */
553 6 : mask = (1U << cnt) - 1;
554 6 : *bp = *np & mask;
555 : }
556 : }
557 83 : } else if (boff > noff) {
558 83 : if (boff + cnt <= 32) {
559 : /* we only need to copy bits from a
560 : * single word of n to a single word
561 : * of b */
562 : /* boff > 0, so cnt < 32, hence the
563 : * shift is ok */
564 74 : mask = (1U << cnt) - 1;
565 74 : *bp &= ~(mask << boff);
566 74 : *bp |= (*np & (mask << noff)) << (boff - noff);
567 : } else {
568 : /* first fill the rest of the last partial
569 : * word of b, so that's 32-boff bits */
570 9 : mask = (1U << (32 - boff)) - 1;
571 9 : *bp &= ~(mask << boff);
572 9 : *bp++ |= (*np & (mask << noff)) << (boff - noff);
573 9 : cnt -= 32 - boff;
574 :
575 : /* set boff and noff to the amount we need to
576 : * shift bits in consecutive words of n around
577 : * to fit into the next word of b; set mask to
578 : * the mask of the bottom bits of n that fit
579 : * in a word of b (and the complement are the
580 : * top bits that go to another word of b) */
581 9 : boff -= noff;
582 9 : noff = 32 - boff;
583 9 : mask = (1U << noff) - 1;
584 141 : while (cnt >= 32) {
585 132 : *bp = (*np++ & ~mask) >> noff;
586 132 : *bp++ |= (*np & mask) << boff;
587 132 : cnt -= 32;
588 : }
589 9 : if (cnt > noff) {
590 : /* the last bits come from two words
591 : * in n */
592 2 : *bp = (*np++ & ~mask) >> noff;
593 2 : cnt -= noff;
594 2 : mask = (1U << cnt) - 1;
595 2 : *bp++ |= (*np & mask) << boff;
596 7 : } else if (cnt > 0) {
597 : /* the last bits come from a single
598 : * word in n */
599 7 : mask = ((1U << cnt) - 1) << noff;
600 7 : *bp = (*np & mask) >> noff;
601 : }
602 : }
603 : } else {
604 : /* boff < noff */
605 0 : if (noff + cnt <= 32) {
606 : /* only need part of the first word of n */
607 0 : assert(cnt < 32); /* noff > 0, so cnt < 32 */
608 0 : mask = (1U << cnt) - 1;
609 0 : *bp &= ~(mask << boff);
610 0 : *bp |= (*np & (mask << noff)) >> (noff - boff);
611 0 : } else if (boff + cnt <= 32) {
612 : /* only need to fill a single word of
613 : * b, but from two of n */
614 0 : if (cnt < 32)
615 0 : *bp &= ~(((1U << cnt) - 1) << boff);
616 : else
617 0 : *bp = 0;
618 0 : mask = ~((1U << noff) - 1);
619 0 : *bp |= (*np++ & mask) >> (noff - boff);
620 0 : cnt -= 32 - noff;
621 0 : mask = (1U << cnt) - 1;
622 0 : *bp |= (*np & mask) << (32 - noff);
623 : } else {
624 0 : if (boff > 0) {
625 : /* fill the rest of the first word of b */
626 0 : cnt -= 32 - boff;
627 0 : *bp &= (1U << boff) - 1;
628 0 : mask = ~((1U << noff) - 1);
629 0 : noff -= boff;
630 0 : boff = 32 - noff;
631 0 : *bp |= (*np++ & mask) >> noff;
632 0 : *bp |= (*np & ((1U << noff) - 1)) << boff;
633 : } else {
634 0 : boff = 32 - noff;
635 : }
636 0 : mask = (1U << noff) - 1;
637 0 : while (cnt >= 32) {
638 0 : *bp = (*np++ & ~mask) >> noff;
639 0 : *bp++ |= (*np & mask) << boff;
640 0 : cnt -= 32;
641 : }
642 0 : if (cnt > 0) {
643 0 : *bp = (*np++ & ~mask) >> noff;
644 0 : if (cnt > noff)
645 0 : *bp++ |= (*np & mask) << boff;
646 : }
647 : }
648 : }
649 : } else {
650 0 : oid o;
651 0 : uint32_t v = boff > 0 ? *bp & ((1U << boff) - 1) : 0;
652 0 : do {
653 0 : for (uint32_t i = boff; i < 32; i++) {
654 0 : o = canditer_next(ci);
655 0 : if (is_oid_nil(o))
656 : break;
657 0 : o -= ni->b->hseqbase;
658 0 : v |= (uint32_t) Tmskval(ni, o - ni->b->hseqbase) << i;
659 : }
660 0 : *bp++ = v;
661 0 : v = 0;
662 0 : boff = 0;
663 0 : } while (!is_oid_nil(o));
664 : }
665 138 : b->theap->dirty = true;
666 138 : b->theap->free = ((b->batCount + 31) / 32) * 4;
667 138 : MT_lock_unset(&b->theaplock);
668 138 : return GDK_SUCCEED;
669 : }
670 :
671 : /* Append the contents of BAT n (subject to the optional candidate
672 : * list s) to BAT b. If b is empty, b will get the seqbase of s if it
673 : * was passed in, and else the seqbase of n. */
674 : static gdk_return
675 2256707 : BATappend2(BAT *b, BAT *n, BAT *s, bool force, bool mayshare)
676 : {
677 2256707 : struct canditer ci;
678 2256707 : BUN r;
679 2256707 : oid hseq = n->hseqbase;
680 2256707 : char buf[64];
681 2256707 : lng t0 = 0;
682 2256707 : const ValRecord *prop = NULL;
683 2256707 : ValRecord minprop, maxprop;
684 2256707 : const void *minbound = NULL, *maxbound = NULL;
685 2256707 : int (*atomcmp) (const void *, const void *) = ATOMcompare(b->ttype);
686 2256707 : bool hlocked = false;
687 :
688 2256707 : if (b == NULL || n == NULL || BATcount(n) == 0) {
689 : return GDK_SUCCEED;
690 : }
691 1305166 : assert(b->theap->parentid == b->batCacheid);
692 :
693 1305166 : TRC_DEBUG_IF(ALGO) {
694 0 : t0 = GDKusec();
695 0 : snprintf(buf, sizeof(buf), ALGOBATFMT, ALGOBATPAR(b));
696 : }
697 :
698 1305166 : ALIGNapp(b, force, GDK_FAIL);
699 :
700 3667171 : if (ATOMstorage(ATOMtype(b->ttype)) != ATOMstorage(ATOMtype(n->ttype))) {
701 0 : GDKerror("Incompatible operands ("ALGOBATFMT" vs. "ALGOBATFMT").\n", ALGOBATPAR(b), ALGOBATPAR(n));
702 0 : return GDK_FAIL;
703 : }
704 :
705 1308113 : if (BATttype(b) != BATttype(n) &&
706 : ATOMtype(b->ttype) != ATOMtype(n->ttype)) {
707 0 : TRC_DEBUG(CHECK, "Interpreting %s as %s.\n",
708 : ATOMname(BATttype(n)), ATOMname(BATttype(b)));
709 : }
710 :
711 1308113 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
712 :
713 1308084 : BATiter ni = bat_iterator(n);
714 :
715 1309138 : canditer_init(&ci, n, s);
716 1307184 : if (ci.ncand == 0) {
717 0 : goto doreturn;
718 : }
719 :
720 1307184 : if (BATcount(b) + ci.ncand > BUN_MAX) {
721 0 : bat_iterator_end(&ni);
722 0 : GDKerror("combined BATs too large\n");
723 0 : return GDK_FAIL;
724 : }
725 :
726 1307184 : if (b->hseqbase + BATcount(b) + ci.ncand >= GDK_oid_max) {
727 0 : bat_iterator_end(&ni);
728 0 : GDKerror("overflow of head value\n");
729 0 : return GDK_FAIL;
730 : }
731 :
732 1307184 : OIDXdestroy(b);
733 1309455 : STRMPdestroy(b); /* TODO: use STRMPappendBitString */
734 1309550 : RTREEdestroy(b);
735 :
736 1307596 : MT_lock_set(&b->theaplock);
737 1307888 : const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
738 1308064 : if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL &&
739 48 : VALcopy(&minprop, prop) != NULL) {
740 48 : minbound = VALptr(&minprop);
741 48 : if (ci.ncand == BATcount(n) &&
742 62 : ni.minpos != BUN_NONE &&
743 14 : atomcmp(BUNtail(ni, ni.minpos), minbound) < 0) {
744 0 : assert(0);
745 : GDKerror("value out of bounds\n");
746 : MT_lock_unset(&b->theaplock);
747 : goto bailout;
748 : }
749 : }
750 1307785 : if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL &&
751 40 : VALcopy(&maxprop, prop) != NULL) {
752 40 : maxbound = VALptr(&maxprop);
753 40 : if (ci.ncand == BATcount(n) &&
754 52 : ni.maxpos != BUN_NONE &&
755 12 : atomcmp(BUNtail(ni, ni.maxpos), maxbound) >= 0) {
756 0 : assert(0);
757 : GDKerror("value out of bounds\n");
758 : MT_lock_unset(&b->theaplock);
759 : goto bailout;
760 : }
761 : }
762 :
763 1306529 : if (BATcount(b) == 0 || b->tmaxpos != BUN_NONE) {
764 399645 : if (ni.maxpos != BUN_NONE) {
765 154118 : BATiter bi = bat_iterator_nolock(b);
766 154118 : if (BATcount(b) == 0 || atomcmp(BUNtail(bi, b->tmaxpos), BUNtail(ni, ni.maxpos)) < 0) {
767 148102 : if (s == NULL) {
768 147960 : b->tmaxpos = BATcount(b) + ni.maxpos;
769 : } else {
770 142 : b->tmaxpos = BUN_NONE;
771 : }
772 : }
773 : } else {
774 245527 : b->tmaxpos = BUN_NONE;
775 : }
776 : }
777 1306535 : if (BATcount(b) == 0 || b->tminpos != BUN_NONE) {
778 398911 : if (ni.minpos != BUN_NONE) {
779 153556 : BATiter bi = bat_iterator_nolock(b);
780 153556 : if (BATcount(b) == 0 || atomcmp(BUNtail(bi, b->tminpos), BUNtail(ni, ni.minpos)) > 0) {
781 146358 : if (s == NULL) {
782 146219 : b->tminpos = BATcount(b) + ni.minpos;
783 : } else {
784 139 : b->tminpos = BUN_NONE;
785 : }
786 : }
787 : } else {
788 245355 : b->tminpos = BUN_NONE;
789 : }
790 : }
791 1306529 : if (ci.ncand > BATcount(b) / gdk_unique_estimate_keep_fraction) {
792 1304778 : b->tunique_est = 0;
793 : }
794 1306529 : MT_lock_unset(&b->theaplock);
795 : /* load hash so that we can maintain it */
796 1309570 : (void) BATcheckhash(b);
797 :
798 1307281 : if (b->ttype == TYPE_void) {
799 : /* b does not have storage, keep it that way if we can */
800 128726 : HASHdestroy(b); /* we're not maintaining the hash here */
801 128713 : MT_lock_set(&b->theaplock);
802 128717 : if (BATtdensebi(&ni) && ci.tpe == cand_dense &&
803 126245 : (BATcount(b) == 0 ||
804 109575 : (BATtdense(b) &&
805 109575 : b->tseqbase + BATcount(b) == n->tseqbase + ci.seq - hseq))) {
806 : /* n is also dense and consecutive with b */
807 126185 : if (BATcount(b) == 0) {
808 16669 : if (minbound && n->tseqbase + ci.seq - hseq < *(const oid *)minbound) {
809 0 : assert(0);
810 : GDKerror("value not within bounds\n");
811 : MT_lock_unset(&b->theaplock);
812 : goto bailout;
813 : }
814 16669 : BATtseqbase(b, n->tseqbase + ci.seq - hseq);
815 : }
816 126172 : if (maxbound && b->tseqbase + BATcount(b) + ci.ncand >= *(const oid *)maxbound) {
817 0 : assert(0);
818 : GDKerror("value not within bounds\n");
819 : MT_lock_unset(&b->theaplock);
820 : goto bailout;
821 : }
822 126172 : BATsetcount(b, BATcount(b) + ci.ncand);
823 126144 : MT_lock_unset(&b->theaplock);
824 126182 : goto doreturn;
825 : }
826 2532 : if ((BATcount(b) == 0 || is_oid_nil(b->tseqbase)) &&
827 20 : ni.type == TYPE_void && is_oid_nil(n->tseqbase)) {
828 : /* both b and n are void/nil */
829 0 : if (notnull) {
830 0 : assert(0);
831 : GDKerror("NULL value not within bounds\n");
832 : MT_lock_unset(&b->theaplock);
833 : goto bailout;
834 : }
835 0 : BATtseqbase(b, oid_nil);
836 0 : BATsetcount(b, BATcount(b) + ci.ncand);
837 0 : MT_lock_unset(&b->theaplock);
838 0 : goto doreturn;
839 : }
840 : /* we need to materialize b; allocate enough capacity */
841 2532 : MT_lock_unset(&b->theaplock);
842 2533 : if (BATmaterialize(b, BATcount(b) + ci.ncand) != GDK_SUCCEED) {
843 0 : goto bailout;
844 : }
845 : }
846 :
847 : /* property setting */
848 1181088 : MT_lock_set(&b->theaplock);
849 1181828 : r = BATcount(b);
850 :
851 1181828 : if (BATcount(b) == 0) {
852 373680 : b->tsorted = ni.sorted;
853 373680 : b->trevsorted = ni.revsorted;
854 373680 : b->tseqbase = oid_nil;
855 373680 : b->tnonil = ni.nonil;
856 373680 : b->tnil = ni.nil && ci.ncand == BATcount(n);
857 373680 : if (ci.tpe == cand_dense) {
858 373514 : b->tnosorted = ci.seq - hseq <= ni.nosorted && ni.nosorted < ci.seq + ci.ncand - hseq ? ni.nosorted + hseq - ci.seq : 0;
859 373514 : b->tnorevsorted = ci.seq - hseq <= ni.norevsorted && ni.norevsorted < ci.seq + ci.ncand - hseq ? ni.norevsorted + hseq - ci.seq : 0;
860 373514 : if (BATtdensebi(&ni)) {
861 2136 : b->tseqbase = n->tseqbase + ci.seq - hseq;
862 : }
863 : } else {
864 166 : b->tnosorted = 0;
865 166 : b->tnorevsorted = 0;
866 : }
867 373680 : b->tkey = ni.key;
868 373680 : if (ci.ncand == BATcount(n)) {
869 372050 : b->tnokey[0] = ni.nokey[0];
870 372050 : b->tnokey[1] = ni.nokey[1];
871 : } else {
872 1630 : b->tnokey[0] = b->tnokey[1] = 0;
873 : }
874 : } else {
875 808148 : BUN last = r - 1;
876 808148 : BATiter bi = bat_iterator_nolock(b);
877 808148 : int xx = ATOMcmp(b->ttype,
878 : BUNtail(ni, ci.seq - hseq),
879 : BUNtail(bi, last));
880 807413 : if (b->tsorted && (!ni.sorted || xx < 0)) {
881 22032 : b->tsorted = false;
882 22032 : b->tnosorted = 0;
883 22032 : b->tseqbase = oid_nil;
884 : }
885 807413 : if (b->trevsorted &&
886 69148 : (!ni.revsorted || xx > 0)) {
887 18648 : b->trevsorted = false;
888 18648 : b->tnorevsorted = 0;
889 : }
890 807413 : if (b->tkey &&
891 75208 : (!(b->tsorted || b->trevsorted) ||
892 60352 : !ni.key || xx == 0)) {
893 20886 : BATkey(b, false);
894 : }
895 807363 : if (b->ttype != TYPE_void && b->tsorted && BATtdense(b) &&
896 2704 : (!BATtdensebi(&ni) ||
897 552 : ci.tpe != cand_dense ||
898 552 : 1 + *(oid *) BUNtloc(bi, last) != BUNtoid(n, ci.seq - hseq))) {
899 2172 : b->tseqbase = oid_nil;
900 : }
901 807363 : b->tnonil &= ni.nonil;
902 1602405 : b->tnil |= ni.nil && ci.ncand == ni.count;
903 : }
904 1181043 : MT_lock_unset(&b->theaplock);
905 1182828 : if (b->ttype == TYPE_str) {
906 86259 : if (insert_string_bat(b, &ni, &ci, force, mayshare, qry_ctx) != GDK_SUCCEED) {
907 0 : goto bailout;
908 : }
909 1096569 : } else if (ATOMvarsized(b->ttype)) {
910 518 : if (append_varsized_bat(b, &ni, &ci, mayshare) != GDK_SUCCEED) {
911 0 : goto bailout;
912 : }
913 1096051 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
914 : /* no bounds and NOT_NULL property on MSK bats */
915 138 : assert(minbound == NULL && maxbound == NULL && !notnull);
916 138 : if (append_msk_bat(b, &ni, &ci) != GDK_SUCCEED) {
917 0 : goto bailout;
918 : }
919 : } else {
920 1095913 : if (ci.ncand > BATcapacity(b) - BATcount(b)) {
921 : /* if needed space exceeds a normal growth
922 : * extend just with what's needed */
923 10691 : BUN ncap = BATcount(b) + ci.ncand;
924 10691 : BUN grows = BATgrows(b);
925 :
926 10688 : if (ncap > grows)
927 : grows = ncap;
928 10688 : if (BATextend(b, grows) != GDK_SUCCEED) {
929 0 : goto bailout;
930 : }
931 : }
932 1096030 : MT_rwlock_wrlock(&b->thashlock);
933 1096673 : hlocked = true;
934 1096673 : if (b->ttype != TYPE_void &&
935 1096390 : ni.type != TYPE_void &&
936 1092487 : ci.tpe == cand_dense) {
937 : /* use fast memcpy if we can */
938 1092390 : memcpy(Tloc(b, BATcount(b)),
939 1092390 : (const char *) ni.base + ((ci.seq - hseq) << ni.shift),
940 1092390 : ci.ncand << ni.shift);
941 1092403 : for (BUN i = 0; b->thash && i < ci.ncand; i++) {
942 3917 : HASHappend_locked(b, r, Tloc(b, r));
943 13 : r++;
944 : }
945 : } else {
946 4283 : const void *atomnil = ATOMnilptr(b->ttype);
947 2607870 : TIMEOUT_LOOP(ci.ncand, qry_ctx) {
948 2599282 : BUN p = canditer_next(&ci) - hseq;
949 2595836 : const void *t = BUNtail(ni, p);
950 2599393 : bool isnil = atomcmp(t, atomnil) == 0;
951 2596501 : if (notnull && isnil) {
952 0 : assert(0);
953 : GDKerror("NULL value not within bounds\n");
954 : goto bailout;
955 2596501 : } else if (minbound &&
956 2596501 : !isnil &&
957 0 : atomcmp(t, minbound) < 0) {
958 0 : assert(0);
959 : GDKerror("value not within bounds\n");
960 : goto bailout;
961 2596501 : } else if (maxbound &&
962 0 : !isnil &&
963 0 : atomcmp(t, maxbound) >= 0) {
964 0 : assert(0);
965 : GDKerror("value not within bounds\n");
966 : goto bailout;
967 2596501 : } else if (tfastins_nocheck(b, r, t) != GDK_SUCCEED) {
968 0 : goto bailout;
969 : }
970 2599247 : if (b->thash)
971 0 : HASHappend_locked(b, r, t);
972 2599287 : r++;
973 : }
974 4283 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
975 : }
976 1092768 : BUN nunique;
977 1092768 : nunique = b->thash ? b->thash->nunique : 0;
978 1092768 : MT_lock_set(&b->theaplock);
979 1095234 : BATsetcount(b, b->batCount + ci.ncand);
980 1095223 : if (nunique != 0)
981 5 : b->tunique_est = (double) nunique;
982 1095223 : MT_lock_unset(&b->theaplock);
983 1096626 : assert(hlocked);
984 1096626 : MT_rwlock_wrunlock(&b->thashlock);
985 1096626 : hlocked = false;
986 : }
987 :
988 1309499 : doreturn:
989 1309499 : bat_iterator_end(&ni);
990 1307174 : if (minbound)
991 48 : VALclear(&minprop);
992 1307524 : if (maxbound)
993 40 : VALclear(&maxprop);
994 1307524 : TRC_DEBUG(ALGO, "b=%s,n=" ALGOBATFMT ",s=" ALGOOPTBATFMT
995 : " -> " ALGOBATFMT " (" LLFMT " usec)\n",
996 : buf, ALGOBATPAR(n), ALGOOPTBATPAR(s), ALGOBATPAR(b),
997 : GDKusec() - t0);
998 :
999 : return GDK_SUCCEED;
1000 : bailout:
1001 0 : if (hlocked)
1002 0 : MT_rwlock_wrunlock(&b->thashlock);
1003 0 : if (minbound)
1004 0 : VALclear(&minprop);
1005 0 : if (maxbound)
1006 0 : VALclear(&maxprop);
1007 0 : bat_iterator_end(&ni);
1008 0 : return GDK_FAIL;
1009 : }
1010 :
1011 : gdk_return
1012 2264183 : BATappend(BAT *b, BAT *n, BAT *s, bool force)
1013 : {
1014 2264183 : return BATappend2(b, n, s, force, true);
1015 : }
1016 :
1017 : gdk_return
1018 4 : BATdel(BAT *b, BAT *d)
1019 : {
1020 4 : void (*atmdel) (Heap *, var_t *) = BATatoms[b->ttype].atomDel;
1021 4 : MT_lock_set(&b->theaplock);
1022 4 : BATiter bi = bat_iterator_nolock(b);
1023 4 : MT_lock_unset(&b->theaplock);
1024 :
1025 4 : assert(ATOMtype(d->ttype) == TYPE_oid);
1026 4 : assert(d->tsorted);
1027 4 : assert(d->tkey);
1028 4 : if (BATcount(d) == 0)
1029 : return GDK_SUCCEED;
1030 4 : OIDXdestroy(b);
1031 4 : HASHdestroy(b);
1032 4 : PROPdestroy(b);
1033 4 : STRMPdestroy(b);
1034 4 : RTREEdestroy(b);
1035 4 : if (BATtdense(d)) {
1036 2 : oid o = d->tseqbase;
1037 2 : BUN c = BATcount(d);
1038 :
1039 2 : if (o + c <= b->hseqbase)
1040 : return GDK_SUCCEED;
1041 2 : if (o < b->hseqbase) {
1042 0 : c -= b->hseqbase - o;
1043 0 : o = b->hseqbase;
1044 : }
1045 2 : if (o - b->hseqbase < b->batInserted) {
1046 0 : GDKerror("cannot delete committed values\n");
1047 0 : return GDK_FAIL;
1048 : }
1049 2 : if (o + c > b->hseqbase + BATcount(b))
1050 0 : c = b->hseqbase + BATcount(b) - o;
1051 2 : if (c == 0)
1052 : return GDK_SUCCEED;
1053 2 : if (atmdel) {
1054 0 : BUN p = o - b->hseqbase;
1055 0 : BUN q = p + c;
1056 0 : while (p < q) {
1057 0 : (*atmdel)(b->tvheap, (var_t *) BUNtloc(bi, p));
1058 0 : p++;
1059 : }
1060 : }
1061 2 : if (BATtdense(b) && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED)
1062 : return GDK_FAIL;
1063 2 : MT_lock_set(&b->theaplock);
1064 2 : if (o + c < b->hseqbase + BATcount(b)) {
1065 0 : o -= b->hseqbase;
1066 0 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1067 0 : BUN n = BATcount(b) - (o + c);
1068 : /* not very efficient, but first see
1069 : * how much this is used */
1070 0 : for (BUN i = 0; i < n; i++)
1071 0 : mskSetVal(b, o + i,
1072 0 : mskGetVal(b, o + c + i));
1073 : } else {
1074 0 : memmove(Tloc(b, o),
1075 0 : Tloc(b, o + c),
1076 0 : b->twidth * (BATcount(b) - (o + c)));
1077 : }
1078 0 : b->theap->dirty = true;
1079 : // o += b->hseqbase; // if this were to be used again
1080 : }
1081 2 : b->batCount -= c;
1082 : } else {
1083 2 : BATiter di = bat_iterator(d);
1084 2 : const oid *o = (const oid *) di.base;
1085 2 : const oid *s;
1086 2 : BUN c = di.count;
1087 2 : BUN nd = 0;
1088 2 : BUN pos;
1089 2 : char *p = NULL;
1090 :
1091 2 : if (o[c - 1] <= b->hseqbase) {
1092 0 : bat_iterator_end(&di);
1093 0 : return GDK_SUCCEED;
1094 : }
1095 2 : while (*o < b->hseqbase) {
1096 0 : o++;
1097 0 : c--;
1098 : }
1099 2 : if (*o - b->hseqbase < b->batInserted) {
1100 0 : bat_iterator_end(&di);
1101 0 : GDKerror("cannot delete committed values\n");
1102 0 : return GDK_FAIL;
1103 : }
1104 2 : if (BATtdense(b) && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED) {
1105 0 : bat_iterator_end(&di);
1106 0 : return GDK_FAIL;
1107 : }
1108 2 : s = o;
1109 2 : pos = *o - b->hseqbase;
1110 2 : if (ATOMstorage(b->ttype) != TYPE_msk)
1111 2 : p = Tloc(b, pos);
1112 6 : while (c > 0 && *o < b->hseqbase + BATcount(b)) {
1113 4 : size_t n;
1114 4 : if (atmdel)
1115 0 : (*atmdel)(b->tvheap, (var_t *) BUNtloc(bi, *o - b->hseqbase));
1116 4 : o++;
1117 4 : c--;
1118 4 : nd++;
1119 4 : if (c == 0 || *o - b->hseqbase >= BATcount(b))
1120 2 : n = b->hseqbase + BATcount(b) - o[-1] - 1;
1121 2 : else if ((oid) (o - s) < *o - *s)
1122 2 : n = o[0] - o[-1] - 1;
1123 : else
1124 : n = 0;
1125 4 : if (n > 0) {
1126 2 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1127 0 : BUN opos = o[-1] + 1 - b->hseqbase;
1128 : /* not very efficient, but
1129 : * first see how much this is
1130 : * used */
1131 0 : for (BUN i = 0; i < n; i++) {
1132 0 : mskSetVal(b, pos + i,
1133 0 : mskGetVal(b, opos + i));
1134 : }
1135 0 : pos += n;
1136 : } else {
1137 2 : n *= b->twidth;
1138 2 : memmove(p,
1139 2 : Tloc(b, o[-1] + 1 - b->hseqbase),
1140 : n);
1141 2 : p += n;
1142 : }
1143 : s = o;
1144 : }
1145 : }
1146 2 : bat_iterator_end(&di);
1147 2 : MT_lock_set(&b->theaplock);
1148 2 : b->theap->dirty = true;
1149 2 : b->batCount -= nd;
1150 : }
1151 4 : if (b->batCount <= 1) {
1152 : /* some trivial properties */
1153 2 : b->tkey = true;
1154 2 : b->tsorted = b->trevsorted = true;
1155 2 : if (b->batCount == 0) {
1156 2 : b->tnil = false;
1157 2 : b->tnonil = true;
1158 : }
1159 : }
1160 : /* not sure about these anymore */
1161 4 : b->tnosorted = b->tnorevsorted = 0;
1162 4 : b->tnokey[0] = b->tnokey[1] = 0;
1163 4 : b->tminpos = BUN_NONE;
1164 4 : b->tmaxpos = BUN_NONE;
1165 4 : b->tunique_est = 0.0;
1166 4 : MT_lock_unset(&b->theaplock);
1167 :
1168 4 : return GDK_SUCCEED;
1169 : }
1170 :
1171 : /*
1172 : * Replace all values in b with values from n whose location is given by
1173 : * the oid in either p or positions.
1174 : * If positions is used, autoincr specifies whether it is the first of a
1175 : * dense range of positions or whether it is a full-blown array of
1176 : * position.
1177 : * If mayappend is set, the position in p/positions may refer to
1178 : * locations beyond the end of b.
1179 : */
1180 : static gdk_return
1181 217202 : BATappend_or_update(BAT *b, BAT *p, const oid *positions, BAT *n,
1182 : bool mayappend, bool autoincr, bool force)
1183 : {
1184 217202 : lng t0 = GDKusec();
1185 217282 : oid pos = oid_nil;
1186 217282 : BUN nunique = 0;
1187 :
1188 217282 : if (b == NULL || b->ttype == TYPE_void || n == NULL) {
1189 : return GDK_SUCCEED;
1190 : }
1191 : /* either p or positions */
1192 217282 : assert((p == NULL) != (positions == NULL));
1193 217282 : if (p != NULL) {
1194 217107 : if (BATcount(p) != BATcount(n)) {
1195 0 : GDKerror("update BATs not the same size\n");
1196 0 : return GDK_FAIL;
1197 : }
1198 217107 : if (ATOMtype(p->ttype) != TYPE_oid) {
1199 0 : GDKerror("positions BAT not type OID\n");
1200 0 : return GDK_FAIL;
1201 : }
1202 217107 : if (BATtdense(p)) {
1203 208152 : pos = p->tseqbase;
1204 208152 : positions = &pos;
1205 208152 : autoincr = true;
1206 208152 : p = NULL;
1207 8955 : } else if (p->ttype != TYPE_void) {
1208 8953 : positions = (const oid *) Tloc(p, 0);
1209 8953 : autoincr = false;
1210 : } else {
1211 : autoincr = false;
1212 : }
1213 175 : } else if (autoincr) {
1214 175 : pos = *positions;
1215 : }
1216 217282 : if (BATcount(n) == 0) {
1217 : return GDK_SUCCEED;
1218 : }
1219 :
1220 29419 : BATiter ni = bat_iterator(n);
1221 :
1222 29430 : OIDXdestroy(b);
1223 29433 : STRMPdestroy(b);
1224 29434 : RTREEdestroy(b);
1225 : /* load hash so that we can maintain it */
1226 29425 : (void) BATcheckhash(b);
1227 :
1228 29419 : MT_lock_set(&b->theaplock);
1229 29407 : if (!force && (b->batRestricted != BAT_WRITE ||
1230 49 : (ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1)) {
1231 0 : MT_lock_unset(&b->theaplock);
1232 0 : bat_iterator_end(&ni);
1233 0 : GDKerror("access denied to %s, aborting.\n", BATgetId(b));
1234 0 : return GDK_FAIL;
1235 : }
1236 29407 : BATiter bi = bat_iterator_nolock(b);
1237 29407 : if (ni.count > BATcount(b) / gdk_unique_estimate_keep_fraction) {
1238 28718 : b->tunique_est = 0;
1239 : }
1240 :
1241 29407 : b->tsorted = b->trevsorted = false;
1242 29407 : b->tnosorted = b->tnorevsorted = 0;
1243 29407 : b->tseqbase = oid_nil;
1244 29407 : b->tkey = false;
1245 29407 : b->tnokey[0] = b->tnokey[1] = 0;
1246 :
1247 29407 : int (*atomcmp)(const void *, const void *) = ATOMcompare(b->ttype);
1248 29407 : const void *nil = ATOMnilptr(b->ttype);
1249 29407 : oid hseqend = b->hseqbase + BATcount(b);
1250 :
1251 29407 : MT_lock_unset(&b->theaplock);
1252 :
1253 30198 : bool anynil = false;
1254 30198 : bool locked = false;
1255 :
1256 30198 : if (b->tvheap) {
1257 1199073 : for (BUN i = 0; i < ni.count; i++) {
1258 1196180 : oid updid;
1259 1196180 : if (positions) {
1260 1195167 : updid = autoincr ? pos++ : *positions++;
1261 : } else {
1262 1013 : updid = BUNtoid(p, i);
1263 : }
1264 :
1265 1196180 : if (updid < b->hseqbase ||
1266 1196180 : (!mayappend && updid >= hseqend)) {
1267 0 : GDKerror("id out of range\n");
1268 0 : goto bailout;
1269 : }
1270 1196180 : updid -= b->hseqbase;
1271 1196180 : if (!force && updid < b->batInserted) {
1272 0 : GDKerror("updating committed value\n");
1273 0 : goto bailout;
1274 : }
1275 :
1276 1196180 : const void *new = BUNtvar(ni, i);
1277 :
1278 1196180 : if (updid >= BATcount(b)) {
1279 23585 : assert(mayappend);
1280 23585 : if (locked) {
1281 4 : MT_rwlock_wrunlock(&b->thashlock);
1282 4 : locked = false;
1283 : }
1284 23585 : if (b->tminpos != bi.minpos ||
1285 23584 : b->tmaxpos != bi.maxpos) {
1286 1 : MT_lock_set(&b->theaplock);
1287 1 : b->tminpos = bi.minpos;
1288 1 : b->tmaxpos = bi.maxpos;
1289 1 : MT_lock_unset(&b->theaplock);
1290 : }
1291 23585 : if (BATcount(b) < updid &&
1292 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1293 0 : bat_iterator_end(&ni);
1294 0 : return GDK_FAIL;
1295 : }
1296 23585 : if (BUNappend(b, new, force) != GDK_SUCCEED) {
1297 0 : bat_iterator_end(&ni);
1298 0 : return GDK_FAIL;
1299 : }
1300 23585 : bi = bat_iterator_nolock(b);
1301 128306 : continue;
1302 : }
1303 :
1304 : /* it is possible that a previous run was killed
1305 : * after an update (with a mmapped tail file)
1306 : * but before that was committed, then the
1307 : * offset may point outside of the vheap */
1308 1172595 : const void *old = BUNtvaroff(bi, updid) < bi.vhfree ? BUNtvar(bi, updid) : NULL;
1309 :
1310 1166553 : if (old && atomcmp(old, new) == 0) {
1311 : /* replacing with the same value:
1312 : * nothing to do */
1313 104721 : continue;
1314 : }
1315 :
1316 1067623 : bool isnil = atomcmp(new, nil) == 0;
1317 1066677 : anynil |= isnil;
1318 1066677 : MT_lock_set(&b->theaplock);
1319 1066958 : if (old == NULL ||
1320 1066958 : (b->tnil &&
1321 789 : !anynil &&
1322 790 : atomcmp(old, nil) == 0)) {
1323 : /* if old value is nil and no new
1324 : * value is, we're not sure anymore
1325 : * about the nil property, so we must
1326 : * clear it */
1327 783 : b->tnil = false;
1328 : }
1329 1066957 : b->tnonil &= !isnil;
1330 1066957 : b->tnil |= isnil;
1331 1066957 : MT_lock_unset(&b->theaplock);
1332 1067253 : if (bi.maxpos != BUN_NONE) {
1333 4234 : if (!isnil &&
1334 2117 : atomcmp(BUNtvar(bi, bi.maxpos), new) < 0) {
1335 : /* new value is larger than
1336 : * previous largest */
1337 23 : bi.maxpos = updid;
1338 4188 : } else if (old == NULL ||
1339 2107 : (atomcmp(BUNtvar(bi, bi.maxpos), old) == 0 &&
1340 13 : atomcmp(new, old) != 0)) {
1341 : /* old value is equal to
1342 : * largest and new value is
1343 : * smaller, so we don't know
1344 : * anymore which is the
1345 : * largest */
1346 13 : bi.maxpos = BUN_NONE;
1347 : }
1348 : }
1349 1067253 : if (bi.minpos != BUN_NONE) {
1350 4230 : if (!isnil &&
1351 2115 : atomcmp(BUNtvar(bi, bi.minpos), new) > 0) {
1352 : /* new value is smaller than
1353 : * previous smallest */
1354 17 : bi.minpos = updid;
1355 4196 : } else if (old == NULL ||
1356 2120 : (atomcmp(BUNtvar(bi, bi.minpos), old) == 0 &&
1357 22 : atomcmp(new, old) != 0)) {
1358 : /* old value is equal to
1359 : * smallest and new value is
1360 : * larger, so we don't know
1361 : * anymore which is the
1362 : * smallest */
1363 22 : bi.minpos = BUN_NONE;
1364 : }
1365 : }
1366 1067253 : if (!locked) {
1367 2393 : MT_rwlock_wrlock(&b->thashlock);
1368 2393 : locked = true;
1369 : }
1370 1067253 : if (old)
1371 1067253 : HASHdelete_locked(&bi, updid, old);
1372 0 : else if (b->thash) {
1373 0 : doHASHdestroy(b, b->thash);
1374 0 : b->thash = NULL;
1375 : }
1376 :
1377 1067219 : var_t d;
1378 1067219 : switch (b->twidth) {
1379 1058684 : case 1:
1380 1058684 : d = (var_t) ((uint8_t *) b->theap->base)[updid] + GDK_VAROFFSET;
1381 1058684 : break;
1382 6503 : case 2:
1383 6503 : d = (var_t) ((uint16_t *) b->theap->base)[updid] + GDK_VAROFFSET;
1384 6503 : break;
1385 1997 : case 4:
1386 1997 : d = (var_t) ((uint32_t *) b->theap->base)[updid];
1387 1997 : break;
1388 : #if SIZEOF_VAR_T == 8
1389 35 : case 8:
1390 35 : d = (var_t) ((uint64_t *) b->theap->base)[updid];
1391 35 : break;
1392 : #endif
1393 : default:
1394 0 : MT_UNREACHABLE();
1395 : }
1396 1067219 : MT_lock_set(&b->theaplock);
1397 1067197 : gdk_return rc = ATOMreplaceVAR(b, &d, new);
1398 1067162 : MT_lock_unset(&b->theaplock);
1399 1067126 : if (rc != GDK_SUCCEED) {
1400 0 : goto bailout;
1401 : }
1402 1067126 : if (b->twidth < SIZEOF_VAR_T &&
1403 1067170 : (b->twidth <= 2 ? d - GDK_VAROFFSET : d) >= ((size_t) 1 << (8 << b->tshift))) {
1404 : /* doesn't fit in current heap, upgrade it */
1405 16 : if (GDKupgradevarheap(b, d, 0, MAX(updid, b->batCount)) != GDK_SUCCEED) {
1406 0 : goto bailout;
1407 : }
1408 : }
1409 : /* in case ATOMreplaceVAR and/or
1410 : * GDKupgradevarheap replaces a heap, we need to
1411 : * reinitialize the iterator */
1412 : {
1413 : /* save and restore minpos/maxpos */
1414 1067126 : BUN minpos = bi.minpos;
1415 1067126 : BUN maxpos = bi.maxpos;
1416 1067126 : bi = bat_iterator_nolock(b);
1417 1067126 : bi.minpos = minpos;
1418 1067126 : bi.maxpos = maxpos;
1419 : }
1420 1067126 : switch (b->twidth) {
1421 1058575 : case 1:
1422 1058575 : ((uint8_t *) b->theap->base)[updid] = (uint8_t) (d - GDK_VAROFFSET);
1423 1058575 : break;
1424 6519 : case 2:
1425 6519 : ((uint16_t *) b->theap->base)[updid] = (uint16_t) (d - GDK_VAROFFSET);
1426 6519 : break;
1427 1997 : case 4:
1428 1997 : ((uint32_t *) b->theap->base)[updid] = (uint32_t) d;
1429 1997 : break;
1430 : #if SIZEOF_VAR_T == 8
1431 35 : case 8:
1432 35 : ((uint64_t *) b->theap->base)[updid] = (uint64_t) d;
1433 35 : break;
1434 : #endif
1435 : default:
1436 0 : MT_UNREACHABLE();
1437 : }
1438 1067126 : HASHinsert_locked(&bi, updid, new);
1439 :
1440 : }
1441 2893 : if (locked) {
1442 2388 : if (b->thash)
1443 2 : nunique = b->thash->nunique;
1444 2388 : MT_rwlock_wrunlock(&b->thashlock);
1445 2388 : locked = false;
1446 : }
1447 2894 : MT_lock_set(&b->theaplock);
1448 2893 : b->tvheap->dirty = true;
1449 2893 : MT_lock_unset(&b->theaplock);
1450 26531 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
1451 0 : assert(b->thash == NULL);
1452 0 : HASHdestroy(b); /* hash doesn't make sense for msk */
1453 0 : for (BUN i = 0; i < ni.count; i++) {
1454 0 : oid updid;
1455 0 : if (positions) {
1456 0 : updid = autoincr ? pos++ : *positions++;
1457 : } else {
1458 0 : updid = BUNtoid(p, i);
1459 : }
1460 :
1461 0 : if (updid < b->hseqbase ||
1462 0 : (!mayappend && updid >= hseqend)) {
1463 0 : GDKerror("id out of range\n");
1464 0 : bat_iterator_end(&ni);
1465 0 : return GDK_FAIL;
1466 : }
1467 0 : updid -= b->hseqbase;
1468 0 : if (!force && updid < b->batInserted) {
1469 0 : GDKerror("updating committed value\n");
1470 0 : bat_iterator_end(&ni);
1471 0 : return GDK_FAIL;
1472 : }
1473 0 : if (updid >= BATcount(b)) {
1474 0 : assert(mayappend);
1475 0 : if (BATcount(b) < updid &&
1476 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1477 0 : bat_iterator_end(&ni);
1478 0 : return GDK_FAIL;
1479 : }
1480 0 : if (BUNappend(b, BUNtmsk(ni, i), force) != GDK_SUCCEED) {
1481 0 : bat_iterator_end(&ni);
1482 0 : return GDK_FAIL;
1483 : }
1484 0 : continue;
1485 : }
1486 0 : mskSetVal(b, updid, Tmskval(&ni, i));
1487 : }
1488 0 : bi = bat_iterator_nolock(b);
1489 26531 : } else if (autoincr) {
1490 18077 : if (pos < b->hseqbase ||
1491 17112 : (!mayappend && pos + ni.count > hseqend)) {
1492 0 : GDKerror("id out of range\n");
1493 0 : bat_iterator_end(&ni);
1494 0 : return GDK_FAIL;
1495 : }
1496 18077 : pos -= b->hseqbase;
1497 18077 : if (!force && pos < b->batInserted) {
1498 0 : GDKerror("updating committed value\n");
1499 0 : bat_iterator_end(&ni);
1500 0 : return GDK_FAIL;
1501 : }
1502 :
1503 18077 : if (pos >= BATcount(b)) {
1504 513 : assert(mayappend);
1505 513 : bat_iterator_end(&ni);
1506 513 : if (BATcount(b) < pos &&
1507 0 : BUNappendmulti(b, NULL, (BUN) (pos - BATcount(b)), force) != GDK_SUCCEED) {
1508 : return GDK_FAIL;
1509 : }
1510 513 : return BATappend(b, n, NULL, force);
1511 : }
1512 17572 : if (pos + ni.count > BATcount(b) &&
1513 8 : BUNappendmulti(b, NULL, (BUN) (pos + ni.count - BATcount(b)), force) != GDK_SUCCEED) {
1514 0 : bat_iterator_end(&ni);
1515 0 : return GDK_FAIL;
1516 : }
1517 17564 : bi = bat_iterator_nolock(b);
1518 :
1519 : /* we copy all of n, so if there are nils in n we get
1520 : * nils in b (and else we don't know) */
1521 17564 : b->tnil = ni.nil;
1522 : /* we may not copy over all of b, so we only know that
1523 : * there are no nils in b afterward if there weren't
1524 : * any in either b or n to begin with */
1525 17564 : b->tnonil &= ni.nonil;
1526 : /* if there is no hash, we don't start the loop, if
1527 : * there is only a persisted hash, it will get destroyed
1528 : * in the first iteration, after which there is no hash
1529 : * and the loop ends */
1530 17564 : MT_rwlock_wrlock(&b->thashlock);
1531 17574 : locked = true;
1532 17574 : for (BUN i = pos, j = pos + ni.count; i < j && b->thash; i++)
1533 27 : HASHdelete_locked(&bi, i, Tloc(b, i));
1534 17547 : if (ni.type == TYPE_void) {
1535 0 : assert(b->ttype == TYPE_oid);
1536 0 : oid *o = Tloc(b, pos);
1537 0 : if (is_oid_nil(ni.tseq)) {
1538 : /* we may or may not overwrite the old
1539 : * min/max values */
1540 0 : bi.minpos = BUN_NONE;
1541 0 : bi.maxpos = BUN_NONE;
1542 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1543 0 : o[i] = oid_nil;
1544 0 : b->tnil = true;
1545 : } else {
1546 0 : oid v = ni.tseq;
1547 : /* we know min/max of n, so we know
1548 : * the new min/max of b if those of n
1549 : * are smaller/larger than the old */
1550 0 : if (bi.minpos != BUN_NONE) {
1551 0 : if (v <= BUNtoid(b, bi.minpos))
1552 0 : bi.minpos = pos;
1553 0 : else if (pos <= bi.minpos && bi.minpos < pos + ni.count)
1554 0 : bi.minpos = BUN_NONE;
1555 : }
1556 0 : if (complex_cand(n)) {
1557 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1558 0 : o[i] = *(oid *)Tpos(&ni, i);
1559 : /* last value */
1560 0 : v = o[ni.count - 1];
1561 : } else {
1562 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1563 0 : o[i] = v++;
1564 : /* last value added (not one beyond) */
1565 0 : v--;
1566 : }
1567 0 : if (bi.maxpos != BUN_NONE) {
1568 0 : if (v >= BUNtoid(b, bi.maxpos))
1569 0 : bi.maxpos = pos + ni.count - 1;
1570 0 : else if (pos <= bi.maxpos && bi.maxpos < pos + ni.count)
1571 0 : bi.maxpos = BUN_NONE;
1572 : }
1573 : }
1574 : } else {
1575 : /* if the extremes of n are at least as
1576 : * extreme as those of b, we can replace b's
1577 : * min/max, else we don't know what b's new
1578 : * min/max are*/
1579 17875 : if (bi.minpos != BUN_NONE && ni.minpos != BUN_NONE &&
1580 328 : atomcmp(BUNtloc(bi, bi.minpos), BUNtail(ni, ni.minpos)) >= 0) {
1581 213 : bi.minpos = pos + ni.minpos;
1582 : } else {
1583 17334 : bi.minpos = BUN_NONE;
1584 : }
1585 17899 : if (bi.maxpos != BUN_NONE && ni.maxpos != BUN_NONE &&
1586 352 : atomcmp(BUNtloc(bi, bi.maxpos), BUNtail(ni, ni.maxpos)) <= 0) {
1587 266 : bi.maxpos = pos + ni.maxpos;
1588 : } else {
1589 17281 : bi.maxpos = BUN_NONE;
1590 : }
1591 17547 : memcpy(Tloc(b, pos), ni.base,
1592 17547 : ni.count << b->tshift);
1593 : }
1594 : /* either we have a hash that was updated above, or we
1595 : * have no hash; we cannot have the case where there is
1596 : * only a persisted (unloaded) hash since it would have
1597 : * been destroyed above */
1598 17547 : if (b->thash != NULL) {
1599 0 : for (BUN i = pos, j = pos + ni.count; i < j; i++)
1600 0 : HASHinsert_locked(&bi, i, Tloc(b, i));
1601 0 : if (b->thash)
1602 0 : nunique = b->thash->nunique;
1603 : }
1604 17559 : MT_rwlock_wrunlock(&b->thashlock);
1605 17572 : locked = false;
1606 17572 : if (ni.count == BATcount(b)) {
1607 : /* if we replaced all values of b by values
1608 : * from n, we can also copy the min/max
1609 : * properties */
1610 8836 : bi.minpos = ni.minpos;
1611 8836 : bi.maxpos = ni.maxpos;
1612 8836 : if (BATtdensebi(&ni)) {
1613 : /* replaced all of b with a dense sequence */
1614 47 : MT_lock_set(&b->theaplock);
1615 47 : BATtseqbase(b, ni.tseq);
1616 47 : MT_lock_unset(&b->theaplock);
1617 : }
1618 : }
1619 : } else {
1620 169652709 : for (BUN i = 0; i < ni.count; i++) {
1621 169644260 : oid updid;
1622 169644260 : if (positions) {
1623 : /* assert(!autoincr) */
1624 169644260 : updid = *positions++;
1625 : } else {
1626 0 : updid = BUNtoid(p, i);
1627 : }
1628 :
1629 169644260 : if (updid < b->hseqbase ||
1630 169644260 : (!mayappend && updid >= hseqend)) {
1631 0 : GDKerror("id out of range\n");
1632 0 : goto bailout;
1633 : }
1634 169644260 : updid -= b->hseqbase;
1635 169644260 : if (!force && updid < b->batInserted) {
1636 0 : GDKerror("updating committed value\n");
1637 0 : goto bailout;
1638 : }
1639 :
1640 169644260 : const void *new = BUNtloc(ni, i);
1641 :
1642 169644260 : if (updid >= BATcount(b)) {
1643 16059 : assert(mayappend);
1644 16059 : if (locked) {
1645 10 : MT_rwlock_wrunlock(&b->thashlock);
1646 10 : locked = false;
1647 : }
1648 16059 : if (b->tminpos != bi.minpos ||
1649 16057 : b->tmaxpos != bi.maxpos) {
1650 3 : MT_lock_set(&b->theaplock);
1651 3 : b->tminpos = bi.minpos;
1652 3 : b->tmaxpos = bi.maxpos;
1653 3 : MT_lock_unset(&b->theaplock);
1654 : }
1655 16059 : if (BATcount(b) < updid &&
1656 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1657 0 : goto bailout;
1658 : }
1659 16059 : if (BUNappend(b, new, force) != GDK_SUCCEED) {
1660 0 : bat_iterator_end(&ni);
1661 0 : return GDK_FAIL;
1662 : }
1663 16059 : bi = bat_iterator_nolock(b);
1664 16059 : continue;
1665 : }
1666 :
1667 169628201 : const void *old = BUNtloc(bi, updid);
1668 169628201 : bool isnil = atomcmp(new, nil) == 0;
1669 166101895 : anynil |= isnil;
1670 166101895 : if (b->tnil &&
1671 1908 : !anynil &&
1672 1908 : atomcmp(old, nil) == 0) {
1673 : /* if old value is nil and no new
1674 : * value is, we're not sure anymore
1675 : * about the nil property, so we must
1676 : * clear it */
1677 1904 : b->tnil = false;
1678 : }
1679 166101895 : b->tnonil &= !isnil;
1680 166101895 : b->tnil |= isnil;
1681 166101895 : if (bi.maxpos != BUN_NONE) {
1682 10672 : if (!isnil &&
1683 5334 : atomcmp(BUNtloc(bi, bi.maxpos), new) < 0) {
1684 : /* new value is larger than
1685 : * previous largest */
1686 137 : bi.maxpos = updid;
1687 5208 : } else if (atomcmp(BUNtloc(bi, bi.maxpos), old) == 0 &&
1688 7 : atomcmp(new, old) != 0) {
1689 : /* old value is equal to
1690 : * largest and new value is
1691 : * smaller, so we don't know
1692 : * anymore which is the
1693 : * largest */
1694 7 : bi.maxpos = BUN_NONE;
1695 : }
1696 : }
1697 166101895 : if (bi.minpos != BUN_NONE) {
1698 10672 : if (!isnil &&
1699 5334 : atomcmp(BUNtloc(bi, bi.minpos), new) > 0) {
1700 : /* new value is smaller than
1701 : * previous smallest */
1702 6 : bi.minpos = updid;
1703 5338 : } else if (atomcmp(BUNtloc(bi, bi.minpos), old) == 0 &&
1704 6 : atomcmp(new, old) != 0) {
1705 : /* old value is equal to
1706 : * smallest and new value is
1707 : * larger, so we don't know
1708 : * anymore which is the
1709 : * smallest */
1710 6 : bi.minpos = BUN_NONE;
1711 : }
1712 : }
1713 :
1714 166101895 : if (!locked) {
1715 8444 : MT_rwlock_wrlock(&b->thashlock);
1716 8444 : locked = true;
1717 : }
1718 166101900 : HASHdelete_locked(&bi, updid, old);
1719 166013055 : switch (b->twidth) {
1720 27331109 : case 1:
1721 27331109 : ((bte *) b->theap->base)[updid] = * (bte *) new;
1722 27331109 : break;
1723 525054 : case 2:
1724 525054 : ((sht *) b->theap->base)[updid] = * (sht *) new;
1725 525054 : break;
1726 34355532 : case 4:
1727 34355532 : ((int *) b->theap->base)[updid] = * (int *) new;
1728 34355532 : break;
1729 98412584 : case 8:
1730 98412584 : ((lng *) b->theap->base)[updid] = * (lng *) new;
1731 98412584 : break;
1732 5388776 : case 16:
1733 : #ifdef HAVE_HGE
1734 5388776 : ((hge *) b->theap->base)[updid] = * (hge *) new;
1735 : #else
1736 : ((uuid *) b->theap->base)[updid] = * (uuid *) new;
1737 : #endif
1738 5388776 : break;
1739 0 : default:
1740 0 : memcpy(BUNtloc(bi, updid), new, ATOMsize(b->ttype));
1741 0 : break;
1742 : }
1743 166013055 : HASHinsert_locked(&bi, updid, new);
1744 : }
1745 8449 : if (locked) {
1746 8439 : if (b->thash)
1747 0 : nunique = b->thash->nunique;
1748 8439 : MT_rwlock_wrunlock(&b->thashlock);
1749 8439 : locked = false;
1750 : }
1751 : }
1752 28916 : bat_iterator_end(&ni);
1753 28901 : MT_lock_set(&b->theaplock);
1754 28906 : if (nunique != 0)
1755 2 : b->tunique_est = (double) nunique;
1756 28906 : b->tminpos = bi.minpos;
1757 28906 : b->tmaxpos = bi.maxpos;
1758 28906 : b->theap->dirty = true;
1759 28906 : MT_lock_unset(&b->theaplock);
1760 28920 : TRC_DEBUG(ALGO,
1761 : "BATreplace(" ALGOBATFMT "," ALGOOPTBATFMT "," ALGOBATFMT ") " LLFMT " usec\n",
1762 : ALGOBATPAR(b), ALGOOPTBATPAR(p), ALGOBATPAR(n),
1763 : GDKusec() - t0);
1764 : return GDK_SUCCEED;
1765 :
1766 0 : bailout:
1767 0 : bat_iterator_end(&ni);
1768 0 : if (locked) {
1769 0 : Hash *h = b->thash;
1770 0 : b->thash = NULL;
1771 0 : MT_rwlock_wrunlock(&b->thashlock);
1772 0 : doHASHdestroy(b, h);
1773 : }
1774 : return GDK_FAIL;
1775 : }
1776 :
1777 : /* replace values from b at locations specified in p with values in n */
1778 : gdk_return
1779 216281 : BATreplace(BAT *b, BAT *p, BAT *n, bool force)
1780 : {
1781 216281 : return BATappend_or_update(b, p, NULL, n, false, false, force);
1782 : }
1783 :
1784 : /* like BATreplace, but p may specify locations beyond the end of b */
1785 : gdk_return
1786 1110 : BATupdate(BAT *b, BAT *p, BAT *n, bool force)
1787 : {
1788 1110 : return BATappend_or_update(b, p, NULL, n, true, false, force);
1789 : }
1790 :
1791 : #if 0 /* not used */
1792 : /* like BATreplace, but the positions are given by an array of oid values */
1793 : gdk_return
1794 : BATreplacepos(BAT *b, const oid *positions, BAT *n, bool autoincr, bool force)
1795 : {
1796 : return BATappend_or_update(b, NULL, positions, n, false, autoincr, force);
1797 : }
1798 : #endif
1799 :
1800 : /* like BATreplace, but the positions are given by an array of oid
1801 : * values, and they may specify locations beyond the end of b */
1802 : gdk_return
1803 175 : BATupdatepos(BAT *b, const oid *positions, BAT *n, bool autoincr, bool force)
1804 : {
1805 175 : return BATappend_or_update(b, NULL, positions, n, true, autoincr, force);
1806 : }
1807 :
1808 : /*
1809 : * BAT Selections
1810 : * The BAT selectors are among the most heavily used operators.
1811 : * Their efficient implementation is therefore mandatory.
1812 : *
1813 : * BAT slice
1814 : * This function returns a horizontal slice from a BAT. It optimizes
1815 : * execution by avoiding to copy when the BAT is memory mapped (in
1816 : * this case, an independent submap is created) or else when it is
1817 : * read-only, then a VIEW bat is created as a result.
1818 : *
1819 : * If a new copy has to be created, this function takes care to
1820 : * preserve void-columns (in this case, the seqbase has to be
1821 : * recomputed in the result).
1822 : *
1823 : * The selected range is excluding the high value.
1824 : */
1825 : BAT *
1826 12485771 : BATslice(BAT *b, BUN l, BUN h)
1827 : {
1828 12485771 : BUN low = l;
1829 12485771 : BAT *bn = NULL;
1830 :
1831 12485771 : BATcheck(b, NULL);
1832 12485771 : BATiter bi = bat_iterator(b);
1833 12500132 : if (l > bi.count)
1834 : l = bi.count;
1835 12500132 : if (h > bi.count)
1836 : h = bi.count;
1837 12500132 : if (h < l)
1838 : h = l;
1839 :
1840 12500132 : if (complex_cand(b)) {
1841 : /* slicing a candidate list with exceptions */
1842 95 : struct canditer ci;
1843 95 : canditer_init(&ci, NULL, b);
1844 95 : if (b->hseqbase + l >= ci.hseq) {
1845 95 : l = b->hseqbase + l - ci.hseq;
1846 95 : h = b->hseqbase + h - ci.hseq;
1847 : } else {
1848 0 : l = 0;
1849 0 : if (b->hseqbase + h >= ci.hseq)
1850 0 : h = b->hseqbase + h - ci.hseq;
1851 : else
1852 : h = 0;
1853 : }
1854 95 : bn = canditer_slice(&ci, l, h);
1855 95 : goto doreturn;
1856 : }
1857 : /* If the source BAT is readonly, then we can obtain a VIEW
1858 : * that just reuses the memory of the source. */
1859 12500037 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1860 : /* forget about slices for bit masks: we can't deal
1861 : * with difference in alignment, so we'll just make a
1862 : * copy */
1863 0 : bn = COLnew((oid) (b->hseqbase + low), b->ttype, h - l, TRANSIENT);
1864 : /* we use BATappend with a candidate list to easily
1865 : * copy the part of b that we need */
1866 0 : BAT *s = BATdense(0, (oid) (b->hseqbase + low), h - l);
1867 0 : if (bn == NULL ||
1868 0 : s == NULL ||
1869 0 : BATappend(bn, b, s, false) != GDK_SUCCEED) {
1870 0 : BBPreclaim(bn);
1871 0 : BBPreclaim(s);
1872 0 : bn = NULL;
1873 0 : goto doreturn;
1874 : }
1875 0 : BBPunfix(s->batCacheid);
1876 0 : goto doreturn;
1877 : }
1878 12500037 : restrict_t prestricted;
1879 14482795 : if (bi.restricted == BAT_READ && VIEWtparent(b)) {
1880 1978497 : BAT *pb = BBP_desc(VIEWtparent(b));
1881 1978497 : MT_lock_set(&pb->theaplock);
1882 1977759 : prestricted = pb->batRestricted;
1883 1977759 : MT_lock_unset(&pb->theaplock);
1884 : } else {
1885 : prestricted = BAT_WRITE; /* just initialize with anything */
1886 : }
1887 12504298 : if (bi.restricted == BAT_READ &&
1888 12449389 : (!VIEWtparent(b) || prestricted == BAT_READ)) {
1889 12449280 : bn = VIEWcreate(b->hseqbase + low, b, l, h);
1890 12449280 : if (bn == NULL)
1891 : goto doreturn;
1892 : } else {
1893 : /* create a new BAT and put everything into it */
1894 55018 : BUN p = l;
1895 55018 : BUN q = h;
1896 :
1897 55018 : bn = COLnew((oid) (b->hseqbase + low), BATtdensebi(&bi) || (b->ttype == TYPE_oid && h == l) ? TYPE_void : b->ttype, h - l, TRANSIENT);
1898 55045 : if (bn == NULL)
1899 0 : goto doreturn;
1900 :
1901 55045 : if (bn->ttype == TYPE_void) {
1902 37184 : BATsetcount(bn, h - l);
1903 37134 : BATtseqbase(bn, is_oid_nil(bi.tseq) ? oid_nil : h == l ? 0 : (oid) (bi.tseq + low));
1904 17861 : } else if (bn->tvheap == NULL) {
1905 11280 : assert(BATatoms[bn->ttype].atomPut == NULL);
1906 11280 : memcpy(Tloc(bn, 0), (const char *) bi.base + (p << bi.shift),
1907 11280 : (q - p) << bn->tshift);
1908 11280 : bn->theap->dirty = true;
1909 11280 : BATsetcount(bn, h - l);
1910 : } else {
1911 1716833 : for (; p < q; p++) {
1912 1710266 : if (bunfastapp(bn, BUNtail(bi, p)) != GDK_SUCCEED) {
1913 0 : BBPreclaim(bn);
1914 0 : bn = NULL;
1915 0 : goto doreturn;
1916 : }
1917 : }
1918 : }
1919 54969 : bn->theap->dirty = true;
1920 54969 : bn->tsorted = bi.sorted || bn->batCount <= 1;
1921 54969 : bn->trevsorted = bi.revsorted || bn->batCount <= 1;
1922 54969 : bn->tkey = bi.key || bn->batCount <= 1;
1923 54969 : bn->tnonil = bi.nonil;
1924 54969 : bn->tnil = false; /* we don't know */
1925 54969 : if (bi.nosorted > l && bi.nosorted < h && !bn->tsorted)
1926 2456 : bn->tnosorted = bi.nosorted - l;
1927 : else
1928 52513 : bn->tnosorted = 0;
1929 54969 : if (bi.norevsorted > l && bi.norevsorted < h && !bn->trevsorted)
1930 4894 : bn->tnorevsorted = bi.norevsorted - l;
1931 : else
1932 50075 : bn->tnorevsorted = 0;
1933 54969 : if (bi.nokey[0] >= l && bi.nokey[0] < h &&
1934 44791 : bi.nokey[1] >= l && bi.nokey[1] < h &&
1935 433 : bi.nokey[0] != bi.nokey[1] &&
1936 : !bn->tkey) {
1937 433 : bn->tnokey[0] = bi.nokey[0] - l;
1938 433 : bn->tnokey[1] = bi.nokey[1] - l;
1939 : } else {
1940 54536 : bn->tnokey[0] = bn->tnokey[1] = 0;
1941 : }
1942 : }
1943 12503418 : doreturn:
1944 12503418 : bat_iterator_end(&bi);
1945 12505778 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",lo=" BUNFMT ",hi=" BUNFMT " -> "
1946 : ALGOOPTBATFMT "\n",
1947 : ALGOBATPAR(b), l, h, ALGOOPTBATPAR(bn));
1948 : return bn;
1949 : }
1950 :
1951 : #define BAT_ORDERED(TPE) \
1952 : do { \
1953 : const TPE *restrict vals = Tloc(b, 0); \
1954 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
1955 : if (vals[p - 1] > vals[p]) { \
1956 : b->tnosorted = p; \
1957 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
1958 : goto doreturn; \
1959 : } else if (vals[p - 1] < vals[p]) { \
1960 : if (!b->trevsorted && b->tnorevsorted == 0) { \
1961 : b->tnorevsorted = p; \
1962 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b)); \
1963 : } \
1964 : } else if (!b->tkey && b->tnokey[1] == 0) { \
1965 : b->tnokey[0] = p - 1; \
1966 : b->tnokey[1] = p; \
1967 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b)); \
1968 : } \
1969 : } \
1970 : } while (0)
1971 :
1972 : #define BAT_ORDERED_FP(TPE) \
1973 : do { \
1974 : const TPE *restrict vals = Tloc(b, 0); \
1975 : TPE prev = vals[0]; \
1976 : bool prevnil = is_##TPE##_nil(prev); \
1977 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
1978 : TPE next = vals[p]; \
1979 : int cmp = prevnil ? -!(prevnil = is_##TPE##_nil(next)) : (prevnil = is_##TPE##_nil(next)) ? 1 : (prev > next) - (prev < next); \
1980 : prev = next; \
1981 : if (cmp > 0) { \
1982 : b->tnosorted = bi.nosorted = p; \
1983 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
1984 : goto doreturn; \
1985 : } else if (cmp < 0) { \
1986 : if (!b->trevsorted && b->tnorevsorted == 0) { \
1987 : b->tnorevsorted = bi.norevsorted = p; \
1988 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b)); \
1989 : } \
1990 : } else if (!b->tkey && b->tnokey[1] == 0) { \
1991 : b->tnokey[0] = bi.nokey[0] = p - 1; \
1992 : b->tnokey[1] = bi.nokey[1] = p; \
1993 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b)); \
1994 : } \
1995 : } \
1996 : } while (0)
1997 :
1998 : /* Return whether the BAT is ordered or not. If we don't know, invest
1999 : * in a scan and record the results in the bat descriptor. If during
2000 : * the scan we happen to find evidence that the BAT is not reverse
2001 : * sorted, we record the location. */
2002 : bool
2003 3180996 : BATordered(BAT *b)
2004 : {
2005 3180996 : lng t0 = GDKusec();
2006 3181714 : bool sorted;
2007 :
2008 3181714 : MT_rwlock_rdlock(&b->thashlock);
2009 3183188 : MT_lock_set(&b->theaplock);
2010 3182231 : if (b->ttype == TYPE_void || b->tsorted || BATcount(b) == 0) {
2011 529792 : MT_lock_unset(&b->theaplock);
2012 530133 : MT_rwlock_rdunlock(&b->thashlock);
2013 530133 : return true;
2014 : }
2015 2652439 : if (b->tnosorted > 0 || !ATOMlinear(b->ttype)) {
2016 2139285 : MT_lock_unset(&b->theaplock);
2017 2139510 : MT_rwlock_rdunlock(&b->thashlock);
2018 2139510 : return false;
2019 : }
2020 :
2021 : /* There are a few reasons why we need a lock here. It may be
2022 : * that multiple threads call this functions at the same time
2023 : * (happens a lot with mitosis/mergetable), but we only need to
2024 : * scan the bat in one thread: the others can reap the rewards
2025 : * when that one thread is done. Also, we need the heap to
2026 : * remain accessible (could have used bat_iterator for that),
2027 : * and, and this is the killer argument, we may need to make
2028 : * changes to the bat descriptor. */
2029 513154 : BATiter bi = bat_iterator_nolock(b);
2030 513154 : if (!b->tsorted && b->tnosorted == 0) {
2031 925292 : switch (ATOMbasetype(b->ttype)) {
2032 108601 : case TYPE_bte:
2033 132734391 : BAT_ORDERED(bte);
2034 : break;
2035 10549 : case TYPE_sht:
2036 1648780 : BAT_ORDERED(sht);
2037 : break;
2038 329905 : case TYPE_int:
2039 120116502 : BAT_ORDERED(int);
2040 : break;
2041 10246 : case TYPE_lng:
2042 62757959 : BAT_ORDERED(lng);
2043 : break;
2044 : #ifdef HAVE_HGE
2045 374 : case TYPE_hge:
2046 8002676 : BAT_ORDERED(hge);
2047 : break;
2048 : #endif
2049 975 : case TYPE_flt:
2050 8007246 : BAT_ORDERED_FP(flt);
2051 : break;
2052 796 : case TYPE_dbl:
2053 8234167 : BAT_ORDERED_FP(dbl);
2054 : break;
2055 : case TYPE_str:
2056 21569229 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2057 21554351 : int c;
2058 21554351 : const char *p1 = BUNtvar(bi, p - 1);
2059 21554351 : const char *p2 = BUNtvar(bi, p);
2060 21554351 : if (p1 == p2)
2061 : c = 0;
2062 2255516 : else if (p1[0] == '\200') {
2063 1783 : if (p2[0] == '\200')
2064 : c = 0;
2065 : else
2066 : c = -1;
2067 2253733 : } else if (p2[0] == '\200')
2068 : c = 1;
2069 : else
2070 2252594 : c = strcmp(p1, p2);
2071 2252594 : if (c > 0) {
2072 36658 : b->tnosorted = bi.nosorted = p;
2073 36658 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2074 36658 : goto doreturn;
2075 21517693 : } else if (c < 0) {
2076 2218859 : assert(!b->trevsorted);
2077 2218859 : if (b->tnorevsorted == 0) {
2078 17447 : b->tnorevsorted = bi.norevsorted = p;
2079 17447 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b));
2080 : }
2081 19298834 : } else if (b->tnokey[1] == 0) {
2082 4913 : assert(!b->tkey);
2083 4913 : b->tnokey[0] = bi.nokey[0] = p - 1;
2084 4913 : b->tnokey[1] = bi.nokey[1] = p;
2085 21517693 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b));
2086 : }
2087 : }
2088 : break;
2089 186 : default: {
2090 186 : int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
2091 2482 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2092 2438 : int c;
2093 2438 : if ((c = cmpf(BUNtail(bi, p - 1), BUNtail(bi, p))) > 0) {
2094 142 : b->tnosorted = bi.nosorted = p;
2095 142 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2096 142 : goto doreturn;
2097 2296 : } else if (c < 0) {
2098 2246 : if (!b->trevsorted && b->tnorevsorted == 0) {
2099 90 : b->tnorevsorted = bi.norevsorted = p;
2100 90 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b));
2101 : }
2102 50 : } else if (!b->tkey && b->tnokey[1] == 0) {
2103 8 : b->tnokey[0] = bi.nokey[0] = p - 1;
2104 8 : b->tnokey[1] = bi.nokey[1] = p;
2105 2296 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b));
2106 : }
2107 : }
2108 : break;
2109 : }
2110 : }
2111 : /* we only get here if we completed the scan; note that
2112 : * if we didn't record evidence about *reverse*
2113 : * sortedness, we know that the BAT is also reverse
2114 : * sorted; similarly, if we didn't record evidence about
2115 : * keyness, we know the BAT is key */
2116 167532 : b->tsorted = bi.sorted = true;
2117 167532 : TRC_DEBUG(ALGO, "Fixed sorted for " ALGOBATFMT " (" LLFMT " usec)\n", ALGOBATPAR(b), GDKusec() - t0);
2118 167925 : if (!b->trevsorted && b->tnorevsorted == 0) {
2119 80409 : b->trevsorted = bi.revsorted = true;
2120 80409 : TRC_DEBUG(ALGO, "Fixed revsorted for " ALGOBATFMT "\n", ALGOBATPAR(b));
2121 : }
2122 167925 : if (!b->tkey && b->tnokey[1] == 0) {
2123 52812 : b->tkey = bi.key = true;
2124 52812 : TRC_DEBUG(ALGO, "Fixed key for " ALGOBATFMT "\n", ALGOBATPAR(b));
2125 : }
2126 : }
2127 167925 : doreturn:
2128 513563 : MT_rwlock_rdunlock(&b->thashlock);
2129 513431 : sorted = b->tsorted;
2130 513431 : bat pbid = VIEWtparent(b);
2131 513431 : MT_lock_unset(&b->theaplock);
2132 513949 : if (pbid) {
2133 283497 : BAT *pb = BBP_desc(pbid);
2134 283497 : MT_lock_set(&pb->theaplock);
2135 283492 : if (bi.count == BATcount(pb) &&
2136 207964 : bi.h == pb->theap &&
2137 207964 : bi.type == pb->ttype) {
2138 : /* add to knowledge in parent bat */
2139 207961 : pb->tsorted |= bi.sorted;
2140 207961 : if (pb->tnosorted == 0)
2141 207961 : pb->tnosorted = bi.nosorted;
2142 207961 : pb->trevsorted |= bi.revsorted;
2143 207961 : if (pb->tnorevsorted == 0)
2144 33660 : pb->tnorevsorted = bi.norevsorted;
2145 207961 : pb->tkey |= bi.key;
2146 207961 : if (pb->tnokey[1] == 0) {
2147 184382 : pb->tnokey[0] = bi.nokey[0];
2148 184382 : pb->tnokey[1] = bi.nokey[1];
2149 : }
2150 : }
2151 283492 : MT_lock_unset(&pb->theaplock);
2152 : }
2153 : return sorted;
2154 : }
2155 :
2156 : #define BAT_REVORDERED(TPE) \
2157 : do { \
2158 : const TPE *restrict vals = Tloc(b, 0); \
2159 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
2160 : if (vals[p - 1] < vals[p]) { \
2161 : b->tnorevsorted = p; \
2162 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
2163 : goto doreturn; \
2164 : } \
2165 : } \
2166 : } while (0)
2167 :
2168 : #define BAT_REVORDERED_FP(TPE) \
2169 : do { \
2170 : const TPE *restrict vals = Tloc(b, 0); \
2171 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
2172 : TPE prev = vals[p - 1], next = vals[p]; \
2173 : int cmp = is_flt_nil(prev) ? -!is_flt_nil(next) : is_flt_nil(next) ? 1 : (prev > next) - (prev < next); \
2174 : if (cmp < 0) { \
2175 : b->tnorevsorted = bi.norevsorted = p; \
2176 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
2177 : goto doreturn; \
2178 : } \
2179 : } \
2180 : } while (0)
2181 :
2182 : /* Return whether the BAT is reverse ordered or not. If we don't
2183 : * know, invest in a scan and record the results in the bat
2184 : * descriptor. */
2185 : bool
2186 2964914 : BATordered_rev(BAT *b)
2187 : {
2188 2964914 : lng t0 = GDKusec();
2189 2965639 : bool revsorted;
2190 :
2191 2965639 : if (b == NULL || !ATOMlinear(b->ttype))
2192 : return false;
2193 2965630 : MT_rwlock_rdlock(&b->thashlock);
2194 2965501 : MT_lock_set(&b->theaplock);
2195 2965474 : if (BATcount(b) <= 1 || b->trevsorted) {
2196 280318 : MT_lock_unset(&b->theaplock);
2197 280423 : MT_rwlock_rdunlock(&b->thashlock);
2198 280423 : return true;
2199 : }
2200 2685156 : if (b->ttype == TYPE_void) {
2201 23211 : MT_lock_unset(&b->theaplock);
2202 23220 : MT_rwlock_rdunlock(&b->thashlock);
2203 23220 : return is_oid_nil(b->tseqbase);
2204 : }
2205 2661945 : if (BATtdense(b) || b->tnorevsorted > 0) {
2206 2560210 : MT_lock_unset(&b->theaplock);
2207 2560576 : MT_rwlock_rdunlock(&b->thashlock);
2208 2560576 : return false;
2209 : }
2210 101735 : BATiter bi = bat_iterator_nolock(b);
2211 101735 : if (!b->trevsorted && b->tnorevsorted == 0) {
2212 165954 : switch (ATOMbasetype(b->ttype)) {
2213 35618 : case TYPE_bte:
2214 10435233 : BAT_REVORDERED(bte);
2215 : break;
2216 4469 : case TYPE_sht:
2217 3171246 : BAT_REVORDERED(sht);
2218 : break;
2219 36120 : case TYPE_int:
2220 1192196 : BAT_REVORDERED(int);
2221 : break;
2222 4810 : case TYPE_lng:
2223 2697642 : BAT_REVORDERED(lng);
2224 : break;
2225 : #ifdef HAVE_HGE
2226 122 : case TYPE_hge:
2227 988 : BAT_REVORDERED(hge);
2228 : break;
2229 : #endif
2230 511 : case TYPE_flt:
2231 1997 : BAT_REVORDERED_FP(flt);
2232 : break;
2233 323 : case TYPE_dbl:
2234 485514 : BAT_REVORDERED_FP(dbl);
2235 : break;
2236 19762 : default: {
2237 19762 : int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
2238 579746 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2239 576434 : if (cmpf(BUNtail(bi, p - 1), BUNtail(bi, p)) < 0) {
2240 16450 : b->tnorevsorted = p;
2241 16450 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2242 16450 : goto doreturn;
2243 : }
2244 : }
2245 : break;
2246 : }
2247 : }
2248 26424 : b->trevsorted = bi.revsorted = true;
2249 26424 : TRC_DEBUG(ALGO, "Fixed revsorted for " ALGOBATFMT " (" LLFMT " usec)\n", ALGOBATPAR(b), GDKusec() - t0);
2250 : }
2251 26424 : doreturn:
2252 101735 : MT_rwlock_rdunlock(&b->thashlock);
2253 101892 : revsorted = b->trevsorted;
2254 101892 : bat pbid = VIEWtparent(b);
2255 101892 : MT_lock_unset(&b->theaplock);
2256 102059 : if (pbid) {
2257 34618 : BAT *pb = BBP_desc(pbid);
2258 34618 : MT_lock_set(&pb->theaplock);
2259 34624 : if (bi.count == BATcount(pb) &&
2260 6477 : bi.h == pb->theap &&
2261 6477 : bi.type == pb->ttype) {
2262 : /* add to knowledge in parent bat */
2263 6476 : pb->trevsorted |= bi.revsorted;
2264 6476 : if (pb->tnorevsorted == 0)
2265 6476 : pb->tnorevsorted = bi.norevsorted;
2266 : }
2267 34624 : MT_lock_unset(&pb->theaplock);
2268 : }
2269 : return revsorted;
2270 : }
2271 :
2272 : /* figure out which sort function is to be called
2273 : * stable sort can produce an error (not enough memory available),
2274 : * "quick" sort does not produce errors */
2275 : static gdk_return
2276 3951710 : do_sort(void *restrict h, void *restrict t, const void *restrict base,
2277 : size_t n, int hs, int ts, int tpe, bool reverse, bool nilslast,
2278 : bool stable)
2279 : {
2280 3951710 : if (n <= 1) /* trivially sorted */
2281 : return GDK_SUCCEED;
2282 2763400 : switch (tpe) {
2283 2715126 : case TYPE_bte:
2284 : case TYPE_sht:
2285 : case TYPE_int:
2286 : case TYPE_lng:
2287 : #ifdef HAVE_HGE
2288 : case TYPE_hge:
2289 : #endif
2290 : case TYPE_date:
2291 : case TYPE_daytime:
2292 : case TYPE_timestamp:
2293 2715126 : assert(base == NULL);
2294 2715126 : if (nilslast == reverse && (stable || n > 100))
2295 20318 : return GDKrsort(h, t, n, hs, ts, reverse, false);
2296 : break;
2297 4 : case TYPE_uuid:
2298 4 : assert(base == NULL);
2299 4 : if (nilslast == reverse && (stable || n > 100))
2300 1 : return GDKrsort(h, t, n, hs, ts, reverse, true);
2301 : break;
2302 : default:
2303 : break;
2304 : }
2305 48382 : if (stable) {
2306 27 : if (reverse)
2307 0 : return GDKssort_rev(h, t, base, n, hs, ts, tpe);
2308 : else
2309 27 : return GDKssort(h, t, base, n, hs, ts, tpe);
2310 : } else {
2311 2743054 : GDKqsort(h, t, base, n, hs, ts, tpe, reverse, nilslast);
2312 : }
2313 2743054 : return GDK_SUCCEED;
2314 : }
2315 :
2316 : /* Sort the bat b according to both o and g. The stable and reverse
2317 : * parameters indicate whether the sort should be stable or descending
2318 : * respectively. The parameter b is required, o and g are optional
2319 : * (i.e., they may be NULL).
2320 : *
2321 : * A sorted copy is returned through the sorted parameter, the new
2322 : * ordering is returned through the order parameter, group information
2323 : * is returned through the groups parameter. All three output
2324 : * parameters may be NULL. If they're all NULL, this function does
2325 : * nothing.
2326 : *
2327 : * If o is specified, it is used to first rearrange b according to the
2328 : * order specified in o, after which b is sorted taking g into
2329 : * account.
2330 : *
2331 : * If g is specified, it indicates groups which should be individually
2332 : * ordered. Each row of consecutive equal values in g indicates a
2333 : * group which is sorted according to stable and reverse. g is used
2334 : * after the order in b was rearranged according to o.
2335 : *
2336 : * The outputs order and groups can be used in subsequent calls to
2337 : * this function. This can be used if multiple BATs need to be sorted
2338 : * together. The BATs should then be sorted in order of significance,
2339 : * and each following call should use the original unordered BAT plus
2340 : * the order and groups bat from the previous call. In this case, the
2341 : * sorted BATs are not of much use, so the sorted output parameter
2342 : * does not need to be specified.
2343 : * Apart from error checking and maintaining reference counts, sorting
2344 : * three columns (col1, col2, col3) could look like this with the
2345 : * sorted results in (col1s, col2s, col3s):
2346 : * BATsort(&col1s, &ord1, &grp1, col1, NULL, NULL, false, false, false);
2347 : * BATsort(&col2s, &ord2, &grp2, col2, ord1, grp1, false, false, false);
2348 : * BATsort(&col3s, NULL, NULL, col3, ord2, grp2, false, false, false);
2349 : * Note that the "reverse" parameter can be different for each call.
2350 : */
2351 : gdk_return
2352 34404 : BATsort(BAT **sorted, BAT **order, BAT **groups,
2353 : BAT *b, BAT *o, BAT *g, bool reverse, bool nilslast, bool stable)
2354 : {
2355 34404 : BAT *bn = NULL, *on = NULL, *gn = NULL, *pb = NULL;
2356 34404 : BATiter pbi;
2357 34404 : oid *restrict grps, *restrict ords, prev;
2358 34404 : BUN p, q, r;
2359 34404 : lng t0 = GDKusec();
2360 34407 : bool mkorderidx, orderidxlock = false;
2361 34407 : Heap *oidxh = NULL;
2362 :
2363 : /* we haven't implemented NILs as largest value for stable
2364 : * sort, so NILs come first for ascending and last for
2365 : * descending */
2366 34407 : assert(!stable || reverse == nilslast);
2367 :
2368 34407 : if (b == NULL) {
2369 0 : GDKerror("b must exist\n");
2370 0 : return GDK_FAIL;
2371 : }
2372 34407 : if (stable && reverse != nilslast) {
2373 0 : GDKerror("stable sort cannot have reverse != nilslast\n");
2374 0 : return GDK_FAIL;
2375 : }
2376 34407 : if (!ATOMlinear(b->ttype)) {
2377 0 : GDKerror("type %s cannot be sorted\n", ATOMname(b->ttype));
2378 0 : return GDK_FAIL;
2379 : }
2380 34407 : MT_lock_set(&b->theaplock);
2381 34412 : if (b->ttype == TYPE_void) {
2382 149 : b->tsorted = true;
2383 223 : if (b->trevsorted != (is_oid_nil(b->tseqbase) || b->batCount <= 1)) {
2384 0 : b->trevsorted = !b->trevsorted;
2385 : }
2386 298 : if (b->tkey != (!is_oid_nil(b->tseqbase) || b->batCount <= 1)) {
2387 0 : b->tkey = !b->tkey;
2388 : }
2389 34263 : } else if (b->batCount <= 1) {
2390 12547 : if (!b->tsorted || !b->trevsorted) {
2391 23 : b->tsorted = b->trevsorted = true;
2392 : }
2393 : }
2394 34412 : MT_lock_unset(&b->theaplock);
2395 34412 : if (o != NULL &&
2396 17407 : (ATOMtype(o->ttype) != TYPE_oid || /* oid tail */
2397 17407 : BATcount(o) != BATcount(b) || /* same size as b */
2398 6899 : (o->ttype == TYPE_void && /* no nil tail */
2399 2732 : BATcount(o) != 0 &&
2400 2732 : is_oid_nil(o->tseqbase)))) {
2401 0 : GDKerror("o must have type oid and same size as b\n");
2402 0 : return GDK_FAIL;
2403 : }
2404 34412 : if (g != NULL &&
2405 17406 : (ATOMtype(g->ttype) != TYPE_oid || /* oid tail */
2406 17406 : !g->tsorted || /* sorted */
2407 17406 : BATcount(g) != BATcount(b) || /* same size as b */
2408 7025 : (g->ttype == TYPE_void && /* no nil tail */
2409 2858 : BATcount(g) != 0 &&
2410 2858 : is_oid_nil(g->tseqbase)))) {
2411 0 : GDKerror("g must have type oid, sorted on the tail, "
2412 : "and same size as b\n");
2413 0 : return GDK_FAIL;
2414 : }
2415 34412 : if (sorted == NULL && order == NULL) {
2416 : /* no place to put result, so we're done quickly */
2417 0 : GDKerror("no place to put the result.\n");
2418 0 : return GDK_FAIL;
2419 : }
2420 34412 : if (g == NULL && !stable) {
2421 : /* pre-ordering doesn't make sense if we're not
2422 : * subsorting and the sort is not stable */
2423 16724 : o = NULL;
2424 : }
2425 34412 : if (b->tnonil) {
2426 : /* if there are no nils, placement of nils doesn't
2427 : * matter, so set nilslast such that ordered bits can
2428 : * be used */
2429 26217 : nilslast = reverse;
2430 : }
2431 34412 : pbi = bat_iterator(NULL);
2432 35808 : if (BATcount(b) <= 1 ||
2433 21663 : (reverse == nilslast &&
2434 : (reverse ? b->trevsorted : b->tsorted) &&
2435 5724 : o == NULL && g == NULL &&
2436 3215 : (groups == NULL || BATtkey(b) ||
2437 : (reverse ? b->tsorted : b->trevsorted)))) {
2438 : /* trivially (sub)sorted, and either we don't need to
2439 : * return group information, or we can trivially
2440 : * deduce the groups */
2441 14767 : if (sorted) {
2442 12852 : bn = COLcopy(b, b->ttype, false, TRANSIENT);
2443 12852 : if (bn == NULL)
2444 0 : goto error;
2445 12852 : *sorted = bn;
2446 : }
2447 14767 : if (order) {
2448 13867 : on = BATdense(b->hseqbase, b->hseqbase, BATcount(b));
2449 13867 : if (on == NULL)
2450 0 : goto error;
2451 13867 : *order = on;
2452 : }
2453 14767 : if (groups) {
2454 6505 : if (BATtkey(b)) {
2455 : /* singleton groups */
2456 5862 : gn = BATdense(b->hseqbase, 0, BATcount(b));
2457 5862 : if (gn == NULL)
2458 0 : goto error;
2459 : } else {
2460 : /* single group */
2461 643 : const oid *o = 0;
2462 643 : assert(BATcount(b) == 1 ||
2463 : (b->tsorted && b->trevsorted));
2464 643 : gn = BATconstant(b->hseqbase, TYPE_oid, &o, BATcount(b), TRANSIENT);
2465 644 : if (gn == NULL)
2466 0 : goto error;
2467 : }
2468 6506 : *groups = gn;
2469 : }
2470 14768 : bat_iterator_end(&pbi);
2471 14768 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o="
2472 : ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
2473 : ",reverse=%d,nilslast=%d,stable=%d) = ("
2474 : ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2475 : ALGOOPTBATFMT " -- trivial (" LLFMT
2476 : " usec)\n",
2477 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2478 : ALGOOPTBATPAR(g), reverse, nilslast, stable,
2479 : ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
2480 : ALGOOPTBATPAR(on), GDKusec() - t0);
2481 14768 : return GDK_SUCCEED;
2482 : }
2483 19636 : if (VIEWtparent(b)) {
2484 4218 : pb = BATdescriptor(VIEWtparent(b));
2485 4224 : if (pb != NULL &&
2486 4224 : (b->tbaseoff != pb->tbaseoff ||
2487 3345 : BATcount(b) != BATcount(pb) ||
2488 3068 : b->hseqbase != pb->hseqbase ||
2489 3058 : BATatoms[b->ttype].atomCmp != BATatoms[pb->ttype].atomCmp)) {
2490 1167 : BBPunfix(pb->batCacheid);
2491 1167 : pb = NULL;
2492 : }
2493 : } else {
2494 : pb = b;
2495 : }
2496 19643 : bat_iterator_end(&pbi);
2497 19642 : pbi = bat_iterator(pb);
2498 : /* when we will create an order index if it doesn't already exist */
2499 19642 : mkorderidx = (g == NULL && !reverse && !nilslast && pb != NULL && (order || !pbi.transient));
2500 19642 : if (g == NULL && !reverse && !nilslast && pb != NULL) {
2501 6403 : (void) BATcheckorderidx(pb);
2502 6403 : MT_lock_set(&pb->batIdxLock);
2503 6398 : if (pb->torderidx) {
2504 66 : if (!stable || ((oid *) pb->torderidx->base)[2]) {
2505 : /* we can use the order index */
2506 66 : oidxh = pb->torderidx;
2507 66 : HEAPincref(oidxh);
2508 : }
2509 : mkorderidx = false;
2510 6332 : } else if (b != pb) {
2511 : /* don't build orderidx on parent bat */
2512 : mkorderidx = false;
2513 5263 : } else if (mkorderidx) {
2514 : /* keep lock when going to create */
2515 4637 : orderidxlock = true;
2516 : }
2517 6398 : if (!orderidxlock)
2518 1762 : MT_lock_unset(&pb->batIdxLock);
2519 : }
2520 19639 : if (g == NULL && o == NULL && !reverse && !nilslast && oidxh != NULL) {
2521 : /* there is an order index that we can use */
2522 66 : on = COLnew(pb->hseqbase, TYPE_oid, pbi.count, TRANSIENT);
2523 66 : if (on == NULL)
2524 0 : goto error;
2525 66 : memcpy(Tloc(on, 0), (oid *) oidxh->base + ORDERIDXOFF, pbi.count * sizeof(oid));
2526 66 : BATsetcount(on, BATcount(b));
2527 66 : HEAPdecref(oidxh, false);
2528 66 : oidxh = NULL;
2529 66 : on->tkey = true;
2530 66 : on->tnil = false;
2531 66 : on->tnonil = true;
2532 66 : on->tsorted = on->trevsorted = false;
2533 66 : on->tseqbase = oid_nil;
2534 66 : if (sorted || groups) {
2535 66 : bn = BATproject(on, b);
2536 66 : if (bn == NULL)
2537 0 : goto error;
2538 66 : bn->tsorted = true;
2539 66 : if (groups) {
2540 11 : if (BATgroup_internal(groups, NULL, NULL, bn, NULL, g, NULL, NULL, true) != GDK_SUCCEED)
2541 0 : goto error;
2542 11 : if (sorted &&
2543 11 : (*groups)->tkey &&
2544 : g == NULL) {
2545 : /* if new groups bat is key
2546 : * and since there is no input
2547 : * groups bat, we know the
2548 : * result bat is key */
2549 4 : bn->tkey = true;
2550 : }
2551 : }
2552 66 : if (sorted)
2553 66 : *sorted = bn;
2554 : else {
2555 0 : BBPunfix(bn->batCacheid);
2556 0 : bn = NULL;
2557 : }
2558 : }
2559 66 : if (order)
2560 16 : *order = on;
2561 : else {
2562 50 : BBPunfix(on->batCacheid);
2563 50 : on = NULL;
2564 : }
2565 66 : bat_iterator_end(&pbi);
2566 66 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o="
2567 : ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
2568 : ",reverse=%d,nilslast=%d,stable=%d) = ("
2569 : ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2570 : ALGOOPTBATFMT " -- orderidx (" LLFMT
2571 : " usec)\n",
2572 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2573 : ALGOOPTBATPAR(g), reverse, nilslast, stable,
2574 : ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
2575 : ALGOOPTBATPAR(on), GDKusec() - t0);
2576 66 : if (pb != NULL && pb != b)
2577 53 : BBPunfix(pb->batCacheid);
2578 66 : return GDK_SUCCEED;
2579 12650 : } else if (oidxh) {
2580 0 : HEAPdecref(oidxh, false);
2581 0 : oidxh = NULL;
2582 : }
2583 19572 : if (o) {
2584 12098 : bn = BATproject(o, b);
2585 12100 : if (bn == NULL)
2586 0 : goto error;
2587 12100 : if (bn->ttype == TYPE_void || isVIEW(bn)) {
2588 5484 : BAT *b2 = COLcopy(bn, ATOMtype(bn->ttype), true, TRANSIENT);
2589 2742 : BBPunfix(bn->batCacheid);
2590 2742 : bn = b2;
2591 : }
2592 12101 : if (pb) {
2593 11549 : bat_iterator_end(&pbi);
2594 11549 : if (pb != b)
2595 1629 : BBPunfix(pb->batCacheid);
2596 11548 : pbi = bat_iterator(NULL);
2597 11548 : pb = NULL;
2598 : }
2599 : } else {
2600 7474 : bn = COLcopy(b, b->ttype, true, TRANSIENT);
2601 : }
2602 19564 : if (bn == NULL)
2603 0 : goto error;
2604 19564 : if (order) {
2605 : /* prepare order bat */
2606 16645 : if (o) {
2607 : /* make copy of input so that we can refine it;
2608 : * copy can be read-only if we take the shortcut
2609 : * below in the case g is "key" */
2610 10336 : on = COLcopy(o, TYPE_oid,
2611 10336 : g == NULL ||
2612 10336 : !(g->tkey || g->ttype == TYPE_void),
2613 : TRANSIENT);
2614 10338 : if (on == NULL)
2615 0 : goto error;
2616 10338 : BAThseqbase(on, b->hseqbase);
2617 10338 : on->tminpos = BUN_NONE;
2618 10338 : on->tmaxpos = BUN_NONE;
2619 : } else {
2620 : /* create new order */
2621 6309 : on = COLnew(b->hseqbase, TYPE_oid, BATcount(bn), TRANSIENT);
2622 6316 : if (on == NULL)
2623 0 : goto error;
2624 6316 : ords = (oid *) Tloc(on, 0);
2625 29957146 : for (p = 0, q = BATcount(bn); p < q; p++)
2626 29950830 : ords[p] = p + b->hseqbase;
2627 6316 : BATsetcount(on, BATcount(bn));
2628 6316 : on->tkey = true;
2629 6316 : on->tnil = false;
2630 6316 : on->tnonil = true;
2631 : }
2632 : /* COLcopy above can create TYPE_void */
2633 16654 : if (on->ttype != TYPE_void) {
2634 15890 : on->tsorted = on->trevsorted = false; /* it won't be sorted */
2635 15890 : on->tseqbase = oid_nil; /* and hence not dense */
2636 15890 : on->tnosorted = on->tnorevsorted = 0;
2637 : }
2638 16654 : *order = on;
2639 16654 : ords = (oid *) Tloc(on, 0);
2640 : } else {
2641 : ords = NULL;
2642 : }
2643 19573 : if (g) {
2644 12099 : if (g->tkey || g->ttype == TYPE_void) {
2645 : /* if g is "key", all groups are size 1, so no
2646 : * subsorting needed */
2647 5089 : if (sorted) {
2648 4604 : *sorted = bn;
2649 : } else {
2650 485 : BBPunfix(bn->batCacheid);
2651 485 : bn = NULL;
2652 : }
2653 5089 : if (order) {
2654 3911 : *order = on;
2655 3911 : if (o) {
2656 : /* we can inherit sortedness
2657 : * after all */
2658 3911 : on->tsorted = o->tsorted;
2659 3911 : on->trevsorted = o->trevsorted;
2660 3911 : if (o->tnosorted)
2661 56 : on->tnosorted = o->tnosorted;
2662 3911 : if (o->tnorevsorted)
2663 74 : on->tnorevsorted = o->tnorevsorted;
2664 : } else {
2665 : /* we didn't rearrange, so
2666 : * still sorted */
2667 0 : on->tsorted = true;
2668 0 : on->trevsorted = false;
2669 : }
2670 3911 : if (BATcount(on) <= 1) {
2671 0 : on->tsorted = true;
2672 0 : on->trevsorted = true;
2673 : }
2674 : }
2675 5089 : if (groups) {
2676 2912 : gn = COLcopy(g, g->ttype, false, TRANSIENT);
2677 2912 : if (gn == NULL)
2678 0 : goto error;
2679 2912 : *groups = gn;
2680 : }
2681 5089 : bat_iterator_end(&pbi);
2682 5089 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
2683 : ",o=" ALGOOPTBATFMT ",g=" ALGOBATFMT
2684 : ",reverse=%d,nilslast=%d,stable=%d"
2685 : ") = (" ALGOOPTBATFMT ","
2686 : ALGOOPTBATFMT "," ALGOOPTBATFMT
2687 : " -- key group (" LLFMT " usec)\n",
2688 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2689 : ALGOBATPAR(g), reverse, nilslast,
2690 : stable, ALGOOPTBATPAR(bn),
2691 : ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on),
2692 : GDKusec() - t0);
2693 5089 : if (pb != NULL && pb != b)
2694 0 : BBPunfix(pb->batCacheid);
2695 5089 : return GDK_SUCCEED;
2696 : }
2697 7010 : assert(g->ttype == TYPE_oid);
2698 7010 : grps = (oid *) Tloc(g, 0);
2699 7010 : prev = grps[0];
2700 7010 : if (BATmaterialize(bn, BUN_NONE) != GDK_SUCCEED)
2701 0 : goto error;
2702 44632620 : for (r = 0, p = 1, q = BATcount(g); p < q; p++) {
2703 44625609 : if (grps[p] != prev) {
2704 : /* sub sort [r,p) */
2705 7606515 : if (do_sort(Tloc(bn, r),
2706 3668503 : ords ? ords + r : NULL,
2707 3938012 : bn->tvheap ? bn->tvheap->base : NULL,
2708 3938012 : p - r, bn->twidth, ords ? sizeof(oid) : 0,
2709 3938012 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)
2710 0 : goto error;
2711 3938011 : r = p;
2712 3938011 : prev = grps[p];
2713 : }
2714 : }
2715 : /* sub sort [r,q) */
2716 13438 : if (do_sort(Tloc(bn, r),
2717 6427 : ords ? ords + r : NULL,
2718 7011 : bn->tvheap ? bn->tvheap->base : NULL,
2719 7011 : p - r, bn->twidth, ords ? sizeof(oid) : 0,
2720 7011 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)
2721 0 : goto error;
2722 : /* if single group (r==0) the result is (rev)sorted,
2723 : * otherwise (maybe) not */
2724 7011 : bn->tsorted = r == 0 && !reverse && !nilslast;
2725 13987 : bn->trevsorted = r == 0 && reverse && nilslast;
2726 : } else {
2727 7474 : Heap *m = NULL;
2728 : /* only invest in creating an order index if the BAT
2729 : * is persistent */
2730 7474 : if (mkorderidx) {
2731 4636 : assert(orderidxlock);
2732 4636 : if ((m = createOIDXheap(pb, stable)) != NULL &&
2733 : ords == NULL) {
2734 0 : ords = (oid *) m->base + ORDERIDXOFF;
2735 0 : if (o && o->ttype != TYPE_void)
2736 0 : memcpy(ords, Tloc(o, 0), BATcount(o) * sizeof(oid));
2737 0 : else if (o)
2738 0 : for (p = 0, q = BATcount(o); p < q; p++)
2739 0 : ords[p] = p + o->tseqbase;
2740 : else
2741 0 : for (p = 0, q = BATcount(b); p < q; p++)
2742 0 : ords[p] = p + b->hseqbase;
2743 : }
2744 : }
2745 7474 : if ((reverse != nilslast ||
2746 14057 : (reverse ? !bn->trevsorted : !bn->tsorted)) &&
2747 13372 : (BATmaterialize(bn, BUN_NONE) != GDK_SUCCEED ||
2748 6689 : do_sort(Tloc(bn, 0),
2749 : ords,
2750 6689 : bn->tvheap ? bn->tvheap->base : NULL,
2751 6689 : BATcount(bn), bn->twidth, ords ? sizeof(oid) : 0,
2752 6689 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)) {
2753 0 : if (m != NULL) {
2754 0 : HEAPfree(m, true);
2755 0 : GDKfree(m);
2756 : }
2757 0 : goto error;
2758 : }
2759 7476 : bn->tsorted = !reverse && !nilslast;
2760 7476 : bn->trevsorted = reverse && nilslast;
2761 7476 : if (m != NULL) {
2762 4637 : assert(orderidxlock);
2763 4637 : if (pb->torderidx == NULL) {
2764 4637 : if (ords != (oid *) m->base + ORDERIDXOFF) {
2765 4637 : memcpy((oid *) m->base + ORDERIDXOFF,
2766 : ords,
2767 4637 : pbi.count * sizeof(oid));
2768 : }
2769 4637 : pb->torderidx = m;
2770 4637 : persistOIDX(pb);
2771 : } else {
2772 0 : HEAPfree(m, true);
2773 0 : GDKfree(m);
2774 : }
2775 : }
2776 : }
2777 14487 : if (orderidxlock) {
2778 4637 : MT_lock_unset(&pb->batIdxLock);
2779 4637 : orderidxlock = false;
2780 : }
2781 14488 : bn->theap->dirty = true;
2782 14488 : bn->tnosorted = 0;
2783 14488 : bn->tnorevsorted = 0;
2784 14488 : bn->tnokey[0] = bn->tnokey[1] = 0;
2785 14488 : bn->tminpos = BUN_NONE;
2786 14488 : bn->tmaxpos = BUN_NONE;
2787 14488 : if (groups) {
2788 8804 : if (BATgroup_internal(groups, NULL, NULL, bn, NULL, g, NULL, NULL, true) != GDK_SUCCEED)
2789 0 : goto error;
2790 8800 : if ((*groups)->tkey &&
2791 966 : (g == NULL || (g->tsorted && g->trevsorted))) {
2792 : /* if new groups bat is key and the input
2793 : * group bat has a single value (both sorted
2794 : * and revsorted), we know the result bat is
2795 : * key */
2796 1172 : bn->tkey = true;
2797 : }
2798 : }
2799 :
2800 14484 : bat_iterator_end(&pbi);
2801 14485 : if (sorted)
2802 10584 : *sorted = bn;
2803 : else {
2804 3901 : BBPunfix(bn->batCacheid);
2805 3901 : bn = NULL;
2806 : }
2807 :
2808 14491 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o=" ALGOOPTBATFMT
2809 : ",g=" ALGOOPTBATFMT ",reverse=%d,nilslast=%d,"
2810 : "stable=%d) = (" ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2811 : ALGOOPTBATFMT " -- %ssort (" LLFMT " usec)\n",
2812 : ALGOBATPAR(b), ALGOOPTBATPAR(o), ALGOOPTBATPAR(g),
2813 : reverse, nilslast, stable, ALGOOPTBATPAR(bn),
2814 : ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on),
2815 : g ? "grouped " : "", GDKusec() - t0);
2816 14491 : if (pb && pb != b)
2817 1373 : BBPunfix(pb->batCacheid);
2818 : return GDK_SUCCEED;
2819 :
2820 0 : error:
2821 0 : bat_iterator_end(&pbi);
2822 0 : if (orderidxlock)
2823 0 : MT_lock_unset(&pb->batIdxLock);
2824 0 : if (oidxh)
2825 0 : HEAPdecref(oidxh, false);
2826 0 : BBPreclaim(bn);
2827 0 : if (pb && pb != b)
2828 0 : BBPunfix(pb->batCacheid);
2829 0 : BBPreclaim(on);
2830 0 : if (sorted)
2831 0 : *sorted = NULL;
2832 0 : if (order)
2833 0 : *order = NULL;
2834 0 : if (groups)
2835 0 : *groups = NULL;
2836 : return GDK_FAIL;
2837 : }
2838 :
2839 : /* return a new BAT of length n with seqbase hseq, and the constant v
2840 : * in the tail */
2841 : BAT *
2842 2135214 : BATconstant(oid hseq, int tailtype, const void *v, BUN n, role_t role)
2843 : {
2844 2135214 : BAT *bn;
2845 2135214 : void *restrict p;
2846 2135214 : BUN i;
2847 2135214 : lng t0 = 0;
2848 :
2849 2135214 : TRC_DEBUG_IF(ALGO) t0 = GDKusec();
2850 2135214 : if (v == NULL)
2851 : return NULL;
2852 2135214 : bn = COLnew(hseq, tailtype, n, role);
2853 2140097 : if (bn != NULL && n > 0) {
2854 91430 : p = Tloc(bn, 0);
2855 91430 : switch (ATOMstorage(tailtype)) {
2856 25 : case TYPE_void:
2857 25 : v = &oid_nil;
2858 25 : BATtseqbase(bn, oid_nil);
2859 25 : break;
2860 0 : case TYPE_msk:
2861 0 : if (*(msk*)v) {
2862 0 : memset(p, 0xFF, 4 * ((n + 31) / 32));
2863 0 : if (n & 31) {
2864 0 : uint32_t *m = p;
2865 0 : m[n / 32] &= (1U << (n % 32)) - 1;
2866 : }
2867 : } else
2868 0 : memset(p, 0x00, 4 * ((n + 31) / 32));
2869 : break;
2870 14884 : case TYPE_bte:
2871 14884 : memset(p, *(bte*)v, n);
2872 14884 : break;
2873 : case TYPE_sht:
2874 7161740 : for (i = 0; i < n; i++)
2875 7143056 : ((sht *) p)[i] = *(sht *) v;
2876 : break;
2877 : case TYPE_int:
2878 : case TYPE_flt:
2879 : assert(sizeof(int) == sizeof(flt));
2880 222069987 : for (i = 0; i < n; i++)
2881 222063629 : ((int *) p)[i] = *(int *) v;
2882 : break;
2883 : case TYPE_lng:
2884 : case TYPE_dbl:
2885 : assert(sizeof(lng) == sizeof(dbl));
2886 235583615 : for (i = 0; i < n; i++)
2887 235549664 : ((lng *) p)[i] = *(lng *) v;
2888 : break;
2889 : #ifdef HAVE_HGE
2890 : case TYPE_hge:
2891 25205739 : for (i = 0; i < n; i++)
2892 25204117 : ((hge *) p)[i] = *(hge *) v;
2893 : break;
2894 : #endif
2895 : case TYPE_uuid:
2896 200047 : for (i = 0; i < n; i++)
2897 200038 : ((uuid *) p)[i] = *(uuid *) v;
2898 : break;
2899 15747 : case TYPE_str:
2900 : /* insert the first value, then just copy the
2901 : * offset lots of times */
2902 15747 : if (tfastins_nocheck(bn, 0, v) != GDK_SUCCEED) {
2903 0 : BBPreclaim(bn);
2904 0 : return NULL;
2905 : }
2906 15757 : char val[sizeof(var_t)];
2907 15757 : memcpy(val, Tloc(bn, 0), bn->twidth);
2908 15757 : if (bn->twidth == 1 && n > 1) {
2909 : /* single byte value: we have a
2910 : * function for that */
2911 8114 : memset(Tloc(bn, 1), val[0], n - 1);
2912 : } else {
2913 7643 : char *p = Tloc(bn, 0);
2914 7663 : for (i = 1; i < n; i++) {
2915 20 : p += bn->twidth;
2916 20 : memcpy(p, val, bn->twidth);
2917 : }
2918 : }
2919 : break;
2920 : default:
2921 333712 : for (i = 0; i < n; i++)
2922 333563 : if (tfastins_nocheck(bn, i, v) != GDK_SUCCEED) {
2923 0 : BBPreclaim(bn);
2924 0 : return NULL;
2925 : }
2926 : break;
2927 : }
2928 91439 : bn->theap->dirty = true;
2929 91439 : bn->tnil = n >= 1 && ATOMnilptr(tailtype) && (*ATOMcompare(tailtype))(v, ATOMnilptr(tailtype)) == 0;
2930 91363 : BATsetcount(bn, n);
2931 91676 : bn->tsorted = bn->trevsorted = ATOMlinear(tailtype);
2932 91676 : bn->tnonil = !bn->tnil;
2933 91676 : bn->tkey = BATcount(bn) <= 1;
2934 : }
2935 2140343 : TRC_DEBUG(ALGO, "-> " ALGOOPTBATFMT " " LLFMT "usec\n",
2936 : ALGOOPTBATPAR(bn), GDKusec() - t0);
2937 : return bn;
2938 : }
2939 :
2940 : /*
2941 : * BAT Aggregates
2942 : *
2943 : * We retain the size() and card() aggregate results in the column
2944 : * descriptor. We would like to have such functionality in an
2945 : * extensible way for many aggregates, for DD (1) we do not want to
2946 : * change the binary BAT format on disk and (2) aggr and size are the
2947 : * most relevant aggregates.
2948 : *
2949 : * It is all hacked into the aggr[3] records; three adjacent integers
2950 : * that were left over in the column record. We refer to these as if
2951 : * it where an int aggr[3] array. The below routines set and retrieve
2952 : * the aggregate values from the tail of the BAT, as many
2953 : * aggregate-manipulating BAT functions work on tail.
2954 : *
2955 : * The rules are as follows: aggr[0] contains the alignment ID of the
2956 : * column (if set i.e. nonzero). Hence, if this value is nonzero and
2957 : * equal to b->talign, the precomputed aggregate values in
2958 : * aggr[GDK_AGGR_SIZE] and aggr[GDK_AGGR_CARD] hold. However, only one
2959 : * of them may be set at the time. This is encoded by the value
2960 : * int_nil, which cannot occur in these two aggregates.
2961 : *
2962 : * This was now extended to record the property whether we know there
2963 : * is a nil value present by mis-using the highest bits of both
2964 : * GDK_AGGR_SIZE and GDK_AGGR_CARD.
2965 : */
2966 :
2967 : void
2968 49738097 : PROPdestroy_nolock(BAT *b)
2969 : {
2970 49738097 : PROPrec *p = b->tprops;
2971 49738097 : PROPrec *n;
2972 :
2973 49738097 : b->tprops = NULL;
2974 49773971 : while (p) {
2975 4969 : n = p->next;
2976 4969 : assert(p->id != (enum prop_t) 20);
2977 4969 : VALclear(&p->v);
2978 4969 : GDKfree(p);
2979 4969 : p = n;
2980 : }
2981 49769002 : }
2982 :
2983 : void
2984 440 : PROPdestroy(BAT *b)
2985 : {
2986 440 : MT_lock_set(&b->theaplock);
2987 440 : PROPdestroy_nolock(b);
2988 440 : MT_lock_unset(&b->theaplock);
2989 440 : }
2990 :
2991 : ValPtr
2992 216123396 : BATgetprop_nolock(BAT *b, enum prop_t idx)
2993 : {
2994 216123396 : PROPrec *p;
2995 :
2996 216123396 : p = b->tprops;
2997 216130577 : while (p && p->id != idx)
2998 7181 : p = p->next;
2999 216123396 : return p ? &p->v : NULL;
3000 : }
3001 :
3002 : void
3003 426328 : BATrmprop_nolock(BAT *b, enum prop_t idx)
3004 : {
3005 426328 : PROPrec *prop = b->tprops, *prev = NULL;
3006 :
3007 426563 : while (prop) {
3008 426436 : if (prop->id == idx) {
3009 426201 : if (prev)
3010 106 : prev->next = prop->next;
3011 : else
3012 426095 : b->tprops = prop->next;
3013 426201 : VALclear(&prop->v);
3014 426201 : GDKfree(prop);
3015 426201 : return;
3016 : }
3017 235 : prev = prop;
3018 235 : prop = prop->next;
3019 : }
3020 : }
3021 :
3022 : ValPtr
3023 431209 : BATsetprop_nolock(BAT *b, enum prop_t idx, int type, const void *v)
3024 : {
3025 431209 : PROPrec *p;
3026 :
3027 431209 : p = b->tprops;
3028 438945 : while (p && p->id != idx)
3029 7736 : p = p->next;
3030 431209 : if (p == NULL) {
3031 431202 : if ((p = GDKmalloc(sizeof(PROPrec))) == NULL) {
3032 : /* properties are hints, so if we can't create
3033 : * one we ignore the error */
3034 0 : GDKclrerr();
3035 0 : return NULL;
3036 : }
3037 431202 : p->id = idx;
3038 431202 : p->next = b->tprops;
3039 431202 : p->v.vtype = 0;
3040 431202 : b->tprops = p;
3041 : } else {
3042 7 : VALclear(&p->v);
3043 : }
3044 431209 : if (VALinit(&p->v, type, v) == NULL) {
3045 : /* failed to initialize, so remove property */
3046 0 : BATrmprop_nolock(b, idx);
3047 0 : GDKclrerr();
3048 0 : p = NULL;
3049 : }
3050 0 : return p ? &p->v : NULL;
3051 : }
3052 :
3053 : ValPtr
3054 3460823 : BATgetprop(BAT *b, enum prop_t idx)
3055 : {
3056 3460823 : ValPtr p;
3057 :
3058 3460823 : MT_lock_set(&b->theaplock);
3059 3464219 : p = BATgetprop_nolock(b, idx);
3060 3464088 : MT_lock_unset(&b->theaplock);
3061 3465180 : return p;
3062 : }
3063 :
3064 : ValPtr
3065 4779 : BATsetprop(BAT *b, enum prop_t idx, int type, const void *v)
3066 : {
3067 4779 : ValPtr p;
3068 4779 : MT_lock_set(&b->theaplock);
3069 4779 : p = BATsetprop_nolock(b, idx, type, v);
3070 4779 : MT_lock_unset(&b->theaplock);
3071 4779 : return p;
3072 : }
3073 :
3074 : void
3075 2 : BATrmprop(BAT *b, enum prop_t idx)
3076 : {
3077 2 : MT_lock_set(&b->theaplock);
3078 2 : BATrmprop_nolock(b, idx);
3079 2 : MT_lock_unset(&b->theaplock);
3080 2 : }
3081 :
3082 : /*
3083 : * The BATcount_no_nil function counts all BUN in a BAT that have a
3084 : * non-nil tail value.
3085 : * This function does not fail (the callers currently don't check for failure).
3086 : */
3087 : BUN
3088 2254 : BATcount_no_nil(BAT *b, BAT *s)
3089 : {
3090 2254 : BUN cnt = 0;
3091 2254 : const void *restrict p, *restrict nil;
3092 2254 : const char *restrict base;
3093 2254 : int t;
3094 2254 : int (*cmp)(const void *, const void *);
3095 2254 : struct canditer ci;
3096 2254 : oid hseq;
3097 :
3098 2254 : BATcheck(b, 0);
3099 :
3100 2254 : hseq = b->hseqbase;
3101 2254 : canditer_init(&ci, b, s);
3102 2256 : BATiter bi = bat_iterator(b);
3103 2254 : if (bi.nonil) {
3104 2017 : bat_iterator_end(&bi);
3105 2019 : return ci.ncand;
3106 : }
3107 237 : p = bi.base;
3108 237 : t = ATOMbasetype(bi.type);
3109 237 : switch (t) {
3110 0 : case TYPE_void:
3111 0 : cnt = ci.ncand * BATtdensebi(&bi);
3112 0 : break;
3113 0 : case TYPE_msk:
3114 0 : cnt = ci.ncand;
3115 0 : break;
3116 16 : case TYPE_bte:
3117 32 : CAND_LOOP(&ci)
3118 16 : cnt += !is_bte_nil(((const bte *) p)[canditer_next(&ci) - hseq]);
3119 : break;
3120 0 : case TYPE_sht:
3121 0 : CAND_LOOP(&ci)
3122 0 : cnt += !is_sht_nil(((const sht *) p)[canditer_next(&ci) - hseq]);
3123 : break;
3124 111 : case TYPE_int:
3125 2644496 : CAND_LOOP(&ci)
3126 2644385 : cnt += !is_int_nil(((const int *) p)[canditer_next(&ci) - hseq]);
3127 : break;
3128 85 : case TYPE_lng:
3129 156873 : CAND_LOOP(&ci)
3130 156788 : cnt += !is_lng_nil(((const lng *) p)[canditer_next(&ci) - hseq]);
3131 : break;
3132 : #ifdef HAVE_HGE
3133 0 : case TYPE_hge:
3134 0 : CAND_LOOP(&ci)
3135 0 : cnt += !is_hge_nil(((const hge *) p)[canditer_next(&ci) - hseq]);
3136 : break;
3137 : #endif
3138 0 : case TYPE_flt:
3139 0 : CAND_LOOP(&ci)
3140 0 : cnt += !is_flt_nil(((const flt *) p)[canditer_next(&ci) - hseq]);
3141 : break;
3142 0 : case TYPE_dbl:
3143 0 : CAND_LOOP(&ci)
3144 0 : cnt += !is_dbl_nil(((const dbl *) p)[canditer_next(&ci) - hseq]);
3145 : break;
3146 0 : case TYPE_uuid:
3147 0 : CAND_LOOP(&ci)
3148 0 : cnt += !is_uuid_nil(((const uuid *) p)[canditer_next(&ci) - hseq]);
3149 : break;
3150 25 : case TYPE_str:
3151 25 : base = bi.vh->base;
3152 25 : switch (bi.width) {
3153 23 : case 1:
3154 4829 : CAND_LOOP(&ci)
3155 4806 : cnt += base[(var_t) ((const uint8_t *) p)[canditer_next(&ci) - hseq] + GDK_VAROFFSET] != '\200';
3156 : break;
3157 2 : case 2:
3158 312 : CAND_LOOP(&ci)
3159 310 : cnt += base[(var_t) ((const uint16_t *) p)[canditer_next(&ci) - hseq] + GDK_VAROFFSET] != '\200';
3160 : break;
3161 0 : case 4:
3162 0 : CAND_LOOP(&ci)
3163 0 : cnt += base[(var_t) ((const uint32_t *) p)[canditer_next(&ci) - hseq]] != '\200';
3164 : break;
3165 : #if SIZEOF_VAR_T == 8
3166 0 : case 8:
3167 0 : CAND_LOOP(&ci)
3168 0 : cnt += base[(var_t) ((const uint64_t *) p)[canditer_next(&ci) - hseq]] != '\200';
3169 : break;
3170 : #endif
3171 : default:
3172 0 : MT_UNREACHABLE();
3173 : }
3174 : break;
3175 0 : default:
3176 0 : nil = ATOMnilptr(t);
3177 0 : cmp = ATOMcompare(t);
3178 0 : if (nil == NULL) {
3179 0 : cnt = ci.ncand;
3180 0 : } else if (b->tvheap) {
3181 0 : base = b->tvheap->base;
3182 0 : CAND_LOOP(&ci)
3183 0 : cnt += (*cmp)(nil, base + ((const var_t *) p)[canditer_next(&ci) - hseq]) != 0;
3184 : } else {
3185 0 : CAND_LOOP(&ci)
3186 0 : cnt += (*cmp)(BUNtloc(bi, canditer_next(&ci) - hseq), nil) != 0;
3187 : }
3188 : break;
3189 : }
3190 237 : if (cnt == bi.count) {
3191 35 : MT_lock_set(&b->theaplock);
3192 35 : if (cnt == BATcount(b) && bi.h == b->theap) {
3193 : /* we learned something */
3194 35 : b->tnonil = true;
3195 35 : assert(!b->tnil);
3196 35 : b->tnil = false;
3197 : }
3198 35 : bat pbid = VIEWtparent(b);
3199 35 : MT_lock_unset(&b->theaplock);
3200 35 : if (pbid) {
3201 21 : BAT *pb = BATdescriptor(pbid);
3202 21 : if (pb) {
3203 21 : MT_lock_set(&pb->theaplock);
3204 21 : if (cnt == BATcount(pb) &&
3205 0 : bi.h == pb->theap &&
3206 0 : !pb->tnonil) {
3207 0 : pb->tnonil = true;
3208 0 : assert(!pb->tnil);
3209 0 : pb->tnil = false;
3210 : }
3211 21 : MT_lock_unset(&pb->theaplock);
3212 21 : BBPunfix(pb->batCacheid);
3213 : }
3214 : }
3215 : }
3216 237 : bat_iterator_end(&bi);
3217 237 : return cnt;
3218 : }
|