Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024, 2025 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * (c) M. L. Kersten, P. Boncz, S. Manegold, N. Nes, K.S. Mullender
15 : * Common BAT Operations
16 : * We factor out all possible overhead by inlining code. This
17 : * includes the macros BUNhead and BUNtail, which do a test to see
18 : * whether the atom resides in the buns or in a variable storage
19 : * heap.
20 : */
21 : #include "monetdb_config.h"
22 : #include "gdk.h"
23 : #include "gdk_private.h"
24 :
25 : gdk_return
26 56798756 : unshare_varsized_heap(BAT *b)
27 : {
28 56798756 : if (ATOMvarsized(b->ttype) &&
29 22225882 : b->tvheap->parentid != b->batCacheid) {
30 1027 : Heap *h = GDKmalloc(sizeof(Heap));
31 1034 : if (h == NULL)
32 : return GDK_FAIL;
33 1034 : MT_thread_setalgorithm("unshare vheap");
34 2064 : *h = (Heap) {
35 1030 : .parentid = b->batCacheid,
36 1034 : .farmid = BBPselectfarm(b->batRole, TYPE_str, varheap),
37 : .refs = ATOMIC_VAR_INIT(1),
38 : };
39 1030 : strconcat_len(h->filename, sizeof(h->filename),
40 1030 : BBP_physical(b->batCacheid), ".theap", NULL);
41 1033 : if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
42 0 : HEAPfree(h, true);
43 0 : GDKfree(h);
44 0 : return GDK_FAIL;
45 : }
46 1033 : MT_lock_set(&b->theaplock);
47 1032 : Heap *oh = b->tvheap;
48 1032 : b->tvheap = h;
49 1032 : MT_lock_unset(&b->theaplock);
50 1034 : BBPrelease(oh->parentid);
51 1034 : HEAPdecref(oh, false);
52 : }
53 : return GDK_SUCCEED;
54 : }
55 :
56 : /* We try to be clever when appending one string bat to another.
57 : * First of all, we try to actually share the string heap so that we
58 : * don't need an extra copy, and if that can't be done, we see whether
59 : * it makes sense to just quickly copy the whole string heap instead
60 : * of inserting individual strings. See the comments in the code for
61 : * more information. */
62 : static gdk_return
63 85913 : insert_string_bat(BAT *b, BATiter *ni, struct canditer *ci, bool force, bool mayshare, QryCtx *qry_ctx)
64 : {
65 85913 : size_t toff = ~(size_t) 0; /* tail offset */
66 85913 : BUN p, r; /* loop variables */
67 85913 : const void *tp = NULL; /* tail value pointer */
68 85913 : var_t v;
69 85913 : size_t off; /* offset within n's string heap */
70 85913 : BUN cnt = ci->ncand;
71 85913 : BUN oldcnt = BATcount(b);
72 :
73 85913 : assert(b->ttype == TYPE_str);
74 85913 : assert(b->tbaseoff == 0);
75 85913 : assert(b->theap->parentid == b->batCacheid);
76 : /* only transient bats can use some other bat's string heap */
77 85913 : assert(b->batRole == TRANSIENT || b->tvheap->parentid == b->batCacheid);
78 85913 : if (cnt == 0)
79 : return GDK_SUCCEED;
80 :
81 85911 : if (b->tvheap == ni->vh) {
82 : /* vheaps are already shared, continue doing so: we just
83 : * need to append the offsets */
84 20510 : toff = 0;
85 20510 : MT_thread_setalgorithm("shared vheap");
86 65401 : } else if (mayshare && b->batRole == TRANSIENT && oldcnt == 0) {
87 : /* we can share the vheaps, so we then only need to
88 : * append the offsets */
89 51081 : MT_lock_set(&b->theaplock);
90 51276 : bat bid = b->tvheap->parentid;
91 51276 : HEAPdecref(b->tvheap, bid == b->batCacheid);
92 51402 : HEAPincref(ni->vh);
93 51389 : b->tvheap = ni->vh;
94 51389 : b->tascii = ni->ascii;
95 51389 : MT_lock_unset(&b->theaplock);
96 51408 : BBPretain(ni->vh->parentid);
97 51366 : if (bid != b->batCacheid)
98 0 : BBPrelease(bid);
99 51366 : toff = 0;
100 51366 : MT_thread_setalgorithm("share vheap");
101 : } else {
102 : /* no heap sharing, so also make sure the heap isn't
103 : * shared currently (we're not allowed to write in
104 : * another bat's heap) */
105 15354 : if (b->tvheap->parentid != b->batCacheid &&
106 1030 : unshare_varsized_heap(b) != GDK_SUCCEED) {
107 : return GDK_FAIL;
108 : }
109 14324 : if (oldcnt == 0 || (!GDK_ELIMDOUBLES(b->tvheap) &&
110 91 : !GDK_ELIMDOUBLES(ni->vh))) {
111 : /* we'll consider copying the string heap completely
112 : *
113 : * we first estimate how much space the string heap
114 : * should occupy, given the number of rows we need to
115 : * insert, then, if that is way smaller than the actual
116 : * space occupied, we will skip the copy and just insert
117 : * one by one */
118 : size_t len = 0;
119 5880887 : for (int i = 0; i < 1024; i++) {
120 5875083 : p = (BUN) (((double) rand() / RAND_MAX) * (cnt - 1));
121 5875149 : p = canditer_idx(ci, p) - ni->b->hseqbase;
122 5875149 : len += strlen(BUNtvar(*ni, p)) + 1;
123 : }
124 5804 : len = (len + 512) / 1024; /* rounded average length */
125 5804 : r = (GDK_ELIMLIMIT - GDK_STRHASHSIZE) / (len + 12);
126 : /* r is estimate of number of strings in
127 : * double-eliminated area */
128 5804 : BUN ecnt = ci->ncand;
129 5804 : if (ni->b->tunique_est > 0 && ecnt > ni->b->tunique_est)
130 55 : ecnt = (BUN)ni->b->tunique_est;
131 5804 : if (r < ecnt)
132 929 : len = GDK_ELIMLIMIT + (ecnt - r) * len;
133 : else
134 4875 : len = GDK_STRHASHSIZE + ecnt * (len + 12);
135 : /* len is total estimated expected size of vheap */
136 :
137 5804 : if (len > ni->vhfree / 2) {
138 : /* we copy the string heap, perhaps appending */
139 5796 : if (oldcnt == 0) {
140 5759 : toff = 0;
141 5759 : MT_thread_setalgorithm("copy vheap");
142 : } else {
143 37 : toff = (b->tvheap->free + GDK_VARALIGN - 1) & ~(GDK_VARALIGN - 1);
144 37 : MT_thread_setalgorithm("append vheap");
145 : }
146 :
147 5796 : MT_lock_set(&b->theaplock);
148 5796 : if (HEAPgrow(&b->tvheap, toff + ni->vhfree, force) != GDK_SUCCEED) {
149 0 : MT_lock_unset(&b->theaplock);
150 0 : return GDK_FAIL;
151 : }
152 5793 : memcpy(b->tvheap->base + toff, ni->vh->base, ni->vhfree);
153 5793 : b->tvheap->free = toff + ni->vhfree;
154 5793 : b->tvheap->dirty = true;
155 5793 : b->tascii &= ni->ascii;
156 5793 : MT_lock_unset(&b->theaplock);
157 : }
158 : }
159 : }
160 : /* if toff has the initial value of ~0, we insert strings
161 : * individually, otherwise we only copy (insert) offsets */
162 77669 : if (toff == ~(size_t) 0)
163 : v = GDK_VAROFFSET;
164 : else
165 77607 : v = b->tvheap->free - 1;
166 :
167 : /* make sure there is (vertical) space in the offset heap, we
168 : * may also widen thanks to v, set above */
169 86201 : if (GDKupgradevarheap(b, v, oldcnt + cnt < b->batCapacity ? b->batCapacity : oldcnt + cnt, b->batCount) != GDK_SUCCEED) {
170 : return GDK_FAIL;
171 : }
172 :
173 86075 : if (toff == 0 && ni->width == b->twidth && ci->tpe == cand_dense) {
174 : /* we don't need to do any translation of offset
175 : * values, so we can use fast memcpy */
176 77308 : MT_thread_setalgorithm("memcpy offsets");
177 77445 : memcpy(Tloc(b, BATcount(b)), (const char *) ni->base + ((ci->seq - ni->b->hseqbase) << ni->shift), cnt << ni->shift);
178 8767 : } else if (toff != ~(size_t) 0) {
179 : /* we don't need to insert any actual strings since we
180 : * have already made sure that they are all in b's
181 : * string heap at known locations (namely the offset
182 : * in n added to toff), so insert offsets from n after
183 : * adding toff into b */
184 : /* note the use of the "restrict" qualifier here: all
185 : * four pointers below point to the same value, but
186 : * only one of them will actually be used, hence we
187 : * still obey the rule for restrict-qualified
188 : * pointers */
189 184 : const uint8_t *restrict tbp = (const uint8_t *) ni->base;
190 184 : const uint16_t *restrict tsp = (const uint16_t *) ni->base;
191 184 : const uint32_t *restrict tip = (const uint32_t *) ni->base;
192 : #if SIZEOF_VAR_T == 8
193 184 : const uint64_t *restrict tlp = (const uint64_t *) ni->base;
194 : #endif
195 :
196 184 : MT_thread_setalgorithm("copy offset values");
197 184 : r = b->batCount;
198 3527929 : TIMEOUT_LOOP(cnt, qry_ctx) {
199 3527365 : p = canditer_next(ci) - ni->b->hseqbase;
200 3527365 : switch (ni->width) {
201 106 : case 1:
202 106 : v = (var_t) tbp[p] + GDK_VAROFFSET;
203 106 : break;
204 4660 : case 2:
205 4660 : v = (var_t) tsp[p] + GDK_VAROFFSET;
206 4660 : break;
207 3522599 : case 4:
208 3522599 : v = (var_t) tip[p];
209 3522599 : break;
210 : #if SIZEOF_VAR_T == 8
211 0 : case 8:
212 0 : v = (var_t) tlp[p];
213 0 : break;
214 : #endif
215 : default:
216 0 : MT_UNREACHABLE();
217 : }
218 3527365 : v = (var_t) ((size_t) v + toff);
219 3527365 : assert(v >= GDK_VAROFFSET);
220 3527365 : assert((size_t) v < b->tvheap->free);
221 3527365 : switch (b->twidth) {
222 4662 : case 1:
223 4662 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 8));
224 4662 : ((uint8_t *) b->theap->base)[r++] = (uint8_t) (v - GDK_VAROFFSET);
225 4662 : break;
226 761 : case 2:
227 761 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 16));
228 761 : ((uint16_t *) b->theap->base)[r++] = (uint16_t) (v - GDK_VAROFFSET);
229 761 : break;
230 3521942 : case 4:
231 : #if SIZEOF_VAR_T == 8
232 3521942 : assert(v < ((var_t) 1 << 32));
233 : #endif
234 3521942 : ((uint32_t *) b->theap->base)[r++] = (uint32_t) v;
235 3521942 : break;
236 : #if SIZEOF_VAR_T == 8
237 0 : case 8:
238 0 : ((uint64_t *) b->theap->base)[r++] = (uint64_t) v;
239 0 : break;
240 : #endif
241 : default:
242 3527365 : MT_UNREACHABLE();
243 : }
244 : }
245 8583 : } else if (b->tvheap->free < ni->vhfree / 2 ||
246 : GDK_ELIMDOUBLES(b->tvheap)) {
247 : /* if b's string heap is much smaller than n's string
248 : * heap, don't bother checking whether n's string
249 : * values occur in b's string heap; also, if b is
250 : * (still) fully double eliminated, we must continue
251 : * to use the double elimination mechanism */
252 8530 : r = b->batCount;
253 8530 : oid hseq = ni->b->hseqbase;
254 8530 : MT_thread_setalgorithm("insert string values");
255 11768838 : TIMEOUT_LOOP(cnt, qry_ctx) {
256 11751263 : p = canditer_next(ci) - hseq;
257 11647777 : tp = BUNtvar(*ni, p);
258 11647777 : if (tfastins_nocheckVAR(b, r, tp) != GDK_SUCCEED) {
259 : return GDK_FAIL;
260 : }
261 11751306 : r++;
262 : }
263 : } else {
264 : /* Insert values from n individually into b; however,
265 : * we check whether there is a string in b's string
266 : * heap at the same offset as the string is in n's
267 : * string heap (in case b's string heap is a copy of
268 : * n's). If this is the case, we just copy the
269 : * offset, otherwise we insert normally. */
270 53 : r = b->batCount;
271 53 : MT_thread_setalgorithm("insert string values with check");
272 5739 : TIMEOUT_LOOP(cnt, qry_ctx) {
273 5633 : p = canditer_next(ci) - ni->b->hseqbase;
274 5633 : off = BUNtvaroff(*ni, p); /* the offset */
275 5633 : tp = ni->vh->base + off; /* the string */
276 5633 : if (off < b->tvheap->free &&
277 5633 : strcmp(b->tvheap->base + off, tp) == 0) {
278 : /* we found the string at the same
279 : * offset in b's string heap as it was
280 : * in n's string heap, so we don't
281 : * have to insert a new string into b:
282 : * we can just copy the offset */
283 1766 : v = (var_t) off;
284 1766 : switch (b->twidth) {
285 0 : case 1:
286 0 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 8));
287 0 : ((uint8_t *) b->theap->base)[r] = (uint8_t) (v - GDK_VAROFFSET);
288 0 : break;
289 4 : case 2:
290 4 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 16));
291 4 : ((uint16_t *) b->theap->base)[r] = (uint16_t) (v - GDK_VAROFFSET);
292 4 : break;
293 1762 : case 4:
294 : #if SIZEOF_VAR_T == 8
295 1762 : assert(v < ((var_t) 1 << 32));
296 : #endif
297 1762 : ((uint32_t *) b->theap->base)[r] = (uint32_t) v;
298 1762 : break;
299 : #if SIZEOF_VAR_T == 8
300 0 : case 8:
301 0 : ((uint64_t *) b->theap->base)[r] = (uint64_t) v;
302 0 : break;
303 : #endif
304 : default:
305 0 : MT_UNREACHABLE();
306 : }
307 : } else {
308 3867 : if (tfastins_nocheckVAR(b, r, tp) != GDK_SUCCEED) {
309 : return GDK_FAIL;
310 : }
311 : }
312 5633 : r++;
313 : }
314 : }
315 86268 : TIMEOUT_CHECK(qry_ctx, TIMEOUT_HANDLER(GDK_FAIL, qry_ctx));
316 85968 : MT_rwlock_wrlock(&b->thashlock);
317 86362 : MT_lock_set(&b->theaplock);
318 86227 : BATsetcount(b, oldcnt + ci->ncand);
319 86290 : assert(b->batCapacity >= b->batCount);
320 86290 : MT_lock_unset(&b->theaplock);
321 : /* maintain hash */
322 89361 : for (r = oldcnt, cnt = BATcount(b); b->thash && r < cnt; r++) {
323 3026 : HASHappend_locked(b, r, b->tvheap->base + VarHeapVal(Tloc(b, 0), r, b->twidth));
324 : }
325 86335 : BUN nunique = b->thash ? b->thash->nunique : 0;
326 86335 : MT_rwlock_wrunlock(&b->thashlock);
327 86342 : if (nunique != 0) {
328 9 : MT_lock_set(&b->theaplock);
329 9 : b->tunique_est = (double) nunique;
330 9 : MT_lock_unset(&b->theaplock);
331 : }
332 : return GDK_SUCCEED;
333 : }
334 :
335 : static gdk_return
336 517 : append_varsized_bat(BAT *b, BATiter *ni, struct canditer *ci, bool mayshare)
337 : {
338 517 : BUN cnt = ci->ncand, r;
339 517 : oid hseq = ni->b->hseqbase;
340 :
341 : /* only transient bats can use some other bat's vheap */
342 517 : assert(b->batRole == TRANSIENT || b->tvheap->parentid == b->batCacheid);
343 : /* make sure the bats use var_t */
344 517 : assert(b->twidth == ni->width);
345 517 : assert(b->twidth == SIZEOF_VAR_T);
346 517 : if (cnt == 0)
347 : return GDK_SUCCEED;
348 517 : if (cnt > BATcapacity(b) - BATcount(b)) {
349 : /* if needed space exceeds a normal growth extend just
350 : * with what's needed */
351 18 : BUN ncap = BATcount(b) + cnt;
352 18 : BUN grows = BATgrows(b);
353 :
354 18 : if (ncap > grows)
355 : grows = ncap;
356 18 : if (BATextend(b, grows) != GDK_SUCCEED)
357 : return GDK_FAIL;
358 : }
359 517 : if (mayshare) {
360 517 : MT_lock_set(&b->theaplock);
361 518 : if (BATcount(b) == 0 &&
362 234 : b->batRole == TRANSIENT &&
363 148 : ni->restricted == BAT_READ &&
364 148 : b->tvheap != ni->vh) {
365 : /* if b is still empty, in the transient farm,
366 : * and n is read-only, we replace b's vheap with
367 : * a reference to n's */
368 148 : bat bid = b->tvheap->parentid;
369 148 : HEAPdecref(b->tvheap, true);
370 148 : HEAPincref(ni->vh);
371 148 : b->tvheap = ni->vh;
372 148 : BBPretain(ni->vh->parentid);
373 148 : if (bid != b->batCacheid)
374 0 : BBPrelease(bid);
375 : }
376 518 : MT_lock_unset(&b->theaplock);
377 : }
378 518 : if (b->tvheap == ni->vh) {
379 : /* if b and n use the same vheap, we only need to copy
380 : * the offsets from n to b */
381 342 : if (ci->tpe == cand_dense) {
382 : /* fast memcpy since we copy a consecutive
383 : * chunk of memory */
384 342 : memcpy(Tloc(b, BATcount(b)),
385 342 : (const var_t *) ni->base + (ci->seq - hseq),
386 342 : cnt << b->tshift);
387 : } else {
388 0 : var_t *restrict dst = (var_t *) Tloc(b, BATcount(b));
389 0 : const var_t *restrict src = (const var_t *) ni->base;
390 0 : while (cnt > 0) {
391 0 : cnt--;
392 0 : *dst++ = src[canditer_next(ci) - hseq];
393 : }
394 : }
395 342 : MT_rwlock_wrlock(&b->thashlock);
396 342 : MT_lock_set(&b->theaplock);
397 342 : BATsetcount(b, BATcount(b) + ci->ncand);
398 342 : MT_lock_unset(&b->theaplock);
399 : /* maintain hash table */
400 342 : for (BUN i = BATcount(b) - ci->ncand;
401 342 : b->thash && i < BATcount(b);
402 0 : i++) {
403 0 : HASHappend_locked(b, i, b->tvheap->base + *(var_t *) Tloc(b, i));
404 : }
405 342 : BUN nunique = b->thash ? b->thash->nunique : 0;
406 342 : MT_rwlock_wrunlock(&b->thashlock);
407 342 : if (nunique != 0) {
408 0 : MT_lock_set(&b->theaplock);
409 0 : b->tunique_est = (double) nunique;
410 0 : MT_lock_unset(&b->theaplock);
411 : }
412 342 : return GDK_SUCCEED;
413 : }
414 : /* b and n do not share their vheap, so we need to copy data */
415 176 : if (b->tvheap->parentid != b->batCacheid) {
416 : /* if b shares its vheap with some other bat, unshare it */
417 18 : Heap *h = GDKmalloc(sizeof(Heap));
418 18 : if (h == NULL) {
419 : return GDK_FAIL;
420 : }
421 36 : *h = (Heap) {
422 18 : .parentid = b->batCacheid,
423 18 : .farmid = BBPselectfarm(b->batRole, b->ttype, varheap),
424 : .refs = ATOMIC_VAR_INIT(1),
425 : };
426 18 : strconcat_len(h->filename, sizeof(h->filename),
427 18 : BBP_physical(b->batCacheid), ".theap", NULL);
428 18 : if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
429 0 : HEAPfree(h, true);
430 0 : GDKfree(h);
431 0 : return GDK_FAIL;
432 : }
433 18 : MT_lock_set(&b->theaplock);
434 18 : Heap *oh = b->tvheap;
435 18 : b->tvheap = h;
436 18 : MT_lock_unset(&b->theaplock);
437 18 : if (oh->parentid != b->batCacheid)
438 18 : BBPrelease(oh->parentid);
439 18 : HEAPdecref(oh, false);
440 : }
441 176 : if (BATcount(b) == 0 &&
442 86 : ci->tpe == cand_dense && ci->ncand == ni->count) {
443 : /* just copy the heaps */
444 86 : MT_lock_set(&b->theaplock);
445 86 : if (HEAPgrow(&b->tvheap, ni->vhfree, false) != GDK_SUCCEED) {
446 0 : MT_lock_unset(&b->theaplock);
447 0 : return GDK_FAIL;
448 : }
449 85 : memcpy(b->theap->base, ni->base, ni->hfree);
450 85 : memcpy(b->tvheap->base, ni->vh->base, ni->vhfree);
451 85 : b->theap->free = ni->hfree;
452 85 : b->theap->dirty = true;
453 85 : b->tvheap->free = ni->vhfree;
454 85 : b->tvheap->dirty = true;
455 85 : BATsetcount(b, ni->count);
456 86 : b->tnil = ni->nil;
457 86 : b->tnonil = ni->nonil;
458 86 : b->tsorted = ni->sorted;
459 86 : b->tnosorted = ni->nosorted;
460 86 : b->trevsorted = ni->revsorted;
461 86 : b->tnorevsorted = ni->norevsorted;
462 86 : b->tkey = ni->key;
463 86 : b->tnokey[0] = ni->nokey[0];
464 86 : b->tnokey[1] = ni->nokey[1];
465 86 : b->tminpos = ni->minpos;
466 86 : b->tmaxpos = ni->maxpos;
467 86 : b->tunique_est = ni->unique_est;
468 86 : MT_lock_unset(&b->theaplock);
469 86 : return GDK_SUCCEED;
470 : }
471 : /* copy data from n to b */
472 : r = BATcount(b);
473 380593 : for (BUN i = 0; i < cnt; i++) {
474 380505 : BUN p = canditer_next(ci) - hseq;
475 380503 : const void *t = BUNtvar(*ni, p);
476 380503 : if (tfastins_nocheckVAR(b, r, t) != GDK_SUCCEED) {
477 : return GDK_FAIL;
478 : }
479 380503 : r++;
480 : }
481 88 : MT_rwlock_wrlock(&b->thashlock);
482 89 : if (b->thash) {
483 0 : r -= cnt;
484 0 : BATiter bi = bat_iterator_nolock(b);
485 0 : for (BUN i = 0; i < cnt; i++) {
486 0 : const void *t = BUNtvar(bi, r);
487 0 : HASHappend_locked(b, r, t);
488 0 : r++;
489 : }
490 : }
491 89 : BUN nunique = b->thash ? b->thash->nunique : 0;
492 89 : MT_lock_set(&b->theaplock);
493 90 : BATsetcount(b, r);
494 90 : if (nunique != 0)
495 0 : b->tunique_est = (double) nunique;
496 90 : MT_lock_unset(&b->theaplock);
497 90 : MT_rwlock_wrunlock(&b->thashlock);
498 90 : return GDK_SUCCEED;
499 : }
500 :
501 : static gdk_return
502 138 : append_msk_bat(BAT *b, BATiter *ni, struct canditer *ci)
503 : {
504 138 : if (ci->ncand == 0)
505 : return GDK_SUCCEED;
506 138 : if (BATextend(b, BATcount(b) + ci->ncand) != GDK_SUCCEED)
507 : return GDK_FAIL;
508 :
509 138 : MT_lock_set(&b->theaplock);
510 :
511 138 : uint32_t boff = b->batCount % 32;
512 138 : uint32_t *bp = (uint32_t *) b->theap->base + b->batCount / 32;
513 138 : b->batCount += ci->ncand;
514 138 : if (ci->tpe == cand_dense) {
515 138 : const uint32_t *np;
516 138 : uint32_t noff, mask;
517 138 : BUN cnt;
518 138 : noff = (ci->seq - ni->b->hseqbase) % 32;
519 138 : cnt = ci->ncand;
520 138 : np = (const uint32_t *) ni->base + (ci->seq - ni->b->hseqbase) / 32;
521 138 : if (boff == noff) {
522 : /* words of b and n are aligned, so we don't
523 : * need to shift bits around */
524 55 : if (boff + cnt <= 32) {
525 : /* all new bits within one word */
526 49 : if (cnt == 32) {
527 0 : *bp = *np;
528 : } else {
529 49 : mask = ((1U << cnt) - 1) << boff;
530 49 : *bp &= ~mask;
531 49 : *bp |= *np & mask;
532 : }
533 : } else {
534 : /* multiple words of b are affected */
535 6 : if (boff != 0) {
536 : /* first fill up the rest of the first
537 : * word */
538 0 : mask = ~0U << boff;
539 0 : *bp &= ~mask;
540 0 : *bp++ |= *np++ & mask;
541 0 : cnt -= 32 - boff;
542 : }
543 6 : if (cnt >= 32) {
544 : /* copy an integral number of words fast */
545 6 : BUN nw = cnt / 32;
546 6 : memcpy(bp, np, nw*sizeof(int));
547 6 : bp += nw;
548 6 : np += nw;
549 6 : cnt %= 32;
550 : }
551 6 : if (cnt > 0) {
552 : /* do the left over bits */
553 6 : mask = (1U << cnt) - 1;
554 6 : *bp = *np & mask;
555 : }
556 : }
557 83 : } else if (boff > noff) {
558 83 : if (boff + cnt <= 32) {
559 : /* we only need to copy bits from a
560 : * single word of n to a single word
561 : * of b */
562 : /* boff > 0, so cnt < 32, hence the
563 : * shift is ok */
564 74 : mask = (1U << cnt) - 1;
565 74 : *bp &= ~(mask << boff);
566 74 : *bp |= (*np & (mask << noff)) << (boff - noff);
567 : } else {
568 : /* first fill the rest of the last partial
569 : * word of b, so that's 32-boff bits */
570 9 : mask = (1U << (32 - boff)) - 1;
571 9 : *bp &= ~(mask << boff);
572 9 : *bp++ |= (*np & (mask << noff)) << (boff - noff);
573 9 : cnt -= 32 - boff;
574 :
575 : /* set boff and noff to the amount we need to
576 : * shift bits in consecutive words of n around
577 : * to fit into the next word of b; set mask to
578 : * the mask of the bottom bits of n that fit
579 : * in a word of b (and the complement are the
580 : * top bits that go to another word of b) */
581 9 : boff -= noff;
582 9 : noff = 32 - boff;
583 9 : mask = (1U << noff) - 1;
584 141 : while (cnt >= 32) {
585 132 : *bp = (*np++ & ~mask) >> noff;
586 132 : *bp++ |= (*np & mask) << boff;
587 132 : cnt -= 32;
588 : }
589 9 : if (cnt > noff) {
590 : /* the last bits come from two words
591 : * in n */
592 2 : *bp = (*np++ & ~mask) >> noff;
593 2 : cnt -= noff;
594 2 : mask = (1U << cnt) - 1;
595 2 : *bp++ |= (*np & mask) << boff;
596 7 : } else if (cnt > 0) {
597 : /* the last bits come from a single
598 : * word in n */
599 7 : mask = ((1U << cnt) - 1) << noff;
600 7 : *bp = (*np & mask) >> noff;
601 : }
602 : }
603 : } else {
604 : /* boff < noff */
605 0 : if (noff + cnt <= 32) {
606 : /* only need part of the first word of n */
607 0 : assert(cnt < 32); /* noff > 0, so cnt < 32 */
608 0 : mask = (1U << cnt) - 1;
609 0 : *bp &= ~(mask << boff);
610 0 : *bp |= (*np & (mask << noff)) >> (noff - boff);
611 0 : } else if (boff + cnt <= 32) {
612 : /* only need to fill a single word of
613 : * b, but from two of n */
614 0 : if (cnt < 32)
615 0 : *bp &= ~(((1U << cnt) - 1) << boff);
616 : else
617 0 : *bp = 0;
618 0 : mask = ~((1U << noff) - 1);
619 0 : *bp |= (*np++ & mask) >> (noff - boff);
620 0 : cnt -= 32 - noff;
621 0 : mask = (1U << cnt) - 1;
622 0 : *bp |= (*np & mask) << (32 - noff);
623 : } else {
624 0 : if (boff > 0) {
625 : /* fill the rest of the first word of b */
626 0 : cnt -= 32 - boff;
627 0 : *bp &= (1U << boff) - 1;
628 0 : mask = ~((1U << noff) - 1);
629 0 : noff -= boff;
630 0 : boff = 32 - noff;
631 0 : *bp |= (*np++ & mask) >> noff;
632 0 : *bp |= (*np & ((1U << noff) - 1)) << boff;
633 : } else {
634 0 : boff = 32 - noff;
635 : }
636 0 : mask = (1U << noff) - 1;
637 0 : while (cnt >= 32) {
638 0 : *bp = (*np++ & ~mask) >> noff;
639 0 : *bp++ |= (*np & mask) << boff;
640 0 : cnt -= 32;
641 : }
642 0 : if (cnt > 0) {
643 0 : *bp = (*np++ & ~mask) >> noff;
644 0 : if (cnt > noff)
645 0 : *bp++ |= (*np & mask) << boff;
646 : }
647 : }
648 : }
649 : } else {
650 0 : oid o;
651 0 : uint32_t v = boff > 0 ? *bp & ((1U << boff) - 1) : 0;
652 0 : do {
653 0 : for (uint32_t i = boff; i < 32; i++) {
654 0 : o = canditer_next(ci);
655 0 : if (is_oid_nil(o))
656 : break;
657 0 : o -= ni->b->hseqbase;
658 0 : v |= (uint32_t) Tmskval(ni, o - ni->b->hseqbase) << i;
659 : }
660 0 : *bp++ = v;
661 0 : v = 0;
662 0 : boff = 0;
663 0 : } while (!is_oid_nil(o));
664 : }
665 138 : b->theap->dirty = true;
666 138 : b->theap->free = ((b->batCount + 31) / 32) * 4;
667 138 : MT_lock_unset(&b->theaplock);
668 138 : return GDK_SUCCEED;
669 : }
670 :
671 : /* Append the contents of BAT n (subject to the optional candidate
672 : * list s) to BAT b. If b is empty, b will get the seqbase of s if it
673 : * was passed in, and else the seqbase of n. */
674 : static gdk_return
675 2274610 : BATappend2(BAT *b, BAT *n, BAT *s, bool force, bool mayshare)
676 : {
677 2274610 : struct canditer ci;
678 2274610 : BUN r;
679 2274610 : oid hseq = n->hseqbase;
680 2274610 : char buf[64];
681 2274610 : lng t0 = 0;
682 2274610 : const ValRecord *prop = NULL;
683 2274610 : ValRecord minprop, maxprop;
684 2274610 : const void *minbound = NULL, *maxbound = NULL;
685 2274610 : int (*atomcmp) (const void *, const void *) = ATOMcompare(b->ttype);
686 2274610 : bool hlocked = false;
687 :
688 2274610 : if (b == NULL || n == NULL || BATcount(n) == 0) {
689 : return GDK_SUCCEED;
690 : }
691 1323186 : assert(b->theap->parentid == b->batCacheid);
692 :
693 1323186 : TRC_DEBUG_IF(ALGO) {
694 0 : t0 = GDKusec();
695 0 : snprintf(buf, sizeof(buf), ALGOBATFMT, ALGOBATPAR(b));
696 : }
697 :
698 1323186 : ALIGNapp(b, force, GDK_FAIL);
699 :
700 3715218 : if (ATOMstorage(ATOMtype(b->ttype)) != ATOMstorage(ATOMtype(n->ttype))) {
701 0 : GDKerror("Incompatible operands ("ALGOBATFMT" vs. "ALGOBATFMT").\n", ALGOBATPAR(b), ALGOBATPAR(n));
702 0 : return GDK_FAIL;
703 : }
704 :
705 1326200 : if (BATttype(b) != BATttype(n) &&
706 : ATOMtype(b->ttype) != ATOMtype(n->ttype)) {
707 0 : TRC_DEBUG(CHECK, "Interpreting %s as %s.\n",
708 : ATOMname(BATttype(n)), ATOMname(BATttype(b)));
709 : }
710 :
711 1326200 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
712 :
713 1326089 : BATiter ni = bat_iterator(n);
714 :
715 1327181 : canditer_init(&ci, n, s);
716 1325176 : if (ci.ncand == 0) {
717 0 : goto doreturn;
718 : }
719 :
720 1325176 : if (BATcount(b) + ci.ncand > BUN_MAX) {
721 0 : bat_iterator_end(&ni);
722 0 : GDKerror("combined BATs too large\n");
723 0 : return GDK_FAIL;
724 : }
725 :
726 1325176 : if (b->hseqbase + BATcount(b) + ci.ncand >= GDK_oid_max) {
727 0 : bat_iterator_end(&ni);
728 0 : GDKerror("overflow of head value\n");
729 0 : return GDK_FAIL;
730 : }
731 :
732 1325176 : OIDXdestroy(b);
733 1327455 : STRMPdestroy(b); /* TODO: use STRMPappendBitString */
734 1327484 : RTREEdestroy(b);
735 :
736 1325346 : MT_lock_set(&b->theaplock);
737 1326054 : const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
738 1326108 : if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL &&
739 48 : VALcopy(&minprop, prop) != NULL) {
740 48 : minbound = VALptr(&minprop);
741 48 : if (ci.ncand == BATcount(n) &&
742 62 : ni.minpos != BUN_NONE &&
743 14 : atomcmp(BUNtail(ni, ni.minpos), minbound) < 0) {
744 0 : assert(0);
745 : GDKerror("value out of bounds\n");
746 : MT_lock_unset(&b->theaplock);
747 : goto bailout;
748 : }
749 : }
750 1325820 : if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL &&
751 40 : VALcopy(&maxprop, prop) != NULL) {
752 40 : maxbound = VALptr(&maxprop);
753 40 : if (ci.ncand == BATcount(n) &&
754 52 : ni.maxpos != BUN_NONE &&
755 12 : atomcmp(BUNtail(ni, ni.maxpos), maxbound) >= 0) {
756 0 : assert(0);
757 : GDKerror("value out of bounds\n");
758 : MT_lock_unset(&b->theaplock);
759 : goto bailout;
760 : }
761 : }
762 :
763 1324481 : if (BATcount(b) == 0 || b->tmaxpos != BUN_NONE) {
764 401684 : if (ni.maxpos != BUN_NONE) {
765 153937 : BATiter bi = bat_iterator_nolock(b);
766 153937 : if (BATcount(b) == 0 || atomcmp(BUNtail(bi, b->tmaxpos), BUNtail(ni, ni.maxpos)) < 0) {
767 147930 : if (s == NULL) {
768 147790 : b->tmaxpos = BATcount(b) + ni.maxpos;
769 : } else {
770 140 : b->tmaxpos = BUN_NONE;
771 : }
772 : }
773 : } else {
774 247747 : b->tmaxpos = BUN_NONE;
775 : }
776 : }
777 1324481 : if (BATcount(b) == 0 || b->tminpos != BUN_NONE) {
778 401035 : if (ni.minpos != BUN_NONE) {
779 153447 : BATiter bi = bat_iterator_nolock(b);
780 153447 : if (BATcount(b) == 0 || atomcmp(BUNtail(bi, b->tminpos), BUNtail(ni, ni.minpos)) > 0) {
781 146258 : if (s == NULL) {
782 146121 : b->tminpos = BATcount(b) + ni.minpos;
783 : } else {
784 137 : b->tminpos = BUN_NONE;
785 : }
786 : }
787 : } else {
788 247588 : b->tminpos = BUN_NONE;
789 : }
790 : }
791 1324470 : if (ci.ncand > BATcount(b) / gdk_unique_estimate_keep_fraction) {
792 1322857 : b->tunique_est = 0;
793 : }
794 1324470 : MT_lock_unset(&b->theaplock);
795 : /* load hash so that we can maintain it */
796 1327586 : (void) BATcheckhash(b);
797 :
798 1325243 : if (b->ttype == TYPE_void) {
799 : /* b does not have storage, keep it that way if we can */
800 131776 : HASHdestroy(b); /* we're not maintaining the hash here */
801 131754 : MT_lock_set(&b->theaplock);
802 131767 : if (BATtdensebi(&ni) && ci.tpe == cand_dense &&
803 129144 : (BATcount(b) == 0 ||
804 112090 : (BATtdense(b) &&
805 112090 : b->tseqbase + BATcount(b) == n->tseqbase + ci.seq - hseq))) {
806 : /* n is also dense and consecutive with b */
807 129083 : if (BATcount(b) == 0) {
808 17054 : if (minbound && n->tseqbase + ci.seq - hseq < *(const oid *)minbound) {
809 0 : assert(0);
810 : GDKerror("value not within bounds\n");
811 : MT_lock_unset(&b->theaplock);
812 : goto bailout;
813 : }
814 17054 : BATtseqbase(b, n->tseqbase + ci.seq - hseq);
815 : }
816 129057 : if (maxbound && b->tseqbase + BATcount(b) + ci.ncand >= *(const oid *)maxbound) {
817 0 : assert(0);
818 : GDKerror("value not within bounds\n");
819 : MT_lock_unset(&b->theaplock);
820 : goto bailout;
821 : }
822 129057 : BATsetcount(b, BATcount(b) + ci.ncand);
823 129031 : MT_lock_unset(&b->theaplock);
824 129076 : goto doreturn;
825 : }
826 2684 : if ((BATcount(b) == 0 || is_oid_nil(b->tseqbase)) &&
827 20 : ni.type == TYPE_void && is_oid_nil(n->tseqbase)) {
828 : /* both b and n are void/nil */
829 0 : if (notnull) {
830 0 : assert(0);
831 : GDKerror("NULL value not within bounds\n");
832 : MT_lock_unset(&b->theaplock);
833 : goto bailout;
834 : }
835 0 : BATtseqbase(b, oid_nil);
836 0 : BATsetcount(b, BATcount(b) + ci.ncand);
837 0 : MT_lock_unset(&b->theaplock);
838 0 : goto doreturn;
839 : }
840 : /* we need to materialize b; allocate enough capacity */
841 2684 : MT_lock_unset(&b->theaplock);
842 2684 : if (BATmaterialize(b, BATcount(b) + ci.ncand) != GDK_SUCCEED) {
843 0 : goto bailout;
844 : }
845 : }
846 :
847 : /* property setting */
848 1196151 : MT_lock_set(&b->theaplock);
849 1196837 : r = BATcount(b);
850 :
851 1196837 : if (BATcount(b) == 0) {
852 375378 : b->tsorted = ni.sorted;
853 375378 : b->trevsorted = ni.revsorted;
854 375378 : b->tseqbase = oid_nil;
855 375378 : b->tnonil = ni.nonil;
856 375378 : b->tnil = ni.nil && ci.ncand == BATcount(n);
857 375378 : if (ci.tpe == cand_dense) {
858 375218 : b->tnosorted = ci.seq - hseq <= ni.nosorted && ni.nosorted < ci.seq + ci.ncand - hseq ? ni.nosorted + hseq - ci.seq : 0;
859 375218 : b->tnorevsorted = ci.seq - hseq <= ni.norevsorted && ni.norevsorted < ci.seq + ci.ncand - hseq ? ni.norevsorted + hseq - ci.seq : 0;
860 375218 : if (BATtdensebi(&ni)) {
861 2137 : b->tseqbase = n->tseqbase + ci.seq - hseq;
862 : }
863 : } else {
864 160 : b->tnosorted = 0;
865 160 : b->tnorevsorted = 0;
866 : }
867 375378 : b->tkey = ni.key;
868 375378 : if (ci.ncand == BATcount(n)) {
869 373776 : b->tnokey[0] = ni.nokey[0];
870 373776 : b->tnokey[1] = ni.nokey[1];
871 : } else {
872 1602 : b->tnokey[0] = b->tnokey[1] = 0;
873 : }
874 : } else {
875 821459 : BUN last = r - 1;
876 821459 : BATiter bi = bat_iterator_nolock(b);
877 821459 : int xx = ATOMcmp(b->ttype,
878 : BUNtail(ni, ci.seq - hseq),
879 : BUNtail(bi, last));
880 820696 : if (b->tsorted && (!ni.sorted || xx < 0)) {
881 22070 : b->tsorted = false;
882 22070 : b->tnosorted = 0;
883 22070 : b->tseqbase = oid_nil;
884 : }
885 820696 : if (b->trevsorted &&
886 68573 : (!ni.revsorted || xx > 0)) {
887 18677 : b->trevsorted = false;
888 18677 : b->tnorevsorted = 0;
889 : }
890 820696 : if (b->tkey &&
891 75328 : (!(b->tsorted || b->trevsorted) ||
892 60477 : !ni.key || xx == 0)) {
893 20872 : BATkey(b, false);
894 : }
895 820634 : if (b->ttype != TYPE_void && b->tsorted && BATtdense(b) &&
896 2855 : (!BATtdensebi(&ni) ||
897 552 : ci.tpe != cand_dense ||
898 552 : 1 + *(oid *) BUNtloc(bi, last) != BUNtoid(n, ci.seq - hseq))) {
899 2323 : b->tseqbase = oid_nil;
900 : }
901 820634 : b->tnonil &= ni.nonil;
902 1628945 : b->tnil |= ni.nil && ci.ncand == ni.count;
903 : }
904 1196012 : MT_lock_unset(&b->theaplock);
905 1197990 : if (b->ttype == TYPE_str) {
906 86172 : if (insert_string_bat(b, &ni, &ci, force, mayshare, qry_ctx) != GDK_SUCCEED) {
907 0 : goto bailout;
908 : }
909 1111818 : } else if (ATOMvarsized(b->ttype)) {
910 517 : if (append_varsized_bat(b, &ni, &ci, mayshare) != GDK_SUCCEED) {
911 0 : goto bailout;
912 : }
913 1111301 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
914 : /* no bounds and NOT_NULL property on MSK bats */
915 138 : assert(minbound == NULL && maxbound == NULL && !notnull);
916 138 : if (append_msk_bat(b, &ni, &ci) != GDK_SUCCEED) {
917 0 : goto bailout;
918 : }
919 : } else {
920 1111163 : if (ci.ncand > BATcapacity(b) - BATcount(b)) {
921 : /* if needed space exceeds a normal growth
922 : * extend just with what's needed */
923 10707 : BUN ncap = BATcount(b) + ci.ncand;
924 10707 : BUN grows = BATgrows(b);
925 :
926 10674 : if (ncap > grows)
927 : grows = ncap;
928 10674 : if (BATextend(b, grows) != GDK_SUCCEED) {
929 0 : goto bailout;
930 : }
931 : }
932 1111263 : MT_rwlock_wrlock(&b->thashlock);
933 1111908 : hlocked = true;
934 1111908 : if (b->ttype != TYPE_void &&
935 1111596 : ni.type != TYPE_void &&
936 1107547 : ci.tpe == cand_dense) {
937 : /* use fast memcpy if we can */
938 1107491 : memcpy(Tloc(b, BATcount(b)),
939 1107491 : (const char *) ni.base + ((ci.seq - hseq) << ni.shift),
940 1107491 : ci.ncand << ni.shift);
941 1107504 : for (BUN i = 0; b->thash && i < ci.ncand; i++) {
942 4018 : HASHappend_locked(b, r, Tloc(b, r));
943 13 : r++;
944 : }
945 : } else {
946 4417 : const void *atomnil = ATOMnilptr(b->ttype);
947 2640054 : TIMEOUT_LOOP(ci.ncand, qry_ctx) {
948 2631199 : BUN p = canditer_next(&ci) - hseq;
949 2628530 : const void *t = BUNtail(ni, p);
950 2631754 : bool isnil = atomcmp(t, atomnil) == 0;
951 2629217 : if (notnull && isnil) {
952 0 : assert(0);
953 : GDKerror("NULL value not within bounds\n");
954 : goto bailout;
955 2629217 : } else if (minbound &&
956 2629217 : !isnil &&
957 0 : atomcmp(t, minbound) < 0) {
958 0 : assert(0);
959 : GDKerror("value not within bounds\n");
960 : goto bailout;
961 2629217 : } else if (maxbound &&
962 0 : !isnil &&
963 0 : atomcmp(t, maxbound) >= 0) {
964 0 : assert(0);
965 : GDKerror("value not within bounds\n");
966 : goto bailout;
967 2629217 : } else if (tfastins_nocheck(b, r, t) != GDK_SUCCEED) {
968 0 : goto bailout;
969 : }
970 2631150 : if (b->thash)
971 0 : HASHappend_locked(b, r, t);
972 2631202 : r++;
973 : }
974 4417 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
975 : }
976 1107900 : BUN nunique;
977 1107900 : nunique = b->thash ? b->thash->nunique : 0;
978 1107900 : MT_lock_set(&b->theaplock);
979 1110411 : BATsetcount(b, b->batCount + ci.ncand);
980 1110327 : if (nunique != 0)
981 5 : b->tunique_est = (double) nunique;
982 1110327 : MT_lock_unset(&b->theaplock);
983 1111842 : assert(hlocked);
984 1111842 : MT_rwlock_wrunlock(&b->thashlock);
985 1111842 : hlocked = false;
986 : }
987 :
988 1327461 : doreturn:
989 1327461 : bat_iterator_end(&ni);
990 1324942 : if (minbound)
991 48 : VALclear(&minprop);
992 1325353 : if (maxbound)
993 40 : VALclear(&maxprop);
994 1325353 : TRC_DEBUG(ALGO, "b=%s,n=" ALGOBATFMT ",s=" ALGOOPTBATFMT
995 : " -> " ALGOBATFMT " (" LLFMT " usec)\n",
996 : buf, ALGOBATPAR(n), ALGOOPTBATPAR(s), ALGOBATPAR(b),
997 : GDKusec() - t0);
998 :
999 : return GDK_SUCCEED;
1000 : bailout:
1001 0 : if (hlocked)
1002 0 : MT_rwlock_wrunlock(&b->thashlock);
1003 0 : if (minbound)
1004 0 : VALclear(&minprop);
1005 0 : if (maxbound)
1006 0 : VALclear(&maxprop);
1007 0 : bat_iterator_end(&ni);
1008 0 : return GDK_FAIL;
1009 : }
1010 :
1011 : gdk_return
1012 2282397 : BATappend(BAT *b, BAT *n, BAT *s, bool force)
1013 : {
1014 2282397 : return BATappend2(b, n, s, force, true);
1015 : }
1016 :
1017 : gdk_return
1018 4 : BATdel(BAT *b, BAT *d)
1019 : {
1020 4 : void (*atmdel) (Heap *, var_t *) = BATatoms[b->ttype].atomDel;
1021 4 : MT_lock_set(&b->theaplock);
1022 4 : BATiter bi = bat_iterator_nolock(b);
1023 4 : MT_lock_unset(&b->theaplock);
1024 :
1025 4 : assert(ATOMtype(d->ttype) == TYPE_oid);
1026 4 : assert(d->tsorted);
1027 4 : assert(d->tkey);
1028 4 : if (BATcount(d) == 0)
1029 : return GDK_SUCCEED;
1030 4 : OIDXdestroy(b);
1031 4 : HASHdestroy(b);
1032 4 : PROPdestroy(b);
1033 4 : STRMPdestroy(b);
1034 4 : RTREEdestroy(b);
1035 4 : if (BATtdense(d)) {
1036 2 : oid o = d->tseqbase;
1037 2 : BUN c = BATcount(d);
1038 :
1039 2 : if (o + c <= b->hseqbase)
1040 : return GDK_SUCCEED;
1041 2 : if (o < b->hseqbase) {
1042 0 : c -= b->hseqbase - o;
1043 0 : o = b->hseqbase;
1044 : }
1045 2 : if (o - b->hseqbase < b->batInserted) {
1046 0 : GDKerror("cannot delete committed values\n");
1047 0 : return GDK_FAIL;
1048 : }
1049 2 : if (o + c > b->hseqbase + BATcount(b))
1050 0 : c = b->hseqbase + BATcount(b) - o;
1051 2 : if (c == 0)
1052 : return GDK_SUCCEED;
1053 2 : if (atmdel) {
1054 0 : BUN p = o - b->hseqbase;
1055 0 : BUN q = p + c;
1056 0 : while (p < q) {
1057 0 : (*atmdel)(b->tvheap, (var_t *) BUNtloc(bi, p));
1058 0 : p++;
1059 : }
1060 : }
1061 2 : if (BATtdense(b) && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED)
1062 : return GDK_FAIL;
1063 2 : MT_lock_set(&b->theaplock);
1064 2 : if (o + c < b->hseqbase + BATcount(b)) {
1065 0 : o -= b->hseqbase;
1066 0 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1067 0 : BUN n = BATcount(b) - (o + c);
1068 : /* not very efficient, but first see
1069 : * how much this is used */
1070 0 : for (BUN i = 0; i < n; i++)
1071 0 : mskSetVal(b, o + i,
1072 0 : mskGetVal(b, o + c + i));
1073 : } else {
1074 0 : memmove(Tloc(b, o),
1075 0 : Tloc(b, o + c),
1076 0 : b->twidth * (BATcount(b) - (o + c)));
1077 : }
1078 0 : b->theap->dirty = true;
1079 : // o += b->hseqbase; // if this were to be used again
1080 : }
1081 2 : b->batCount -= c;
1082 : } else {
1083 2 : BATiter di = bat_iterator(d);
1084 2 : const oid *o = (const oid *) di.base;
1085 2 : const oid *s;
1086 2 : BUN c = di.count;
1087 2 : BUN nd = 0;
1088 2 : BUN pos;
1089 2 : char *p = NULL;
1090 :
1091 2 : if (o[c - 1] <= b->hseqbase) {
1092 0 : bat_iterator_end(&di);
1093 0 : return GDK_SUCCEED;
1094 : }
1095 2 : while (*o < b->hseqbase) {
1096 0 : o++;
1097 0 : c--;
1098 : }
1099 2 : if (*o - b->hseqbase < b->batInserted) {
1100 0 : bat_iterator_end(&di);
1101 0 : GDKerror("cannot delete committed values\n");
1102 0 : return GDK_FAIL;
1103 : }
1104 2 : if (BATtdense(b) && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED) {
1105 0 : bat_iterator_end(&di);
1106 0 : return GDK_FAIL;
1107 : }
1108 2 : s = o;
1109 2 : pos = *o - b->hseqbase;
1110 2 : if (ATOMstorage(b->ttype) != TYPE_msk)
1111 2 : p = Tloc(b, pos);
1112 6 : while (c > 0 && *o < b->hseqbase + BATcount(b)) {
1113 4 : size_t n;
1114 4 : if (atmdel)
1115 0 : (*atmdel)(b->tvheap, (var_t *) BUNtloc(bi, *o - b->hseqbase));
1116 4 : o++;
1117 4 : c--;
1118 4 : nd++;
1119 4 : if (c == 0 || *o - b->hseqbase >= BATcount(b))
1120 2 : n = b->hseqbase + BATcount(b) - o[-1] - 1;
1121 2 : else if ((oid) (o - s) < *o - *s)
1122 2 : n = o[0] - o[-1] - 1;
1123 : else
1124 : n = 0;
1125 4 : if (n > 0) {
1126 2 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1127 0 : BUN opos = o[-1] + 1 - b->hseqbase;
1128 : /* not very efficient, but
1129 : * first see how much this is
1130 : * used */
1131 0 : for (BUN i = 0; i < n; i++) {
1132 0 : mskSetVal(b, pos + i,
1133 0 : mskGetVal(b, opos + i));
1134 : }
1135 0 : pos += n;
1136 : } else {
1137 2 : n *= b->twidth;
1138 2 : memmove(p,
1139 2 : Tloc(b, o[-1] + 1 - b->hseqbase),
1140 : n);
1141 2 : p += n;
1142 : }
1143 : s = o;
1144 : }
1145 : }
1146 2 : bat_iterator_end(&di);
1147 2 : MT_lock_set(&b->theaplock);
1148 2 : b->theap->dirty = true;
1149 2 : b->batCount -= nd;
1150 : }
1151 4 : if (b->batCount <= 1) {
1152 : /* some trivial properties */
1153 2 : b->tkey = true;
1154 2 : b->tsorted = b->trevsorted = true;
1155 2 : if (b->batCount == 0) {
1156 2 : b->tnil = false;
1157 2 : b->tnonil = true;
1158 : }
1159 : }
1160 : /* not sure about these anymore */
1161 4 : b->tnosorted = b->tnorevsorted = 0;
1162 4 : b->tnokey[0] = b->tnokey[1] = 0;
1163 4 : b->tminpos = BUN_NONE;
1164 4 : b->tmaxpos = BUN_NONE;
1165 4 : b->tunique_est = 0.0;
1166 4 : MT_lock_unset(&b->theaplock);
1167 :
1168 4 : return GDK_SUCCEED;
1169 : }
1170 :
1171 : /*
1172 : * Replace all values in b with values from n whose location is given by
1173 : * the oid in either p or positions.
1174 : * If positions is used, autoincr specifies whether it is the first of a
1175 : * dense range of positions or whether it is a full-blown array of
1176 : * position.
1177 : * If mayappend is set, the position in p/positions may refer to
1178 : * locations beyond the end of b.
1179 : */
1180 : static gdk_return
1181 217037 : BATappend_or_update(BAT *b, BAT *p, const oid *positions, BAT *n,
1182 : bool mayappend, bool autoincr, bool force)
1183 : {
1184 217037 : lng t0 = GDKusec();
1185 217107 : oid pos = oid_nil;
1186 217107 : BUN nunique = 0;
1187 :
1188 217107 : if (b == NULL || b->ttype == TYPE_void || n == NULL) {
1189 : return GDK_SUCCEED;
1190 : }
1191 : /* either p or positions */
1192 217107 : assert((p == NULL) != (positions == NULL));
1193 217107 : if (p != NULL) {
1194 216931 : if (BATcount(p) != BATcount(n)) {
1195 0 : GDKerror("update BATs not the same size\n");
1196 0 : return GDK_FAIL;
1197 : }
1198 216931 : if (ATOMtype(p->ttype) != TYPE_oid) {
1199 0 : GDKerror("positions BAT not type OID\n");
1200 0 : return GDK_FAIL;
1201 : }
1202 216931 : if (BATtdense(p)) {
1203 207984 : pos = p->tseqbase;
1204 207984 : positions = &pos;
1205 207984 : autoincr = true;
1206 207984 : p = NULL;
1207 8947 : } else if (p->ttype != TYPE_void) {
1208 8945 : positions = (const oid *) Tloc(p, 0);
1209 8945 : autoincr = false;
1210 : } else {
1211 : autoincr = false;
1212 : }
1213 176 : } else if (autoincr) {
1214 176 : pos = *positions;
1215 : }
1216 217107 : if (BATcount(n) == 0) {
1217 : return GDK_SUCCEED;
1218 : }
1219 :
1220 29397 : BATiter ni = bat_iterator(n);
1221 :
1222 29403 : OIDXdestroy(b);
1223 29408 : STRMPdestroy(b);
1224 29408 : RTREEdestroy(b);
1225 : /* load hash so that we can maintain it */
1226 29395 : (void) BATcheckhash(b);
1227 :
1228 29393 : MT_lock_set(&b->theaplock);
1229 29378 : if (!force && (b->batRestricted != BAT_WRITE ||
1230 49 : (ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1)) {
1231 0 : MT_lock_unset(&b->theaplock);
1232 0 : bat_iterator_end(&ni);
1233 0 : GDKerror("access denied to %s, aborting.\n", BATgetId(b));
1234 0 : return GDK_FAIL;
1235 : }
1236 29378 : BATiter bi = bat_iterator_nolock(b);
1237 29378 : if (ni.count > BATcount(b) / gdk_unique_estimate_keep_fraction) {
1238 28689 : b->tunique_est = 0;
1239 : }
1240 :
1241 29378 : b->tsorted = b->trevsorted = false;
1242 29378 : b->tnosorted = b->tnorevsorted = 0;
1243 29378 : b->tseqbase = oid_nil;
1244 29378 : b->tkey = false;
1245 29378 : b->tnokey[0] = b->tnokey[1] = 0;
1246 :
1247 29378 : int (*atomcmp)(const void *, const void *) = ATOMcompare(b->ttype);
1248 29378 : const void *nil = ATOMnilptr(b->ttype);
1249 29378 : oid hseqend = b->hseqbase + BATcount(b);
1250 :
1251 29378 : MT_lock_unset(&b->theaplock);
1252 :
1253 30235 : bool anynil = false;
1254 30235 : bool locked = false;
1255 :
1256 30235 : if (b->tvheap) {
1257 1205214 : for (BUN i = 0; i < ni.count; i++) {
1258 1202329 : oid updid;
1259 1202329 : if (positions) {
1260 1201316 : updid = autoincr ? pos++ : *positions++;
1261 : } else {
1262 1013 : updid = BUNtoid(p, i);
1263 : }
1264 :
1265 1202329 : if (updid < b->hseqbase ||
1266 1202329 : (!mayappend && updid >= hseqend)) {
1267 0 : GDKerror("id out of range\n");
1268 0 : goto bailout;
1269 : }
1270 1202329 : updid -= b->hseqbase;
1271 1202329 : if (!force && updid < b->batInserted) {
1272 0 : GDKerror("updating committed value\n");
1273 0 : goto bailout;
1274 : }
1275 :
1276 1202329 : const void *new = BUNtvar(ni, i);
1277 :
1278 1202329 : if (updid >= BATcount(b)) {
1279 23585 : assert(mayappend);
1280 23585 : if (locked) {
1281 4 : MT_rwlock_wrunlock(&b->thashlock);
1282 4 : locked = false;
1283 : }
1284 23585 : if (b->tminpos != bi.minpos ||
1285 23584 : b->tmaxpos != bi.maxpos) {
1286 1 : MT_lock_set(&b->theaplock);
1287 1 : b->tminpos = bi.minpos;
1288 1 : b->tmaxpos = bi.maxpos;
1289 1 : MT_lock_unset(&b->theaplock);
1290 : }
1291 23585 : if (BATcount(b) < updid &&
1292 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1293 0 : bat_iterator_end(&ni);
1294 0 : return GDK_FAIL;
1295 : }
1296 23585 : if (BUNappend(b, new, force) != GDK_SUCCEED) {
1297 0 : bat_iterator_end(&ni);
1298 0 : return GDK_FAIL;
1299 : }
1300 23585 : bi = bat_iterator_nolock(b);
1301 134477 : continue;
1302 : }
1303 :
1304 : /* it is possible that a previous run was killed
1305 : * after an update (with a mmapped tail file)
1306 : * but before that was committed, then the
1307 : * offset may point outside of the vheap */
1308 1178744 : const void *old = BUNtvaroff(bi, updid) < bi.vhfree ? BUNtvar(bi, updid) : NULL;
1309 :
1310 1170882 : if (old && atomcmp(old, new) == 0) {
1311 : /* replacing with the same value:
1312 : * nothing to do */
1313 110892 : continue;
1314 : }
1315 :
1316 1067399 : bool isnil = atomcmp(new, nil) == 0;
1317 1066584 : anynil |= isnil;
1318 1066584 : MT_lock_set(&b->theaplock);
1319 1066906 : if (old == NULL ||
1320 1066906 : (b->tnil &&
1321 785 : !anynil &&
1322 785 : atomcmp(old, nil) == 0)) {
1323 : /* if old value is nil and no new
1324 : * value is, we're not sure anymore
1325 : * about the nil property, so we must
1326 : * clear it */
1327 779 : b->tnil = false;
1328 : }
1329 1066906 : b->tnonil &= !isnil;
1330 1066906 : b->tnil |= isnil;
1331 1066906 : MT_lock_unset(&b->theaplock);
1332 1067215 : if (bi.maxpos != BUN_NONE) {
1333 4236 : if (!isnil &&
1334 2118 : atomcmp(BUNtvar(bi, bi.maxpos), new) < 0) {
1335 : /* new value is larger than
1336 : * previous largest */
1337 24 : bi.maxpos = updid;
1338 4188 : } else if (old == NULL ||
1339 2107 : (atomcmp(BUNtvar(bi, bi.maxpos), old) == 0 &&
1340 13 : atomcmp(new, old) != 0)) {
1341 : /* old value is equal to
1342 : * largest and new value is
1343 : * smaller, so we don't know
1344 : * anymore which is the
1345 : * largest */
1346 13 : bi.maxpos = BUN_NONE;
1347 : }
1348 : }
1349 1067215 : if (bi.minpos != BUN_NONE) {
1350 4224 : if (!isnil &&
1351 2112 : atomcmp(BUNtvar(bi, bi.minpos), new) > 0) {
1352 : /* new value is smaller than
1353 : * previous smallest */
1354 14 : bi.minpos = updid;
1355 4196 : } else if (old == NULL ||
1356 2120 : (atomcmp(BUNtvar(bi, bi.minpos), old) == 0 &&
1357 22 : atomcmp(new, old) != 0)) {
1358 : /* old value is equal to
1359 : * smallest and new value is
1360 : * larger, so we don't know
1361 : * anymore which is the
1362 : * smallest */
1363 22 : bi.minpos = BUN_NONE;
1364 : }
1365 : }
1366 1067215 : if (!locked) {
1367 2388 : MT_rwlock_wrlock(&b->thashlock);
1368 2388 : locked = true;
1369 : }
1370 1067216 : if (old)
1371 1067216 : HASHdelete_locked(&bi, updid, old);
1372 0 : else if (b->thash) {
1373 0 : doHASHdestroy(b, b->thash);
1374 0 : b->thash = NULL;
1375 : }
1376 :
1377 1067186 : var_t d;
1378 1067186 : switch (b->twidth) {
1379 1058651 : case 1:
1380 1058651 : d = (var_t) ((uint8_t *) b->theap->base)[updid] + GDK_VAROFFSET;
1381 1058651 : break;
1382 6503 : case 2:
1383 6503 : d = (var_t) ((uint16_t *) b->theap->base)[updid] + GDK_VAROFFSET;
1384 6503 : break;
1385 1997 : case 4:
1386 1997 : d = (var_t) ((uint32_t *) b->theap->base)[updid];
1387 1997 : break;
1388 : #if SIZEOF_VAR_T == 8
1389 35 : case 8:
1390 35 : d = (var_t) ((uint64_t *) b->theap->base)[updid];
1391 35 : break;
1392 : #endif
1393 : default:
1394 0 : MT_UNREACHABLE();
1395 : }
1396 1067186 : MT_lock_set(&b->theaplock);
1397 1067159 : gdk_return rc = ATOMreplaceVAR(b, &d, new);
1398 1067112 : MT_lock_unset(&b->theaplock);
1399 1067059 : if (rc != GDK_SUCCEED) {
1400 0 : goto bailout;
1401 : }
1402 1067059 : if (b->twidth < SIZEOF_VAR_T &&
1403 1067106 : (b->twidth <= 2 ? d - GDK_VAROFFSET : d) >= ((size_t) 1 << (8 << b->tshift))) {
1404 : /* doesn't fit in current heap, upgrade it */
1405 16 : if (GDKupgradevarheap(b, d, 0, MAX(updid, b->batCount)) != GDK_SUCCEED) {
1406 0 : goto bailout;
1407 : }
1408 : }
1409 : /* in case ATOMreplaceVAR and/or
1410 : * GDKupgradevarheap replaces a heap, we need to
1411 : * reinitialize the iterator */
1412 : {
1413 : /* save and restore minpos/maxpos */
1414 1067059 : BUN minpos = bi.minpos;
1415 1067059 : BUN maxpos = bi.maxpos;
1416 1067059 : bi = bat_iterator_nolock(b);
1417 1067059 : bi.minpos = minpos;
1418 1067059 : bi.maxpos = maxpos;
1419 : }
1420 1067059 : switch (b->twidth) {
1421 1058508 : case 1:
1422 1058508 : ((uint8_t *) b->theap->base)[updid] = (uint8_t) (d - GDK_VAROFFSET);
1423 1058508 : break;
1424 6519 : case 2:
1425 6519 : ((uint16_t *) b->theap->base)[updid] = (uint16_t) (d - GDK_VAROFFSET);
1426 6519 : break;
1427 1997 : case 4:
1428 1997 : ((uint32_t *) b->theap->base)[updid] = (uint32_t) d;
1429 1997 : break;
1430 : #if SIZEOF_VAR_T == 8
1431 35 : case 8:
1432 35 : ((uint64_t *) b->theap->base)[updid] = (uint64_t) d;
1433 35 : break;
1434 : #endif
1435 : default:
1436 0 : MT_UNREACHABLE();
1437 : }
1438 1067059 : HASHinsert_locked(&bi, updid, new);
1439 :
1440 : }
1441 2885 : if (locked) {
1442 2383 : if (b->thash)
1443 2 : nunique = b->thash->nunique;
1444 2383 : MT_rwlock_wrunlock(&b->thashlock);
1445 2383 : locked = false;
1446 : }
1447 2886 : MT_lock_set(&b->theaplock);
1448 2885 : b->tvheap->dirty = true;
1449 2885 : MT_lock_unset(&b->theaplock);
1450 26512 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
1451 0 : assert(b->thash == NULL);
1452 0 : HASHdestroy(b); /* hash doesn't make sense for msk */
1453 0 : for (BUN i = 0; i < ni.count; i++) {
1454 0 : oid updid;
1455 0 : if (positions) {
1456 0 : updid = autoincr ? pos++ : *positions++;
1457 : } else {
1458 0 : updid = BUNtoid(p, i);
1459 : }
1460 :
1461 0 : if (updid < b->hseqbase ||
1462 0 : (!mayappend && updid >= hseqend)) {
1463 0 : GDKerror("id out of range\n");
1464 0 : bat_iterator_end(&ni);
1465 0 : return GDK_FAIL;
1466 : }
1467 0 : updid -= b->hseqbase;
1468 0 : if (!force && updid < b->batInserted) {
1469 0 : GDKerror("updating committed value\n");
1470 0 : bat_iterator_end(&ni);
1471 0 : return GDK_FAIL;
1472 : }
1473 0 : if (updid >= BATcount(b)) {
1474 0 : assert(mayappend);
1475 0 : if (BATcount(b) < updid &&
1476 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1477 0 : bat_iterator_end(&ni);
1478 0 : return GDK_FAIL;
1479 : }
1480 0 : if (BUNappend(b, BUNtmsk(ni, i), force) != GDK_SUCCEED) {
1481 0 : bat_iterator_end(&ni);
1482 0 : return GDK_FAIL;
1483 : }
1484 0 : continue;
1485 : }
1486 0 : mskSetVal(b, updid, Tmskval(&ni, i));
1487 : }
1488 0 : bi = bat_iterator_nolock(b);
1489 26512 : } else if (autoincr) {
1490 18059 : if (pos < b->hseqbase ||
1491 17093 : (!mayappend && pos + ni.count > hseqend)) {
1492 0 : GDKerror("id out of range\n");
1493 0 : bat_iterator_end(&ni);
1494 0 : return GDK_FAIL;
1495 : }
1496 18059 : pos -= b->hseqbase;
1497 18059 : if (!force && pos < b->batInserted) {
1498 0 : GDKerror("updating committed value\n");
1499 0 : bat_iterator_end(&ni);
1500 0 : return GDK_FAIL;
1501 : }
1502 :
1503 18059 : if (pos >= BATcount(b)) {
1504 513 : assert(mayappend);
1505 513 : bat_iterator_end(&ni);
1506 513 : if (BATcount(b) < pos &&
1507 0 : BUNappendmulti(b, NULL, (BUN) (pos - BATcount(b)), force) != GDK_SUCCEED) {
1508 : return GDK_FAIL;
1509 : }
1510 513 : return BATappend(b, n, NULL, force);
1511 : }
1512 17554 : if (pos + ni.count > BATcount(b) &&
1513 8 : BUNappendmulti(b, NULL, (BUN) (pos + ni.count - BATcount(b)), force) != GDK_SUCCEED) {
1514 0 : bat_iterator_end(&ni);
1515 0 : return GDK_FAIL;
1516 : }
1517 17546 : bi = bat_iterator_nolock(b);
1518 :
1519 : /* we copy all of n, so if there are nils in n we get
1520 : * nils in b (and else we don't know) */
1521 17546 : b->tnil = ni.nil;
1522 : /* we may not copy over all of b, so we only know that
1523 : * there are no nils in b afterward if there weren't
1524 : * any in either b or n to begin with */
1525 17546 : b->tnonil &= ni.nonil;
1526 : /* if there is no hash, we don't start the loop, if
1527 : * there is only a persisted hash, it will get destroyed
1528 : * in the first iteration, after which there is no hash
1529 : * and the loop ends */
1530 17546 : MT_rwlock_wrlock(&b->thashlock);
1531 17557 : locked = true;
1532 17557 : for (BUN i = pos, j = pos + ni.count; i < j && b->thash; i++)
1533 27 : HASHdelete_locked(&bi, i, Tloc(b, i));
1534 17530 : if (ni.type == TYPE_void) {
1535 0 : assert(b->ttype == TYPE_oid);
1536 0 : oid *o = Tloc(b, pos);
1537 0 : if (is_oid_nil(ni.tseq)) {
1538 : /* we may or may not overwrite the old
1539 : * min/max values */
1540 0 : bi.minpos = BUN_NONE;
1541 0 : bi.maxpos = BUN_NONE;
1542 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1543 0 : o[i] = oid_nil;
1544 0 : b->tnil = true;
1545 : } else {
1546 0 : oid v = ni.tseq;
1547 : /* we know min/max of n, so we know
1548 : * the new min/max of b if those of n
1549 : * are smaller/larger than the old */
1550 0 : if (bi.minpos != BUN_NONE) {
1551 0 : if (v <= BUNtoid(b, bi.minpos))
1552 0 : bi.minpos = pos;
1553 0 : else if (pos <= bi.minpos && bi.minpos < pos + ni.count)
1554 0 : bi.minpos = BUN_NONE;
1555 : }
1556 0 : if (complex_cand(n)) {
1557 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1558 0 : o[i] = *(oid *)Tpos(&ni, i);
1559 : /* last value */
1560 0 : v = o[ni.count - 1];
1561 : } else {
1562 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1563 0 : o[i] = v++;
1564 : /* last value added (not one beyond) */
1565 0 : v--;
1566 : }
1567 0 : if (bi.maxpos != BUN_NONE) {
1568 0 : if (v >= BUNtoid(b, bi.maxpos))
1569 0 : bi.maxpos = pos + ni.count - 1;
1570 0 : else if (pos <= bi.maxpos && bi.maxpos < pos + ni.count)
1571 0 : bi.maxpos = BUN_NONE;
1572 : }
1573 : }
1574 : } else {
1575 : /* if the extremes of n are at least as
1576 : * extreme as those of b, we can replace b's
1577 : * min/max, else we don't know what b's new
1578 : * min/max are*/
1579 17858 : if (bi.minpos != BUN_NONE && ni.minpos != BUN_NONE &&
1580 328 : atomcmp(BUNtloc(bi, bi.minpos), BUNtail(ni, ni.minpos)) >= 0) {
1581 213 : bi.minpos = pos + ni.minpos;
1582 : } else {
1583 17317 : bi.minpos = BUN_NONE;
1584 : }
1585 17882 : if (bi.maxpos != BUN_NONE && ni.maxpos != BUN_NONE &&
1586 352 : atomcmp(BUNtloc(bi, bi.maxpos), BUNtail(ni, ni.maxpos)) <= 0) {
1587 266 : bi.maxpos = pos + ni.maxpos;
1588 : } else {
1589 17264 : bi.maxpos = BUN_NONE;
1590 : }
1591 17530 : memcpy(Tloc(b, pos), ni.base,
1592 17530 : ni.count << b->tshift);
1593 : }
1594 : /* either we have a hash that was updated above, or we
1595 : * have no hash; we cannot have the case where there is
1596 : * only a persisted (unloaded) hash since it would have
1597 : * been destroyed above */
1598 17530 : if (b->thash != NULL) {
1599 0 : for (BUN i = pos, j = pos + ni.count; i < j; i++)
1600 0 : HASHinsert_locked(&bi, i, Tloc(b, i));
1601 0 : if (b->thash)
1602 0 : nunique = b->thash->nunique;
1603 : }
1604 17544 : MT_rwlock_wrunlock(&b->thashlock);
1605 17554 : locked = false;
1606 17554 : if (ni.count == BATcount(b)) {
1607 : /* if we replaced all values of b by values
1608 : * from n, we can also copy the min/max
1609 : * properties */
1610 8822 : bi.minpos = ni.minpos;
1611 8822 : bi.maxpos = ni.maxpos;
1612 8822 : if (BATtdensebi(&ni)) {
1613 : /* replaced all of b with a dense sequence */
1614 47 : MT_lock_set(&b->theaplock);
1615 47 : BATtseqbase(b, ni.tseq);
1616 47 : MT_lock_unset(&b->theaplock);
1617 : }
1618 : }
1619 : } else {
1620 168747361 : for (BUN i = 0; i < ni.count; i++) {
1621 168738911 : oid updid;
1622 168738911 : if (positions) {
1623 : /* assert(!autoincr) */
1624 168738911 : updid = *positions++;
1625 : } else {
1626 0 : updid = BUNtoid(p, i);
1627 : }
1628 :
1629 168738911 : if (updid < b->hseqbase ||
1630 168738911 : (!mayappend && updid >= hseqend)) {
1631 0 : GDKerror("id out of range\n");
1632 0 : goto bailout;
1633 : }
1634 168738911 : updid -= b->hseqbase;
1635 168738911 : if (!force && updid < b->batInserted) {
1636 0 : GDKerror("updating committed value\n");
1637 0 : goto bailout;
1638 : }
1639 :
1640 168738911 : const void *new = BUNtloc(ni, i);
1641 :
1642 168738911 : if (updid >= BATcount(b)) {
1643 16059 : assert(mayappend);
1644 16059 : if (locked) {
1645 10 : MT_rwlock_wrunlock(&b->thashlock);
1646 10 : locked = false;
1647 : }
1648 16059 : if (b->tminpos != bi.minpos ||
1649 16057 : b->tmaxpos != bi.maxpos) {
1650 3 : MT_lock_set(&b->theaplock);
1651 3 : b->tminpos = bi.minpos;
1652 3 : b->tmaxpos = bi.maxpos;
1653 3 : MT_lock_unset(&b->theaplock);
1654 : }
1655 16059 : if (BATcount(b) < updid &&
1656 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1657 0 : goto bailout;
1658 : }
1659 16059 : if (BUNappend(b, new, force) != GDK_SUCCEED) {
1660 0 : bat_iterator_end(&ni);
1661 0 : return GDK_FAIL;
1662 : }
1663 16059 : bi = bat_iterator_nolock(b);
1664 16059 : continue;
1665 : }
1666 :
1667 168722852 : const void *old = BUNtloc(bi, updid);
1668 168722852 : bool isnil = atomcmp(new, nil) == 0;
1669 165148408 : anynil |= isnil;
1670 165148408 : if (b->tnil &&
1671 1908 : !anynil &&
1672 1908 : atomcmp(old, nil) == 0) {
1673 : /* if old value is nil and no new
1674 : * value is, we're not sure anymore
1675 : * about the nil property, so we must
1676 : * clear it */
1677 1904 : b->tnil = false;
1678 : }
1679 165148408 : b->tnonil &= !isnil;
1680 165148408 : b->tnil |= isnil;
1681 165148408 : if (bi.maxpos != BUN_NONE) {
1682 10672 : if (!isnil &&
1683 5334 : atomcmp(BUNtloc(bi, bi.maxpos), new) < 0) {
1684 : /* new value is larger than
1685 : * previous largest */
1686 137 : bi.maxpos = updid;
1687 5208 : } else if (atomcmp(BUNtloc(bi, bi.maxpos), old) == 0 &&
1688 7 : atomcmp(new, old) != 0) {
1689 : /* old value is equal to
1690 : * largest and new value is
1691 : * smaller, so we don't know
1692 : * anymore which is the
1693 : * largest */
1694 7 : bi.maxpos = BUN_NONE;
1695 : }
1696 : }
1697 165148408 : if (bi.minpos != BUN_NONE) {
1698 10672 : if (!isnil &&
1699 5334 : atomcmp(BUNtloc(bi, bi.minpos), new) > 0) {
1700 : /* new value is smaller than
1701 : * previous smallest */
1702 6 : bi.minpos = updid;
1703 5338 : } else if (atomcmp(BUNtloc(bi, bi.minpos), old) == 0 &&
1704 6 : atomcmp(new, old) != 0) {
1705 : /* old value is equal to
1706 : * smallest and new value is
1707 : * larger, so we don't know
1708 : * anymore which is the
1709 : * smallest */
1710 6 : bi.minpos = BUN_NONE;
1711 : }
1712 : }
1713 :
1714 165148408 : if (!locked) {
1715 8441 : MT_rwlock_wrlock(&b->thashlock);
1716 8441 : locked = true;
1717 : }
1718 165148417 : HASHdelete_locked(&bi, updid, old);
1719 165175095 : switch (b->twidth) {
1720 27276727 : case 1:
1721 27276727 : ((bte *) b->theap->base)[updid] = * (bte *) new;
1722 27276727 : break;
1723 525065 : case 2:
1724 525065 : ((sht *) b->theap->base)[updid] = * (sht *) new;
1725 525065 : break;
1726 33218280 : case 4:
1727 33218280 : ((int *) b->theap->base)[updid] = * (int *) new;
1728 33218280 : break;
1729 98941916 : case 8:
1730 98941916 : ((lng *) b->theap->base)[updid] = * (lng *) new;
1731 98941916 : break;
1732 5213107 : case 16:
1733 : #ifdef HAVE_HGE
1734 5213107 : ((hge *) b->theap->base)[updid] = * (hge *) new;
1735 : #else
1736 : ((uuid *) b->theap->base)[updid] = * (uuid *) new;
1737 : #endif
1738 5213107 : break;
1739 0 : default:
1740 0 : memcpy(BUNtloc(bi, updid), new, ATOMsize(b->ttype));
1741 0 : break;
1742 : }
1743 165175095 : HASHinsert_locked(&bi, updid, new);
1744 : }
1745 8450 : if (locked) {
1746 8440 : if (b->thash)
1747 0 : nunique = b->thash->nunique;
1748 8440 : MT_rwlock_wrunlock(&b->thashlock);
1749 8440 : locked = false;
1750 : }
1751 : }
1752 28891 : bat_iterator_end(&ni);
1753 28879 : MT_lock_set(&b->theaplock);
1754 28882 : if (nunique != 0)
1755 2 : b->tunique_est = (double) nunique;
1756 28882 : b->tminpos = bi.minpos;
1757 28882 : b->tmaxpos = bi.maxpos;
1758 28882 : b->theap->dirty = true;
1759 28882 : MT_lock_unset(&b->theaplock);
1760 28895 : TRC_DEBUG(ALGO,
1761 : "BATreplace(" ALGOBATFMT "," ALGOOPTBATFMT "," ALGOBATFMT ") " LLFMT " usec\n",
1762 : ALGOBATPAR(b), ALGOOPTBATPAR(p), ALGOBATPAR(n),
1763 : GDKusec() - t0);
1764 : return GDK_SUCCEED;
1765 :
1766 0 : bailout:
1767 0 : bat_iterator_end(&ni);
1768 0 : if (locked) {
1769 0 : Hash *h = b->thash;
1770 0 : b->thash = NULL;
1771 0 : MT_rwlock_wrunlock(&b->thashlock);
1772 0 : doHASHdestroy(b, h);
1773 : }
1774 : return GDK_FAIL;
1775 : }
1776 :
1777 : /* replace values from b at locations specified in p with values in n */
1778 : gdk_return
1779 216092 : BATreplace(BAT *b, BAT *p, BAT *n, bool force)
1780 : {
1781 216092 : return BATappend_or_update(b, p, NULL, n, false, false, force);
1782 : }
1783 :
1784 : /* like BATreplace, but p may specify locations beyond the end of b */
1785 : gdk_return
1786 1110 : BATupdate(BAT *b, BAT *p, BAT *n, bool force)
1787 : {
1788 1110 : return BATappend_or_update(b, p, NULL, n, true, false, force);
1789 : }
1790 :
1791 : #if 0 /* not used */
1792 : /* like BATreplace, but the positions are given by an array of oid values */
1793 : gdk_return
1794 : BATreplacepos(BAT *b, const oid *positions, BAT *n, bool autoincr, bool force)
1795 : {
1796 : return BATappend_or_update(b, NULL, positions, n, false, autoincr, force);
1797 : }
1798 : #endif
1799 :
1800 : /* like BATreplace, but the positions are given by an array of oid
1801 : * values, and they may specify locations beyond the end of b */
1802 : gdk_return
1803 176 : BATupdatepos(BAT *b, const oid *positions, BAT *n, bool autoincr, bool force)
1804 : {
1805 176 : return BATappend_or_update(b, NULL, positions, n, true, autoincr, force);
1806 : }
1807 :
1808 : /*
1809 : * BAT Selections
1810 : * The BAT selectors are among the most heavily used operators.
1811 : * Their efficient implementation is therefore mandatory.
1812 : *
1813 : * BAT slice
1814 : * This function returns a horizontal slice from a BAT. It optimizes
1815 : * execution by avoiding to copy when the BAT is memory mapped (in
1816 : * this case, an independent submap is created) or else when it is
1817 : * read-only, then a VIEW bat is created as a result.
1818 : *
1819 : * If a new copy has to be created, this function takes care to
1820 : * preserve void-columns (in this case, the seqbase has to be
1821 : * recomputed in the result).
1822 : *
1823 : * The selected range is excluding the high value.
1824 : */
1825 : BAT *
1826 12519814 : BATslice(BAT *b, BUN l, BUN h)
1827 : {
1828 12519814 : BUN low = l;
1829 12519814 : BAT *bn = NULL;
1830 :
1831 12519814 : BATcheck(b, NULL);
1832 12519814 : BATiter bi = bat_iterator(b);
1833 12532772 : if (l > bi.count)
1834 : l = bi.count;
1835 12532772 : if (h > bi.count)
1836 : h = bi.count;
1837 12532772 : if (h < l)
1838 : h = l;
1839 :
1840 12532772 : if (complex_cand(b)) {
1841 : /* slicing a candidate list with exceptions */
1842 95 : struct canditer ci;
1843 95 : canditer_init(&ci, NULL, b);
1844 95 : if (b->hseqbase + l >= ci.hseq) {
1845 95 : l = b->hseqbase + l - ci.hseq;
1846 95 : h = b->hseqbase + h - ci.hseq;
1847 : } else {
1848 0 : l = 0;
1849 0 : if (b->hseqbase + h >= ci.hseq)
1850 0 : h = b->hseqbase + h - ci.hseq;
1851 : else
1852 : h = 0;
1853 : }
1854 95 : bn = canditer_slice(&ci, l, h);
1855 95 : goto doreturn;
1856 : }
1857 : /* If the source BAT is readonly, then we can obtain a VIEW
1858 : * that just reuses the memory of the source. */
1859 12532677 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1860 : /* forget about slices for bit masks: we can't deal
1861 : * with difference in alignment, so we'll just make a
1862 : * copy */
1863 0 : bn = COLnew((oid) (b->hseqbase + low), b->ttype, h - l, TRANSIENT);
1864 : /* we use BATappend with a candidate list to easily
1865 : * copy the part of b that we need */
1866 0 : BAT *s = BATdense(0, (oid) (b->hseqbase + low), h - l);
1867 0 : if (bn == NULL ||
1868 0 : s == NULL ||
1869 0 : BATappend(bn, b, s, false) != GDK_SUCCEED) {
1870 0 : BBPreclaim(bn);
1871 0 : BBPreclaim(s);
1872 0 : bn = NULL;
1873 0 : goto doreturn;
1874 : }
1875 0 : BBPunfix(s->batCacheid);
1876 0 : goto doreturn;
1877 : }
1878 12532677 : restrict_t prestricted;
1879 14530387 : if (bi.restricted == BAT_READ && VIEWtparent(b)) {
1880 1993131 : BAT *pb = BBP_desc(VIEWtparent(b));
1881 1993131 : MT_lock_set(&pb->theaplock);
1882 1992684 : prestricted = pb->batRestricted;
1883 1992684 : MT_lock_unset(&pb->theaplock);
1884 : } else {
1885 : prestricted = BAT_WRITE; /* just initialize with anything */
1886 : }
1887 12537256 : if (bi.restricted == BAT_READ &&
1888 12482387 : (!VIEWtparent(b) || prestricted == BAT_READ)) {
1889 12482284 : bn = VIEWcreate(b->hseqbase + low, b, l, h);
1890 12482284 : if (bn == NULL)
1891 : goto doreturn;
1892 : } else {
1893 : /* create a new BAT and put everything into it */
1894 54972 : BUN p = l;
1895 54972 : BUN q = h;
1896 :
1897 54972 : bn = COLnew((oid) (b->hseqbase + low), BATtdensebi(&bi) || (b->ttype == TYPE_oid && h == l) ? TYPE_void : b->ttype, h - l, TRANSIENT);
1898 54994 : if (bn == NULL)
1899 0 : goto doreturn;
1900 :
1901 54994 : if (bn->ttype == TYPE_void) {
1902 37148 : BATsetcount(bn, h - l);
1903 37100 : BATtseqbase(bn, is_oid_nil(bi.tseq) ? oid_nil : h == l ? 0 : (oid) (bi.tseq + low));
1904 17846 : } else if (bn->tvheap == NULL) {
1905 11273 : assert(BATatoms[bn->ttype].atomPut == NULL);
1906 11273 : memcpy(Tloc(bn, 0), (const char *) bi.base + (p << bi.shift),
1907 11273 : (q - p) << bn->tshift);
1908 11273 : bn->theap->dirty = true;
1909 11273 : BATsetcount(bn, h - l);
1910 : } else {
1911 1717163 : for (; p < q; p++) {
1912 1710624 : if (bunfastapp(bn, BUNtail(bi, p)) != GDK_SUCCEED) {
1913 0 : BBPreclaim(bn);
1914 0 : bn = NULL;
1915 0 : goto doreturn;
1916 : }
1917 : }
1918 : }
1919 54895 : bn->theap->dirty = true;
1920 54895 : bn->tsorted = bi.sorted || bn->batCount <= 1;
1921 54895 : bn->trevsorted = bi.revsorted || bn->batCount <= 1;
1922 54895 : bn->tkey = bi.key || bn->batCount <= 1;
1923 54895 : bn->tnonil = bi.nonil;
1924 54895 : bn->tnil = false; /* we don't know */
1925 54895 : if (bi.nosorted > l && bi.nosorted < h && !bn->tsorted)
1926 2451 : bn->tnosorted = bi.nosorted - l;
1927 : else
1928 52444 : bn->tnosorted = 0;
1929 54895 : if (bi.norevsorted > l && bi.norevsorted < h && !bn->trevsorted)
1930 4878 : bn->tnorevsorted = bi.norevsorted - l;
1931 : else
1932 50017 : bn->tnorevsorted = 0;
1933 54895 : if (bi.nokey[0] >= l && bi.nokey[0] < h &&
1934 44713 : bi.nokey[1] >= l && bi.nokey[1] < h &&
1935 428 : bi.nokey[0] != bi.nokey[1] &&
1936 : !bn->tkey) {
1937 427 : bn->tnokey[0] = bi.nokey[0] - l;
1938 427 : bn->tnokey[1] = bi.nokey[1] - l;
1939 : } else {
1940 54468 : bn->tnokey[0] = bn->tnokey[1] = 0;
1941 : }
1942 : }
1943 12536187 : doreturn:
1944 12536187 : bat_iterator_end(&bi);
1945 12538547 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",lo=" BUNFMT ",hi=" BUNFMT " -> "
1946 : ALGOOPTBATFMT "\n",
1947 : ALGOBATPAR(b), l, h, ALGOOPTBATPAR(bn));
1948 : return bn;
1949 : }
1950 :
1951 : #define BAT_ORDERED(TPE) \
1952 : do { \
1953 : const TPE *restrict vals = Tloc(b, 0); \
1954 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
1955 : if (vals[p - 1] > vals[p]) { \
1956 : b->tnosorted = p; \
1957 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
1958 : goto doreturn; \
1959 : } else if (vals[p - 1] < vals[p]) { \
1960 : if (!b->trevsorted && b->tnorevsorted == 0) { \
1961 : b->tnorevsorted = p; \
1962 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b)); \
1963 : } \
1964 : } else if (!b->tkey && b->tnokey[1] == 0) { \
1965 : b->tnokey[0] = p - 1; \
1966 : b->tnokey[1] = p; \
1967 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b)); \
1968 : } \
1969 : } \
1970 : } while (0)
1971 :
1972 : #define BAT_ORDERED_FP(TPE) \
1973 : do { \
1974 : const TPE *restrict vals = Tloc(b, 0); \
1975 : TPE prev = vals[0]; \
1976 : bool prevnil = is_##TPE##_nil(prev); \
1977 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
1978 : TPE next = vals[p]; \
1979 : int cmp = prevnil ? -!(prevnil = is_##TPE##_nil(next)) : (prevnil = is_##TPE##_nil(next)) ? 1 : (prev > next) - (prev < next); \
1980 : prev = next; \
1981 : if (cmp > 0) { \
1982 : b->tnosorted = bi.nosorted = p; \
1983 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
1984 : goto doreturn; \
1985 : } else if (cmp < 0) { \
1986 : if (!b->trevsorted && b->tnorevsorted == 0) { \
1987 : b->tnorevsorted = bi.norevsorted = p; \
1988 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b)); \
1989 : } \
1990 : } else if (!b->tkey && b->tnokey[1] == 0) { \
1991 : b->tnokey[0] = bi.nokey[0] = p - 1; \
1992 : b->tnokey[1] = bi.nokey[1] = p; \
1993 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b)); \
1994 : } \
1995 : } \
1996 : } while (0)
1997 :
1998 : /* Return whether the BAT is ordered or not. If we don't know, invest
1999 : * in a scan and record the results in the bat descriptor. If during
2000 : * the scan we happen to find evidence that the BAT is not reverse
2001 : * sorted, we record the location. */
2002 : bool
2003 3171122 : BATordered(BAT *b)
2004 : {
2005 3171122 : lng t0 = GDKusec();
2006 3171845 : bool sorted;
2007 :
2008 3171845 : MT_rwlock_rdlock(&b->thashlock);
2009 3173207 : MT_lock_set(&b->theaplock);
2010 3172300 : if (b->ttype == TYPE_void || b->tsorted || BATcount(b) == 0) {
2011 527495 : MT_lock_unset(&b->theaplock);
2012 527842 : MT_rwlock_rdunlock(&b->thashlock);
2013 527842 : return true;
2014 : }
2015 2644805 : if (b->tnosorted > 0 || !ATOMlinear(b->ttype)) {
2016 2132597 : MT_lock_unset(&b->theaplock);
2017 2132806 : MT_rwlock_rdunlock(&b->thashlock);
2018 2132806 : return false;
2019 : }
2020 :
2021 : /* There are a few reasons why we need a lock here. It may be
2022 : * that multiple threads call this functions at the same time
2023 : * (happens a lot with mitosis/mergetable), but we only need to
2024 : * scan the bat in one thread: the others can reap the rewards
2025 : * when that one thread is done. Also, we need the heap to
2026 : * remain accessible (could have used bat_iterator for that),
2027 : * and, and this is the killer argument, we may need to make
2028 : * changes to the bat descriptor. */
2029 512208 : BATiter bi = bat_iterator_nolock(b);
2030 512208 : if (!b->tsorted && b->tnosorted == 0) {
2031 923447 : switch (ATOMbasetype(b->ttype)) {
2032 108499 : case TYPE_bte:
2033 130974542 : BAT_ORDERED(bte);
2034 : break;
2035 10530 : case TYPE_sht:
2036 1642930 : BAT_ORDERED(sht);
2037 : break;
2038 329114 : case TYPE_int:
2039 120095695 : BAT_ORDERED(int);
2040 : break;
2041 10253 : case TYPE_lng:
2042 61012366 : BAT_ORDERED(lng);
2043 : break;
2044 : #ifdef HAVE_HGE
2045 376 : case TYPE_hge:
2046 8002678 : BAT_ORDERED(hge);
2047 : break;
2048 : #endif
2049 975 : case TYPE_flt:
2050 8007246 : BAT_ORDERED_FP(flt);
2051 : break;
2052 798 : case TYPE_dbl:
2053 8240059 : BAT_ORDERED_FP(dbl);
2054 : break;
2055 : case TYPE_str:
2056 21030307 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2057 21015475 : int c;
2058 21015475 : const char *p1 = BUNtvar(bi, p - 1);
2059 21015475 : const char *p2 = BUNtvar(bi, p);
2060 21015475 : if (p1 == p2)
2061 : c = 0;
2062 2262799 : else if (p1[0] == '\200') {
2063 1780 : if (p2[0] == '\200')
2064 : c = 0;
2065 : else
2066 : c = -1;
2067 2261019 : } else if (p2[0] == '\200')
2068 : c = 1;
2069 : else
2070 2259885 : c = strcmp(p1, p2);
2071 2259885 : if (c > 0) {
2072 36654 : b->tnosorted = bi.nosorted = p;
2073 36654 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2074 36654 : goto doreturn;
2075 20978821 : } else if (c < 0) {
2076 2225718 : assert(!b->trevsorted);
2077 2225718 : if (b->tnorevsorted == 0) {
2078 17456 : b->tnorevsorted = bi.norevsorted = p;
2079 17456 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b));
2080 : }
2081 18753103 : } else if (b->tnokey[1] == 0) {
2082 4925 : assert(!b->tkey);
2083 4925 : b->tnokey[0] = bi.nokey[0] = p - 1;
2084 4925 : b->tnokey[1] = bi.nokey[1] = p;
2085 20978821 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b));
2086 : }
2087 : }
2088 : break;
2089 186 : default: {
2090 186 : int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
2091 2482 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2092 2438 : int c;
2093 2438 : if ((c = cmpf(BUNtail(bi, p - 1), BUNtail(bi, p))) > 0) {
2094 142 : b->tnosorted = bi.nosorted = p;
2095 142 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2096 142 : goto doreturn;
2097 2296 : } else if (c < 0) {
2098 2246 : if (!b->trevsorted && b->tnorevsorted == 0) {
2099 90 : b->tnorevsorted = bi.norevsorted = p;
2100 90 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b));
2101 : }
2102 50 : } else if (!b->tkey && b->tnokey[1] == 0) {
2103 8 : b->tnokey[0] = bi.nokey[0] = p - 1;
2104 8 : b->tnokey[1] = bi.nokey[1] = p;
2105 2296 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b));
2106 : }
2107 : }
2108 : break;
2109 : }
2110 : }
2111 : /* we only get here if we completed the scan; note that
2112 : * if we didn't record evidence about *reverse*
2113 : * sortedness, we know that the BAT is also reverse
2114 : * sorted; similarly, if we didn't record evidence about
2115 : * keyness, we know the BAT is key */
2116 167204 : b->tsorted = bi.sorted = true;
2117 167204 : TRC_DEBUG(ALGO, "Fixed sorted for " ALGOBATFMT " (" LLFMT " usec)\n", ALGOBATPAR(b), GDKusec() - t0);
2118 167577 : if (!b->trevsorted && b->tnorevsorted == 0) {
2119 80233 : b->trevsorted = bi.revsorted = true;
2120 80233 : TRC_DEBUG(ALGO, "Fixed revsorted for " ALGOBATFMT "\n", ALGOBATPAR(b));
2121 : }
2122 167577 : if (!b->tkey && b->tnokey[1] == 0) {
2123 52708 : b->tkey = bi.key = true;
2124 52708 : TRC_DEBUG(ALGO, "Fixed key for " ALGOBATFMT "\n", ALGOBATPAR(b));
2125 : }
2126 : }
2127 167577 : doreturn:
2128 512590 : MT_rwlock_rdunlock(&b->thashlock);
2129 512521 : sorted = b->tsorted;
2130 512521 : bat pbid = VIEWtparent(b);
2131 512521 : MT_lock_unset(&b->theaplock);
2132 512938 : if (pbid) {
2133 282695 : BAT *pb = BBP_desc(pbid);
2134 282695 : MT_lock_set(&pb->theaplock);
2135 282692 : if (bi.count == BATcount(pb) &&
2136 207108 : bi.h == pb->theap &&
2137 207108 : bi.type == pb->ttype) {
2138 : /* add to knowledge in parent bat */
2139 207105 : pb->tsorted |= bi.sorted;
2140 207105 : if (pb->tnosorted == 0)
2141 207105 : pb->tnosorted = bi.nosorted;
2142 207105 : pb->trevsorted |= bi.revsorted;
2143 207105 : if (pb->tnorevsorted == 0)
2144 33558 : pb->tnorevsorted = bi.norevsorted;
2145 207105 : pb->tkey |= bi.key;
2146 207105 : if (pb->tnokey[1] == 0) {
2147 183628 : pb->tnokey[0] = bi.nokey[0];
2148 183628 : pb->tnokey[1] = bi.nokey[1];
2149 : }
2150 : }
2151 282692 : MT_lock_unset(&pb->theaplock);
2152 : }
2153 : return sorted;
2154 : }
2155 :
2156 : #define BAT_REVORDERED(TPE) \
2157 : do { \
2158 : const TPE *restrict vals = Tloc(b, 0); \
2159 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
2160 : if (vals[p - 1] < vals[p]) { \
2161 : b->tnorevsorted = p; \
2162 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
2163 : goto doreturn; \
2164 : } \
2165 : } \
2166 : } while (0)
2167 :
2168 : #define BAT_REVORDERED_FP(TPE) \
2169 : do { \
2170 : const TPE *restrict vals = Tloc(b, 0); \
2171 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
2172 : TPE prev = vals[p - 1], next = vals[p]; \
2173 : int cmp = is_flt_nil(prev) ? -!is_flt_nil(next) : is_flt_nil(next) ? 1 : (prev > next) - (prev < next); \
2174 : if (cmp < 0) { \
2175 : b->tnorevsorted = bi.norevsorted = p; \
2176 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
2177 : goto doreturn; \
2178 : } \
2179 : } \
2180 : } while (0)
2181 :
2182 : /* Return whether the BAT is reverse ordered or not. If we don't
2183 : * know, invest in a scan and record the results in the bat
2184 : * descriptor. */
2185 : bool
2186 2957618 : BATordered_rev(BAT *b)
2187 : {
2188 2957618 : lng t0 = GDKusec();
2189 2958251 : bool revsorted;
2190 :
2191 2958251 : if (b == NULL || !ATOMlinear(b->ttype))
2192 : return false;
2193 2958245 : MT_rwlock_rdlock(&b->thashlock);
2194 2958184 : MT_lock_set(&b->theaplock);
2195 2958190 : if (BATcount(b) <= 1 || b->trevsorted) {
2196 280067 : MT_lock_unset(&b->theaplock);
2197 280151 : MT_rwlock_rdunlock(&b->thashlock);
2198 280151 : return true;
2199 : }
2200 2678123 : if (b->ttype == TYPE_void) {
2201 23195 : MT_lock_unset(&b->theaplock);
2202 23208 : MT_rwlock_rdunlock(&b->thashlock);
2203 23209 : return is_oid_nil(b->tseqbase);
2204 : }
2205 2654928 : if (BATtdense(b) || b->tnorevsorted > 0) {
2206 2553156 : MT_lock_unset(&b->theaplock);
2207 2553512 : MT_rwlock_rdunlock(&b->thashlock);
2208 2553512 : return false;
2209 : }
2210 101772 : BATiter bi = bat_iterator_nolock(b);
2211 101772 : if (!b->trevsorted && b->tnorevsorted == 0) {
2212 166082 : switch (ATOMbasetype(b->ttype)) {
2213 35580 : case TYPE_bte:
2214 10428064 : BAT_REVORDERED(bte);
2215 : break;
2216 4460 : case TYPE_sht:
2217 3170778 : BAT_REVORDERED(sht);
2218 : break;
2219 36198 : case TYPE_int:
2220 1208669 : BAT_REVORDERED(int);
2221 : break;
2222 4814 : case TYPE_lng:
2223 2697662 : BAT_REVORDERED(lng);
2224 : break;
2225 : #ifdef HAVE_HGE
2226 123 : case TYPE_hge:
2227 992 : BAT_REVORDERED(hge);
2228 : break;
2229 : #endif
2230 511 : case TYPE_flt:
2231 1997 : BAT_REVORDERED_FP(flt);
2232 : break;
2233 322 : case TYPE_dbl:
2234 485514 : BAT_REVORDERED_FP(dbl);
2235 : break;
2236 19764 : default: {
2237 19764 : int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
2238 579875 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2239 576565 : if (cmpf(BUNtail(bi, p - 1), BUNtail(bi, p)) < 0) {
2240 16454 : b->tnorevsorted = p;
2241 16454 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2242 16454 : goto doreturn;
2243 : }
2244 : }
2245 : break;
2246 : }
2247 : }
2248 26394 : b->trevsorted = bi.revsorted = true;
2249 26394 : TRC_DEBUG(ALGO, "Fixed revsorted for " ALGOBATFMT " (" LLFMT " usec)\n", ALGOBATPAR(b), GDKusec() - t0);
2250 : }
2251 26394 : doreturn:
2252 101772 : MT_rwlock_rdunlock(&b->thashlock);
2253 101888 : revsorted = b->trevsorted;
2254 101888 : bat pbid = VIEWtparent(b);
2255 101888 : MT_lock_unset(&b->theaplock);
2256 102032 : if (pbid) {
2257 34665 : BAT *pb = BBP_desc(pbid);
2258 34665 : MT_lock_set(&pb->theaplock);
2259 34669 : if (bi.count == BATcount(pb) &&
2260 6469 : bi.h == pb->theap &&
2261 6469 : bi.type == pb->ttype) {
2262 : /* add to knowledge in parent bat */
2263 6468 : pb->trevsorted |= bi.revsorted;
2264 6468 : if (pb->tnorevsorted == 0)
2265 6468 : pb->tnorevsorted = bi.norevsorted;
2266 : }
2267 34669 : MT_lock_unset(&pb->theaplock);
2268 : }
2269 : return revsorted;
2270 : }
2271 :
2272 : /* figure out which sort function is to be called
2273 : * stable sort can produce an error (not enough memory available),
2274 : * "quick" sort does not produce errors */
2275 : static gdk_return
2276 3952520 : do_sort(void *restrict h, void *restrict t, const void *restrict base,
2277 : size_t n, int hs, int ts, int tpe, bool reverse, bool nilslast,
2278 : bool stable)
2279 : {
2280 3952520 : if (n <= 1) /* trivially sorted */
2281 : return GDK_SUCCEED;
2282 2764148 : switch (tpe) {
2283 2715817 : case TYPE_bte:
2284 : case TYPE_sht:
2285 : case TYPE_int:
2286 : case TYPE_lng:
2287 : #ifdef HAVE_HGE
2288 : case TYPE_hge:
2289 : #endif
2290 : case TYPE_date:
2291 : case TYPE_daytime:
2292 : case TYPE_timestamp:
2293 2715817 : assert(base == NULL);
2294 2715817 : if (nilslast == reverse && (stable || n > 100))
2295 20323 : return GDKrsort(h, t, n, hs, ts, reverse, false);
2296 : break;
2297 4 : case TYPE_uuid:
2298 4 : assert(base == NULL);
2299 4 : if (nilslast == reverse && (stable || n > 100))
2300 1 : return GDKrsort(h, t, n, hs, ts, reverse, true);
2301 : break;
2302 : default:
2303 : break;
2304 : }
2305 48439 : if (stable) {
2306 27 : if (reverse)
2307 0 : return GDKssort_rev(h, t, base, n, hs, ts, tpe);
2308 : else
2309 27 : return GDKssort(h, t, base, n, hs, ts, tpe);
2310 : } else {
2311 2743797 : GDKqsort(h, t, base, n, hs, ts, tpe, reverse, nilslast);
2312 : }
2313 2743797 : return GDK_SUCCEED;
2314 : }
2315 :
2316 : /* Sort the bat b according to both o and g. The stable and reverse
2317 : * parameters indicate whether the sort should be stable or descending
2318 : * respectively. The parameter b is required, o and g are optional
2319 : * (i.e., they may be NULL).
2320 : *
2321 : * A sorted copy is returned through the sorted parameter, the new
2322 : * ordering is returned through the order parameter, group information
2323 : * is returned through the groups parameter. All three output
2324 : * parameters may be NULL. If they're all NULL, this function does
2325 : * nothing.
2326 : *
2327 : * If o is specified, it is used to first rearrange b according to the
2328 : * order specified in o, after which b is sorted taking g into
2329 : * account.
2330 : *
2331 : * If g is specified, it indicates groups which should be individually
2332 : * ordered. Each row of consecutive equal values in g indicates a
2333 : * group which is sorted according to stable and reverse. g is used
2334 : * after the order in b was rearranged according to o.
2335 : *
2336 : * The outputs order and groups can be used in subsequent calls to
2337 : * this function. This can be used if multiple BATs need to be sorted
2338 : * together. The BATs should then be sorted in order of significance,
2339 : * and each following call should use the original unordered BAT plus
2340 : * the order and groups bat from the previous call. In this case, the
2341 : * sorted BATs are not of much use, so the sorted output parameter
2342 : * does not need to be specified.
2343 : * Apart from error checking and maintaining reference counts, sorting
2344 : * three columns (col1, col2, col3) could look like this with the
2345 : * sorted results in (col1s, col2s, col3s):
2346 : * BATsort(&col1s, &ord1, &grp1, col1, NULL, NULL, false, false, false);
2347 : * BATsort(&col2s, &ord2, &grp2, col2, ord1, grp1, false, false, false);
2348 : * BATsort(&col3s, NULL, NULL, col3, ord2, grp2, false, false, false);
2349 : * Note that the "reverse" parameter can be different for each call.
2350 : */
2351 : gdk_return
2352 34434 : BATsort(BAT **sorted, BAT **order, BAT **groups,
2353 : BAT *b, BAT *o, BAT *g, bool reverse, bool nilslast, bool stable)
2354 : {
2355 34434 : BAT *bn = NULL, *on = NULL, *gn = NULL, *pb = NULL;
2356 34434 : BATiter pbi;
2357 34434 : oid *restrict grps, *restrict ords, prev;
2358 34434 : BUN p, q, r;
2359 34434 : lng t0 = GDKusec();
2360 34441 : bool mkorderidx, orderidxlock = false;
2361 34441 : Heap *oidxh = NULL;
2362 :
2363 : /* we haven't implemented NILs as largest value for stable
2364 : * sort, so NILs come first for ascending and last for
2365 : * descending */
2366 34441 : assert(!stable || reverse == nilslast);
2367 :
2368 34441 : if (b == NULL) {
2369 0 : GDKerror("b must exist\n");
2370 0 : return GDK_FAIL;
2371 : }
2372 34441 : if (stable && reverse != nilslast) {
2373 0 : GDKerror("stable sort cannot have reverse != nilslast\n");
2374 0 : return GDK_FAIL;
2375 : }
2376 34441 : if (!ATOMlinear(b->ttype)) {
2377 0 : GDKerror("type %s cannot be sorted\n", ATOMname(b->ttype));
2378 0 : return GDK_FAIL;
2379 : }
2380 34441 : MT_lock_set(&b->theaplock);
2381 34444 : if (b->ttype == TYPE_void) {
2382 149 : b->tsorted = true;
2383 223 : if (b->trevsorted != (is_oid_nil(b->tseqbase) || b->batCount <= 1)) {
2384 0 : b->trevsorted = !b->trevsorted;
2385 : }
2386 298 : if (b->tkey != (!is_oid_nil(b->tseqbase) || b->batCount <= 1)) {
2387 0 : b->tkey = !b->tkey;
2388 : }
2389 34295 : } else if (b->batCount <= 1) {
2390 12549 : if (!b->tsorted || !b->trevsorted) {
2391 23 : b->tsorted = b->trevsorted = true;
2392 : }
2393 : }
2394 34444 : MT_lock_unset(&b->theaplock);
2395 34443 : if (o != NULL &&
2396 17428 : (ATOMtype(o->ttype) != TYPE_oid || /* oid tail */
2397 17428 : BATcount(o) != BATcount(b) || /* same size as b */
2398 6903 : (o->ttype == TYPE_void && /* no nil tail */
2399 2732 : BATcount(o) != 0 &&
2400 2732 : is_oid_nil(o->tseqbase)))) {
2401 0 : GDKerror("o must have type oid and same size as b\n");
2402 0 : return GDK_FAIL;
2403 : }
2404 34443 : if (g != NULL &&
2405 17428 : (ATOMtype(g->ttype) != TYPE_oid || /* oid tail */
2406 17428 : !g->tsorted || /* sorted */
2407 17428 : BATcount(g) != BATcount(b) || /* same size as b */
2408 7031 : (g->ttype == TYPE_void && /* no nil tail */
2409 2860 : BATcount(g) != 0 &&
2410 2860 : is_oid_nil(g->tseqbase)))) {
2411 0 : GDKerror("g must have type oid, sorted on the tail, "
2412 : "and same size as b\n");
2413 0 : return GDK_FAIL;
2414 : }
2415 34443 : if (sorted == NULL && order == NULL) {
2416 : /* no place to put result, so we're done quickly */
2417 0 : GDKerror("no place to put the result.\n");
2418 0 : return GDK_FAIL;
2419 : }
2420 34443 : if (g == NULL && !stable) {
2421 : /* pre-ordering doesn't make sense if we're not
2422 : * subsorting and the sort is not stable */
2423 16736 : o = NULL;
2424 : }
2425 34443 : if (b->tnonil) {
2426 : /* if there are no nils, placement of nils doesn't
2427 : * matter, so set nilslast such that ordered bits can
2428 : * be used */
2429 26250 : nilslast = reverse;
2430 : }
2431 34443 : pbi = bat_iterator(NULL);
2432 35847 : if (BATcount(b) <= 1 ||
2433 21694 : (reverse == nilslast &&
2434 : (reverse ? b->trevsorted : b->tsorted) &&
2435 5735 : o == NULL && g == NULL &&
2436 3220 : (groups == NULL || BATtkey(b) ||
2437 : (reverse ? b->tsorted : b->trevsorted)))) {
2438 : /* trivially (sub)sorted, and either we don't need to
2439 : * return group information, or we can trivially
2440 : * deduce the groups */
2441 14770 : if (sorted) {
2442 12843 : bn = COLcopy(b, b->ttype, false, TRANSIENT);
2443 12843 : if (bn == NULL)
2444 0 : goto error;
2445 12843 : *sorted = bn;
2446 : }
2447 14770 : if (order) {
2448 13869 : on = BATdense(b->hseqbase, b->hseqbase, BATcount(b));
2449 13870 : if (on == NULL)
2450 0 : goto error;
2451 13870 : *order = on;
2452 : }
2453 14771 : if (groups) {
2454 6510 : if (BATtkey(b)) {
2455 : /* singleton groups */
2456 5866 : gn = BATdense(b->hseqbase, 0, BATcount(b));
2457 5866 : if (gn == NULL)
2458 0 : goto error;
2459 : } else {
2460 : /* single group */
2461 644 : const oid *o = 0;
2462 644 : assert(BATcount(b) == 1 ||
2463 : (b->tsorted && b->trevsorted));
2464 644 : gn = BATconstant(b->hseqbase, TYPE_oid, &o, BATcount(b), TRANSIENT);
2465 644 : if (gn == NULL)
2466 0 : goto error;
2467 : }
2468 6510 : *groups = gn;
2469 : }
2470 14771 : bat_iterator_end(&pbi);
2471 14771 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o="
2472 : ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
2473 : ",reverse=%d,nilslast=%d,stable=%d) = ("
2474 : ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2475 : ALGOOPTBATFMT " -- trivial (" LLFMT
2476 : " usec)\n",
2477 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2478 : ALGOOPTBATPAR(g), reverse, nilslast, stable,
2479 : ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
2480 : ALGOOPTBATPAR(on), GDKusec() - t0);
2481 14771 : return GDK_SUCCEED;
2482 : }
2483 19670 : if (VIEWtparent(b)) {
2484 4236 : pb = BATdescriptor(VIEWtparent(b));
2485 4241 : if (pb != NULL &&
2486 4241 : (b->tbaseoff != pb->tbaseoff ||
2487 3354 : BATcount(b) != BATcount(pb) ||
2488 3075 : b->hseqbase != pb->hseqbase ||
2489 3063 : BATatoms[b->ttype].atomCmp != BATatoms[pb->ttype].atomCmp)) {
2490 1177 : BBPunfix(pb->batCacheid);
2491 1177 : pb = NULL;
2492 : }
2493 : } else {
2494 : pb = b;
2495 : }
2496 19676 : bat_iterator_end(&pbi);
2497 19675 : pbi = bat_iterator(pb);
2498 : /* when we will create an order index if it doesn't already exist */
2499 19673 : mkorderidx = (g == NULL && !reverse && !nilslast && pb != NULL && (order || !pbi.transient));
2500 19673 : if (g == NULL && !reverse && !nilslast && pb != NULL) {
2501 6409 : (void) BATcheckorderidx(pb);
2502 6409 : MT_lock_set(&pb->batIdxLock);
2503 6408 : if (pb->torderidx) {
2504 66 : if (!stable || ((oid *) pb->torderidx->base)[2]) {
2505 : /* we can use the order index */
2506 66 : oidxh = pb->torderidx;
2507 66 : HEAPincref(oidxh);
2508 : }
2509 : mkorderidx = false;
2510 6342 : } else if (b != pb) {
2511 : /* don't build orderidx on parent bat */
2512 : mkorderidx = false;
2513 5270 : } else if (mkorderidx) {
2514 : /* keep lock when going to create */
2515 4644 : orderidxlock = true;
2516 : }
2517 6408 : if (!orderidxlock)
2518 1763 : MT_lock_unset(&pb->batIdxLock);
2519 : }
2520 19673 : if (g == NULL && o == NULL && !reverse && !nilslast && oidxh != NULL) {
2521 : /* there is an order index that we can use */
2522 66 : on = COLnew(pb->hseqbase, TYPE_oid, pbi.count, TRANSIENT);
2523 66 : if (on == NULL)
2524 0 : goto error;
2525 66 : memcpy(Tloc(on, 0), (oid *) oidxh->base + ORDERIDXOFF, pbi.count * sizeof(oid));
2526 66 : BATsetcount(on, BATcount(b));
2527 66 : HEAPdecref(oidxh, false);
2528 66 : oidxh = NULL;
2529 66 : on->tkey = true;
2530 66 : on->tnil = false;
2531 66 : on->tnonil = true;
2532 66 : on->tsorted = on->trevsorted = false;
2533 66 : on->tseqbase = oid_nil;
2534 66 : if (sorted || groups) {
2535 66 : bn = BATproject(on, b);
2536 66 : if (bn == NULL)
2537 0 : goto error;
2538 66 : bn->tsorted = true;
2539 66 : if (groups) {
2540 11 : if (BATgroup_internal(groups, NULL, NULL, bn, NULL, g, NULL, NULL, true) != GDK_SUCCEED)
2541 0 : goto error;
2542 11 : if (sorted &&
2543 11 : (*groups)->tkey &&
2544 : g == NULL) {
2545 : /* if new groups bat is key
2546 : * and since there is no input
2547 : * groups bat, we know the
2548 : * result bat is key */
2549 4 : bn->tkey = true;
2550 : }
2551 : }
2552 66 : if (sorted)
2553 66 : *sorted = bn;
2554 : else {
2555 0 : BBPunfix(bn->batCacheid);
2556 0 : bn = NULL;
2557 : }
2558 : }
2559 66 : if (order)
2560 16 : *order = on;
2561 : else {
2562 50 : BBPunfix(on->batCacheid);
2563 50 : on = NULL;
2564 : }
2565 66 : bat_iterator_end(&pbi);
2566 66 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o="
2567 : ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
2568 : ",reverse=%d,nilslast=%d,stable=%d) = ("
2569 : ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2570 : ALGOOPTBATFMT " -- orderidx (" LLFMT
2571 : " usec)\n",
2572 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2573 : ALGOOPTBATPAR(g), reverse, nilslast, stable,
2574 : ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
2575 : ALGOOPTBATPAR(on), GDKusec() - t0);
2576 66 : if (pb != NULL && pb != b)
2577 53 : BBPunfix(pb->batCacheid);
2578 66 : return GDK_SUCCEED;
2579 12671 : } else if (oidxh) {
2580 0 : HEAPdecref(oidxh, false);
2581 0 : oidxh = NULL;
2582 : }
2583 19606 : if (o) {
2584 12118 : bn = BATproject(o, b);
2585 12118 : if (bn == NULL)
2586 0 : goto error;
2587 12118 : if (bn->ttype == TYPE_void || isVIEW(bn)) {
2588 5482 : BAT *b2 = COLcopy(bn, ATOMtype(bn->ttype), true, TRANSIENT);
2589 2741 : BBPunfix(bn->batCacheid);
2590 2741 : bn = b2;
2591 : }
2592 12119 : if (pb) {
2593 11562 : bat_iterator_end(&pbi);
2594 11561 : if (pb != b)
2595 1634 : BBPunfix(pb->batCacheid);
2596 11562 : pbi = bat_iterator(NULL);
2597 11562 : pb = NULL;
2598 : }
2599 : } else {
2600 7488 : bn = COLcopy(b, b->ttype, true, TRANSIENT);
2601 : }
2602 19603 : if (bn == NULL)
2603 0 : goto error;
2604 19603 : if (order) {
2605 : /* prepare order bat */
2606 16679 : if (o) {
2607 : /* make copy of input so that we can refine it;
2608 : * copy can be read-only if we take the shortcut
2609 : * below in the case g is "key" */
2610 10356 : on = COLcopy(o, TYPE_oid,
2611 10356 : g == NULL ||
2612 10356 : !(g->tkey || g->ttype == TYPE_void),
2613 : TRANSIENT);
2614 10356 : if (on == NULL)
2615 0 : goto error;
2616 10356 : BAThseqbase(on, b->hseqbase);
2617 10355 : on->tminpos = BUN_NONE;
2618 10355 : on->tmaxpos = BUN_NONE;
2619 : } else {
2620 : /* create new order */
2621 6323 : on = COLnew(b->hseqbase, TYPE_oid, BATcount(bn), TRANSIENT);
2622 6329 : if (on == NULL)
2623 0 : goto error;
2624 6329 : ords = (oid *) Tloc(on, 0);
2625 29975679 : for (p = 0, q = BATcount(bn); p < q; p++)
2626 29969350 : ords[p] = p + b->hseqbase;
2627 6329 : BATsetcount(on, BATcount(bn));
2628 6330 : on->tkey = true;
2629 6330 : on->tnil = false;
2630 6330 : on->tnonil = true;
2631 : }
2632 : /* COLcopy above can create TYPE_void */
2633 16685 : if (on->ttype != TYPE_void) {
2634 15919 : on->tsorted = on->trevsorted = false; /* it won't be sorted */
2635 15919 : on->tseqbase = oid_nil; /* and hence not dense */
2636 15919 : on->tnosorted = on->tnorevsorted = 0;
2637 : }
2638 16685 : *order = on;
2639 16685 : ords = (oid *) Tloc(on, 0);
2640 : } else {
2641 : ords = NULL;
2642 : }
2643 19609 : if (g) {
2644 12119 : if (g->tkey || g->ttype == TYPE_void) {
2645 : /* if g is "key", all groups are size 1, so no
2646 : * subsorting needed */
2647 5092 : if (sorted) {
2648 4604 : *sorted = bn;
2649 : } else {
2650 488 : BBPunfix(bn->batCacheid);
2651 488 : bn = NULL;
2652 : }
2653 5092 : if (order) {
2654 3914 : *order = on;
2655 3914 : if (o) {
2656 : /* we can inherit sortedness
2657 : * after all */
2658 3914 : on->tsorted = o->tsorted;
2659 3914 : on->trevsorted = o->trevsorted;
2660 3914 : if (o->tnosorted)
2661 56 : on->tnosorted = o->tnosorted;
2662 3914 : if (o->tnorevsorted)
2663 74 : on->tnorevsorted = o->tnorevsorted;
2664 : } else {
2665 : /* we didn't rearrange, so
2666 : * still sorted */
2667 0 : on->tsorted = true;
2668 0 : on->trevsorted = false;
2669 : }
2670 3914 : if (BATcount(on) <= 1) {
2671 0 : on->tsorted = true;
2672 0 : on->trevsorted = true;
2673 : }
2674 : }
2675 5092 : if (groups) {
2676 2912 : gn = COLcopy(g, g->ttype, false, TRANSIENT);
2677 2912 : if (gn == NULL)
2678 0 : goto error;
2679 2912 : *groups = gn;
2680 : }
2681 5092 : bat_iterator_end(&pbi);
2682 5092 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
2683 : ",o=" ALGOOPTBATFMT ",g=" ALGOBATFMT
2684 : ",reverse=%d,nilslast=%d,stable=%d"
2685 : ") = (" ALGOOPTBATFMT ","
2686 : ALGOOPTBATFMT "," ALGOOPTBATFMT
2687 : " -- key group (" LLFMT " usec)\n",
2688 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2689 : ALGOBATPAR(g), reverse, nilslast,
2690 : stable, ALGOOPTBATPAR(bn),
2691 : ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on),
2692 : GDKusec() - t0);
2693 5092 : if (pb != NULL && pb != b)
2694 0 : BBPunfix(pb->batCacheid);
2695 5092 : return GDK_SUCCEED;
2696 : }
2697 7027 : assert(g->ttype == TYPE_oid);
2698 7027 : grps = (oid *) Tloc(g, 0);
2699 7027 : prev = grps[0];
2700 7027 : if (BATmaterialize(bn, BUN_NONE) != GDK_SUCCEED)
2701 0 : goto error;
2702 44642703 : for (r = 0, p = 1, q = BATcount(g); p < q; p++) {
2703 44635677 : if (grps[p] != prev) {
2704 : /* sub sort [r,p) */
2705 7608101 : if (do_sort(Tloc(bn, r),
2706 3669296 : ords ? ords + r : NULL,
2707 3938805 : bn->tvheap ? bn->tvheap->base : NULL,
2708 3938805 : p - r, bn->twidth, ords ? sizeof(oid) : 0,
2709 3938805 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)
2710 0 : goto error;
2711 3938805 : r = p;
2712 3938805 : prev = grps[p];
2713 : }
2714 : }
2715 : /* sub sort [r,q) */
2716 13468 : if (do_sort(Tloc(bn, r),
2717 6442 : ords ? ords + r : NULL,
2718 7026 : bn->tvheap ? bn->tvheap->base : NULL,
2719 7026 : p - r, bn->twidth, ords ? sizeof(oid) : 0,
2720 7026 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)
2721 0 : goto error;
2722 : /* if single group (r==0) the result is (rev)sorted,
2723 : * otherwise (maybe) not */
2724 7026 : bn->tsorted = r == 0 && !reverse && !nilslast;
2725 14017 : bn->trevsorted = r == 0 && reverse && nilslast;
2726 : } else {
2727 7490 : Heap *m = NULL;
2728 : /* only invest in creating an order index if the BAT
2729 : * is persistent */
2730 7490 : if (mkorderidx) {
2731 4644 : assert(orderidxlock);
2732 4644 : if ((m = createOIDXheap(pb, stable)) != NULL &&
2733 : ords == NULL) {
2734 0 : ords = (oid *) m->base + ORDERIDXOFF;
2735 0 : if (o && o->ttype != TYPE_void)
2736 0 : memcpy(ords, Tloc(o, 0), BATcount(o) * sizeof(oid));
2737 0 : else if (o)
2738 0 : for (p = 0, q = BATcount(o); p < q; p++)
2739 0 : ords[p] = p + o->tseqbase;
2740 : else
2741 0 : for (p = 0, q = BATcount(b); p < q; p++)
2742 0 : ords[p] = p + b->hseqbase;
2743 : }
2744 : }
2745 7490 : if ((reverse != nilslast ||
2746 14080 : (reverse ? !bn->trevsorted : !bn->tsorted)) &&
2747 13392 : (BATmaterialize(bn, BUN_NONE) != GDK_SUCCEED ||
2748 6696 : do_sort(Tloc(bn, 0),
2749 : ords,
2750 6696 : bn->tvheap ? bn->tvheap->base : NULL,
2751 6696 : BATcount(bn), bn->twidth, ords ? sizeof(oid) : 0,
2752 6696 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)) {
2753 0 : if (m != NULL) {
2754 0 : HEAPfree(m, true);
2755 0 : GDKfree(m);
2756 : }
2757 0 : goto error;
2758 : }
2759 7490 : bn->tsorted = !reverse && !nilslast;
2760 7490 : bn->trevsorted = reverse && nilslast;
2761 7490 : if (m != NULL) {
2762 4645 : assert(orderidxlock);
2763 4645 : if (pb->torderidx == NULL) {
2764 4645 : if (ords != (oid *) m->base + ORDERIDXOFF) {
2765 4645 : memcpy((oid *) m->base + ORDERIDXOFF,
2766 : ords,
2767 4645 : pbi.count * sizeof(oid));
2768 : }
2769 4645 : pb->torderidx = m;
2770 4645 : persistOIDX(pb);
2771 : } else {
2772 0 : HEAPfree(m, true);
2773 0 : GDKfree(m);
2774 : }
2775 : }
2776 : }
2777 14515 : if (orderidxlock) {
2778 4643 : MT_lock_unset(&pb->batIdxLock);
2779 4644 : orderidxlock = false;
2780 : }
2781 14517 : bn->theap->dirty = true;
2782 14517 : bn->tnosorted = 0;
2783 14517 : bn->tnorevsorted = 0;
2784 14517 : bn->tnokey[0] = bn->tnokey[1] = 0;
2785 14517 : bn->tminpos = BUN_NONE;
2786 14517 : bn->tmaxpos = BUN_NONE;
2787 14517 : if (groups) {
2788 8825 : if (BATgroup_internal(groups, NULL, NULL, bn, NULL, g, NULL, NULL, true) != GDK_SUCCEED)
2789 0 : goto error;
2790 8824 : if ((*groups)->tkey &&
2791 968 : (g == NULL || (g->tsorted && g->trevsorted))) {
2792 : /* if new groups bat is key and the input
2793 : * group bat has a single value (both sorted
2794 : * and revsorted), we know the result bat is
2795 : * key */
2796 1173 : bn->tkey = true;
2797 : }
2798 : }
2799 :
2800 14516 : bat_iterator_end(&pbi);
2801 14514 : if (sorted)
2802 10587 : *sorted = bn;
2803 : else {
2804 3927 : BBPunfix(bn->batCacheid);
2805 3927 : bn = NULL;
2806 : }
2807 :
2808 14514 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o=" ALGOOPTBATFMT
2809 : ",g=" ALGOOPTBATFMT ",reverse=%d,nilslast=%d,"
2810 : "stable=%d) = (" ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2811 : ALGOOPTBATFMT " -- %ssort (" LLFMT " usec)\n",
2812 : ALGOBATPAR(b), ALGOOPTBATPAR(o), ALGOOPTBATPAR(g),
2813 : reverse, nilslast, stable, ALGOOPTBATPAR(bn),
2814 : ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on),
2815 : g ? "grouped " : "", GDKusec() - t0);
2816 14514 : if (pb && pb != b)
2817 1376 : BBPunfix(pb->batCacheid);
2818 : return GDK_SUCCEED;
2819 :
2820 0 : error:
2821 0 : bat_iterator_end(&pbi);
2822 0 : if (orderidxlock)
2823 0 : MT_lock_unset(&pb->batIdxLock);
2824 0 : if (oidxh)
2825 0 : HEAPdecref(oidxh, false);
2826 0 : BBPreclaim(bn);
2827 0 : if (pb && pb != b)
2828 0 : BBPunfix(pb->batCacheid);
2829 0 : BBPreclaim(on);
2830 0 : if (sorted)
2831 0 : *sorted = NULL;
2832 0 : if (order)
2833 0 : *order = NULL;
2834 0 : if (groups)
2835 0 : *groups = NULL;
2836 : return GDK_FAIL;
2837 : }
2838 :
2839 : /* return a new BAT of length n with seqbase hseq, and the constant v
2840 : * in the tail */
2841 : BAT *
2842 2133185 : BATconstant(oid hseq, int tailtype, const void *v, BUN n, role_t role)
2843 : {
2844 2133185 : BAT *bn;
2845 2133185 : void *restrict p;
2846 2133185 : BUN i;
2847 2133185 : lng t0 = 0;
2848 :
2849 2133185 : TRC_DEBUG_IF(ALGO) t0 = GDKusec();
2850 2133185 : if (v == NULL)
2851 : return NULL;
2852 2133185 : bn = COLnew(hseq, tailtype, n, role);
2853 2139850 : if (bn != NULL && n > 0) {
2854 91408 : p = Tloc(bn, 0);
2855 91408 : switch (ATOMstorage(tailtype)) {
2856 25 : case TYPE_void:
2857 25 : v = &oid_nil;
2858 25 : BATtseqbase(bn, oid_nil);
2859 25 : break;
2860 0 : case TYPE_msk:
2861 0 : if (*(msk*)v) {
2862 0 : memset(p, 0xFF, 4 * ((n + 31) / 32));
2863 0 : if (n & 31) {
2864 0 : uint32_t *m = p;
2865 0 : m[n / 32] &= (1U << (n % 32)) - 1;
2866 : }
2867 : } else
2868 0 : memset(p, 0x00, 4 * ((n + 31) / 32));
2869 : break;
2870 14876 : case TYPE_bte:
2871 14876 : memset(p, *(bte*)v, n);
2872 14876 : break;
2873 : case TYPE_sht:
2874 7161668 : for (i = 0; i < n; i++)
2875 7142985 : ((sht *) p)[i] = *(sht *) v;
2876 : break;
2877 : case TYPE_int:
2878 : case TYPE_flt:
2879 : assert(sizeof(int) == sizeof(flt));
2880 222856201 : for (i = 0; i < n; i++)
2881 222849837 : ((int *) p)[i] = *(int *) v;
2882 : break;
2883 : case TYPE_lng:
2884 : case TYPE_dbl:
2885 : assert(sizeof(lng) == sizeof(dbl));
2886 235225914 : for (i = 0; i < n; i++)
2887 235191972 : ((lng *) p)[i] = *(lng *) v;
2888 : break;
2889 : #ifdef HAVE_HGE
2890 : case TYPE_hge:
2891 24398227 : for (i = 0; i < n; i++)
2892 24396603 : ((hge *) p)[i] = *(hge *) v;
2893 : break;
2894 : #endif
2895 : case TYPE_uuid:
2896 200047 : for (i = 0; i < n; i++)
2897 200038 : ((uuid *) p)[i] = *(uuid *) v;
2898 : break;
2899 15735 : case TYPE_str:
2900 : /* insert the first value, then just copy the
2901 : * offset lots of times */
2902 15735 : if (tfastins_nocheck(bn, 0, v) != GDK_SUCCEED) {
2903 0 : BBPreclaim(bn);
2904 0 : return NULL;
2905 : }
2906 15742 : char val[sizeof(var_t)];
2907 15742 : memcpy(val, Tloc(bn, 0), bn->twidth);
2908 15742 : if (bn->twidth == 1 && n > 1) {
2909 : /* single byte value: we have a
2910 : * function for that */
2911 8102 : memset(Tloc(bn, 1), val[0], n - 1);
2912 : } else {
2913 7640 : char *p = Tloc(bn, 0);
2914 7660 : for (i = 1; i < n; i++) {
2915 20 : p += bn->twidth;
2916 20 : memcpy(p, val, bn->twidth);
2917 : }
2918 : }
2919 : break;
2920 : default:
2921 333713 : for (i = 0; i < n; i++)
2922 333564 : if (tfastins_nocheck(bn, i, v) != GDK_SUCCEED) {
2923 0 : BBPreclaim(bn);
2924 0 : return NULL;
2925 : }
2926 : break;
2927 : }
2928 91414 : bn->theap->dirty = true;
2929 91414 : bn->tnil = n >= 1 && ATOMnilptr(tailtype) && (*ATOMcompare(tailtype))(v, ATOMnilptr(tailtype)) == 0;
2930 91325 : BATsetcount(bn, n);
2931 91634 : bn->tsorted = bn->trevsorted = ATOMlinear(tailtype);
2932 91634 : bn->tnonil = !bn->tnil;
2933 91634 : bn->tkey = BATcount(bn) <= 1;
2934 : }
2935 2140076 : TRC_DEBUG(ALGO, "-> " ALGOOPTBATFMT " " LLFMT "usec\n",
2936 : ALGOOPTBATPAR(bn), GDKusec() - t0);
2937 : return bn;
2938 : }
2939 :
2940 : /*
2941 : * BAT Aggregates
2942 : *
2943 : * We retain the size() and card() aggregate results in the column
2944 : * descriptor. We would like to have such functionality in an
2945 : * extensible way for many aggregates, for DD (1) we do not want to
2946 : * change the binary BAT format on disk and (2) aggr and size are the
2947 : * most relevant aggregates.
2948 : *
2949 : * It is all hacked into the aggr[3] records; three adjacent integers
2950 : * that were left over in the column record. We refer to these as if
2951 : * it where an int aggr[3] array. The below routines set and retrieve
2952 : * the aggregate values from the tail of the BAT, as many
2953 : * aggregate-manipulating BAT functions work on tail.
2954 : *
2955 : * The rules are as follows: aggr[0] contains the alignment ID of the
2956 : * column (if set i.e. nonzero). Hence, if this value is nonzero and
2957 : * equal to b->talign, the precomputed aggregate values in
2958 : * aggr[GDK_AGGR_SIZE] and aggr[GDK_AGGR_CARD] hold. However, only one
2959 : * of them may be set at the time. This is encoded by the value
2960 : * int_nil, which cannot occur in these two aggregates.
2961 : *
2962 : * This was now extended to record the property whether we know there
2963 : * is a nil value present by mis-using the highest bits of both
2964 : * GDK_AGGR_SIZE and GDK_AGGR_CARD.
2965 : */
2966 :
2967 : void
2968 49782454 : PROPdestroy_nolock(BAT *b)
2969 : {
2970 49782454 : PROPrec *p = b->tprops;
2971 49782454 : PROPrec *n;
2972 :
2973 49782454 : b->tprops = NULL;
2974 49819511 : while (p) {
2975 5000 : n = p->next;
2976 5000 : assert(p->id != (enum prop_t) 20);
2977 5000 : VALclear(&p->v);
2978 5000 : GDKfree(p);
2979 5000 : p = n;
2980 : }
2981 49814511 : }
2982 :
2983 : void
2984 439 : PROPdestroy(BAT *b)
2985 : {
2986 439 : MT_lock_set(&b->theaplock);
2987 439 : PROPdestroy_nolock(b);
2988 439 : MT_lock_unset(&b->theaplock);
2989 439 : }
2990 :
2991 : ValPtr
2992 215806091 : BATgetprop_nolock(BAT *b, enum prop_t idx)
2993 : {
2994 215806091 : PROPrec *p;
2995 :
2996 215806091 : p = b->tprops;
2997 215813257 : while (p && p->id != idx)
2998 7166 : p = p->next;
2999 215806091 : return p ? &p->v : NULL;
3000 : }
3001 :
3002 : void
3003 423138 : BATrmprop_nolock(BAT *b, enum prop_t idx)
3004 : {
3005 423138 : PROPrec *prop = b->tprops, *prev = NULL;
3006 :
3007 423373 : while (prop) {
3008 423246 : if (prop->id == idx) {
3009 423011 : if (prev)
3010 106 : prev->next = prop->next;
3011 : else
3012 422905 : b->tprops = prop->next;
3013 423011 : VALclear(&prop->v);
3014 423011 : GDKfree(prop);
3015 423011 : return;
3016 : }
3017 235 : prev = prop;
3018 235 : prop = prop->next;
3019 : }
3020 : }
3021 :
3022 : ValPtr
3023 428050 : BATsetprop_nolock(BAT *b, enum prop_t idx, int type, const void *v)
3024 : {
3025 428050 : PROPrec *p;
3026 :
3027 428050 : p = b->tprops;
3028 436645 : while (p && p->id != idx)
3029 8595 : p = p->next;
3030 428050 : if (p == NULL) {
3031 428043 : if ((p = GDKmalloc(sizeof(PROPrec))) == NULL) {
3032 : /* properties are hints, so if we can't create
3033 : * one we ignore the error */
3034 0 : GDKclrerr();
3035 0 : return NULL;
3036 : }
3037 428043 : p->id = idx;
3038 428043 : p->next = b->tprops;
3039 428043 : p->v.vtype = 0;
3040 428043 : b->tprops = p;
3041 : } else {
3042 7 : VALclear(&p->v);
3043 : }
3044 428050 : if (VALinit(&p->v, type, v) == NULL) {
3045 : /* failed to initialize, so remove property */
3046 0 : BATrmprop_nolock(b, idx);
3047 0 : GDKclrerr();
3048 0 : p = NULL;
3049 : }
3050 0 : return p ? &p->v : NULL;
3051 : }
3052 :
3053 : ValPtr
3054 3453493 : BATgetprop(BAT *b, enum prop_t idx)
3055 : {
3056 3453493 : ValPtr p;
3057 :
3058 3453493 : MT_lock_set(&b->theaplock);
3059 3456931 : p = BATgetprop_nolock(b, idx);
3060 3456689 : MT_lock_unset(&b->theaplock);
3061 3457953 : return p;
3062 : }
3063 :
3064 : ValPtr
3065 4810 : BATsetprop(BAT *b, enum prop_t idx, int type, const void *v)
3066 : {
3067 4810 : ValPtr p;
3068 4810 : MT_lock_set(&b->theaplock);
3069 4810 : p = BATsetprop_nolock(b, idx, type, v);
3070 4810 : MT_lock_unset(&b->theaplock);
3071 4810 : return p;
3072 : }
3073 :
3074 : void
3075 2 : BATrmprop(BAT *b, enum prop_t idx)
3076 : {
3077 2 : MT_lock_set(&b->theaplock);
3078 2 : BATrmprop_nolock(b, idx);
3079 2 : MT_lock_unset(&b->theaplock);
3080 2 : }
3081 :
3082 : /*
3083 : * The BATcount_no_nil function counts all BUN in a BAT that have a
3084 : * non-nil tail value.
3085 : * This function does not fail (the callers currently don't check for failure).
3086 : */
3087 : BUN
3088 2255 : BATcount_no_nil(BAT *b, BAT *s)
3089 : {
3090 2255 : BUN cnt = 0;
3091 2255 : const void *restrict p, *restrict nil;
3092 2255 : const char *restrict base;
3093 2255 : int t;
3094 2255 : int (*cmp)(const void *, const void *);
3095 2255 : struct canditer ci;
3096 2255 : oid hseq;
3097 :
3098 2255 : BATcheck(b, 0);
3099 :
3100 2255 : hseq = b->hseqbase;
3101 2255 : canditer_init(&ci, b, s);
3102 2256 : BATiter bi = bat_iterator(b);
3103 2254 : if (bi.nonil) {
3104 2017 : bat_iterator_end(&bi);
3105 2019 : return ci.ncand;
3106 : }
3107 237 : p = bi.base;
3108 237 : t = ATOMbasetype(bi.type);
3109 237 : switch (t) {
3110 0 : case TYPE_void:
3111 0 : cnt = ci.ncand * BATtdensebi(&bi);
3112 0 : break;
3113 0 : case TYPE_msk:
3114 0 : cnt = ci.ncand;
3115 0 : break;
3116 16 : case TYPE_bte:
3117 32 : CAND_LOOP(&ci)
3118 16 : cnt += !is_bte_nil(((const bte *) p)[canditer_next(&ci) - hseq]);
3119 : break;
3120 0 : case TYPE_sht:
3121 0 : CAND_LOOP(&ci)
3122 0 : cnt += !is_sht_nil(((const sht *) p)[canditer_next(&ci) - hseq]);
3123 : break;
3124 111 : case TYPE_int:
3125 2379558 : CAND_LOOP(&ci)
3126 2379447 : cnt += !is_int_nil(((const int *) p)[canditer_next(&ci) - hseq]);
3127 : break;
3128 85 : case TYPE_lng:
3129 156873 : CAND_LOOP(&ci)
3130 156788 : cnt += !is_lng_nil(((const lng *) p)[canditer_next(&ci) - hseq]);
3131 : break;
3132 : #ifdef HAVE_HGE
3133 0 : case TYPE_hge:
3134 0 : CAND_LOOP(&ci)
3135 0 : cnt += !is_hge_nil(((const hge *) p)[canditer_next(&ci) - hseq]);
3136 : break;
3137 : #endif
3138 0 : case TYPE_flt:
3139 0 : CAND_LOOP(&ci)
3140 0 : cnt += !is_flt_nil(((const flt *) p)[canditer_next(&ci) - hseq]);
3141 : break;
3142 0 : case TYPE_dbl:
3143 0 : CAND_LOOP(&ci)
3144 0 : cnt += !is_dbl_nil(((const dbl *) p)[canditer_next(&ci) - hseq]);
3145 : break;
3146 0 : case TYPE_uuid:
3147 0 : CAND_LOOP(&ci)
3148 0 : cnt += !is_uuid_nil(((const uuid *) p)[canditer_next(&ci) - hseq]);
3149 : break;
3150 25 : case TYPE_str:
3151 25 : base = bi.vh->base;
3152 25 : switch (bi.width) {
3153 23 : case 1:
3154 4829 : CAND_LOOP(&ci)
3155 4806 : cnt += base[(var_t) ((const uint8_t *) p)[canditer_next(&ci) - hseq] + GDK_VAROFFSET] != '\200';
3156 : break;
3157 2 : case 2:
3158 312 : CAND_LOOP(&ci)
3159 310 : cnt += base[(var_t) ((const uint16_t *) p)[canditer_next(&ci) - hseq] + GDK_VAROFFSET] != '\200';
3160 : break;
3161 0 : case 4:
3162 0 : CAND_LOOP(&ci)
3163 0 : cnt += base[(var_t) ((const uint32_t *) p)[canditer_next(&ci) - hseq]] != '\200';
3164 : break;
3165 : #if SIZEOF_VAR_T == 8
3166 0 : case 8:
3167 0 : CAND_LOOP(&ci)
3168 0 : cnt += base[(var_t) ((const uint64_t *) p)[canditer_next(&ci) - hseq]] != '\200';
3169 : break;
3170 : #endif
3171 : default:
3172 0 : MT_UNREACHABLE();
3173 : }
3174 : break;
3175 0 : default:
3176 0 : nil = ATOMnilptr(t);
3177 0 : cmp = ATOMcompare(t);
3178 0 : if (nil == NULL) {
3179 0 : cnt = ci.ncand;
3180 0 : } else if (b->tvheap) {
3181 0 : base = b->tvheap->base;
3182 0 : CAND_LOOP(&ci)
3183 0 : cnt += (*cmp)(nil, base + ((const var_t *) p)[canditer_next(&ci) - hseq]) != 0;
3184 : } else {
3185 0 : CAND_LOOP(&ci)
3186 0 : cnt += (*cmp)(BUNtloc(bi, canditer_next(&ci) - hseq), nil) != 0;
3187 : }
3188 : break;
3189 : }
3190 237 : if (cnt == bi.count) {
3191 35 : MT_lock_set(&b->theaplock);
3192 35 : if (cnt == BATcount(b) && bi.h == b->theap) {
3193 : /* we learned something */
3194 35 : b->tnonil = true;
3195 35 : assert(!b->tnil);
3196 35 : b->tnil = false;
3197 : }
3198 35 : bat pbid = VIEWtparent(b);
3199 35 : MT_lock_unset(&b->theaplock);
3200 35 : if (pbid) {
3201 21 : BAT *pb = BATdescriptor(pbid);
3202 21 : if (pb) {
3203 21 : MT_lock_set(&pb->theaplock);
3204 21 : if (cnt == BATcount(pb) &&
3205 0 : bi.h == pb->theap &&
3206 0 : !pb->tnonil) {
3207 0 : pb->tnonil = true;
3208 0 : assert(!pb->tnil);
3209 0 : pb->tnil = false;
3210 : }
3211 21 : MT_lock_unset(&pb->theaplock);
3212 21 : BBPunfix(pb->batCacheid);
3213 : }
3214 : }
3215 : }
3216 237 : bat_iterator_end(&bi);
3217 237 : return cnt;
3218 : }
|