Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * (c) M. L. Kersten, P. Boncz, S. Manegold, N. Nes, K.S. Mullender
15 : * Common BAT Operations
16 : * We factor out all possible overhead by inlining code. This
17 : * includes the macros BUNhead and BUNtail, which do a test to see
18 : * whether the atom resides in the buns or in a variable storage
19 : * heap.
20 : */
21 : #include "monetdb_config.h"
22 : #include "gdk.h"
23 : #include "gdk_private.h"
24 :
25 : gdk_return
26 57414290 : unshare_varsized_heap(BAT *b)
27 : {
28 57414290 : if (ATOMvarsized(b->ttype) &&
29 24042954 : b->tvheap->parentid != b->batCacheid) {
30 974 : Heap *h = GDKmalloc(sizeof(Heap));
31 977 : if (h == NULL)
32 : return GDK_FAIL;
33 977 : MT_thread_setalgorithm("unshare vheap");
34 1952 : *h = (Heap) {
35 976 : .parentid = b->batCacheid,
36 976 : .farmid = BBPselectfarm(b->batRole, TYPE_str, varheap),
37 : .refs = ATOMIC_VAR_INIT(1),
38 : };
39 976 : strconcat_len(h->filename, sizeof(h->filename),
40 976 : BBP_physical(b->batCacheid), ".theap", NULL);
41 976 : if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
42 0 : HEAPfree(h, true);
43 0 : GDKfree(h);
44 0 : return GDK_FAIL;
45 : }
46 974 : MT_lock_set(&b->theaplock);
47 976 : Heap *oh = b->tvheap;
48 976 : b->tvheap = h;
49 976 : MT_lock_unset(&b->theaplock);
50 977 : BBPrelease(oh->parentid);
51 974 : HEAPdecref(oh, false);
52 : }
53 : return GDK_SUCCEED;
54 : }
55 :
56 : /* We try to be clever when appending one string bat to another.
57 : * First of all, we try to actually share the string heap so that we
58 : * don't need an extra copy, and if that can't be done, we see whether
59 : * it makes sense to just quickly copy the whole string heap instead
60 : * of inserting individual strings. See the comments in the code for
61 : * more information. */
62 : static gdk_return
63 121761 : insert_string_bat(BAT *b, BATiter *ni, struct canditer *ci, bool force, bool mayshare, QryCtx *qry_ctx)
64 : {
65 121761 : size_t toff = ~(size_t) 0; /* tail offset */
66 121761 : BUN p, r; /* loop variables */
67 121761 : const void *tp = NULL; /* tail value pointer */
68 121761 : var_t v;
69 121761 : size_t off; /* offset within n's string heap */
70 121761 : BUN cnt = ci->ncand;
71 121761 : BUN oldcnt = BATcount(b);
72 :
73 121761 : assert(b->ttype == TYPE_str);
74 121761 : assert(b->tbaseoff == 0);
75 121761 : assert(b->theap->parentid == b->batCacheid);
76 : /* only transient bats can use some other bat's string heap */
77 121761 : assert(b->batRole == TRANSIENT || b->tvheap->parentid == b->batCacheid);
78 121761 : if (cnt == 0)
79 : return GDK_SUCCEED;
80 :
81 121774 : if (b->tvheap == ni->vh) {
82 : /* vheaps are already shared, continue doing so: we just
83 : * need to append the offsets */
84 51519 : toff = 0;
85 51519 : MT_thread_setalgorithm("shared vheap");
86 70255 : } else if (mayshare && b->batRole == TRANSIENT && oldcnt == 0) {
87 : /* we can share the vheaps, so we then only need to
88 : * append the offsets */
89 56281 : MT_lock_set(&b->theaplock);
90 56434 : bat bid = b->tvheap->parentid;
91 56434 : HEAPdecref(b->tvheap, bid == b->batCacheid);
92 56511 : HEAPincref(ni->vh);
93 56518 : b->tvheap = ni->vh;
94 56518 : b->tascii = ni->ascii;
95 56518 : MT_lock_unset(&b->theaplock);
96 56533 : BBPretain(ni->vh->parentid);
97 56522 : if (bid != b->batCacheid)
98 0 : BBPrelease(bid);
99 56522 : toff = 0;
100 56522 : MT_thread_setalgorithm("share vheap");
101 : } else {
102 : /* no heap sharing, so also make sure the heap isn't
103 : * shared currently (we're not allowed to write in
104 : * another bat's heap) */
105 14951 : if (b->tvheap->parentid != b->batCacheid &&
106 975 : unshare_varsized_heap(b) != GDK_SUCCEED) {
107 : return GDK_FAIL;
108 : }
109 13976 : if (oldcnt == 0 || (!GDK_ELIMDOUBLES(b->tvheap) &&
110 78 : !GDK_ELIMDOUBLES(ni->vh))) {
111 : /* we'll consider copying the string heap completely
112 : *
113 : * we first estimate how much space the string heap
114 : * should occupy, given the number of rows we need to
115 : * insert, then, if that is way smaller than the actual
116 : * space occupied, we will skip the copy and just insert
117 : * one by one */
118 : size_t len = 0;
119 5784145 : for (int i = 0; i < 1024; i++) {
120 5778469 : p = (BUN) (((double) rand() / RAND_MAX) * (cnt - 1));
121 5736072 : p = canditer_idx(ci, p) - ni->b->hseqbase;
122 5742528 : len += strlen(BUNtvar(*ni, p)) + 1;
123 : }
124 5676 : len = (len + 512) / 1024; /* rounded average length */
125 5676 : r = (GDK_ELIMLIMIT - GDK_STRHASHSIZE) / (len + 12);
126 : /* r is estimate of number of strings in
127 : * double-eliminated area */
128 5676 : BUN ecnt = ci->ncand;
129 5676 : if (ni->b->tunique_est > 0 && ecnt > ni->b->tunique_est)
130 54 : ecnt = (BUN)ni->b->tunique_est;
131 5676 : if (r < ecnt)
132 914 : len = GDK_ELIMLIMIT + (ecnt - r) * len;
133 : else
134 4762 : len = GDK_STRHASHSIZE + ecnt * (len + 12);
135 : /* len is total estimated expected size of vheap */
136 :
137 5676 : if (len > ni->vhfree / 2) {
138 : /* we copy the string heap, perhaps appending */
139 5664 : if (oldcnt == 0) {
140 5629 : toff = 0;
141 5629 : MT_thread_setalgorithm("copy vheap");
142 : } else {
143 35 : toff = (b->tvheap->free + GDK_VARALIGN - 1) & ~(GDK_VARALIGN - 1);
144 35 : MT_thread_setalgorithm("append vheap");
145 : }
146 :
147 5665 : MT_lock_set(&b->theaplock);
148 5662 : if (HEAPgrow(&b->tvheap, toff + ni->vhfree, force) != GDK_SUCCEED) {
149 0 : MT_lock_unset(&b->theaplock);
150 0 : return GDK_FAIL;
151 : }
152 5663 : memcpy(b->tvheap->base + toff, ni->vh->base, ni->vhfree);
153 5663 : b->tvheap->free = toff + ni->vhfree;
154 5663 : b->tvheap->dirty = true;
155 5663 : b->tascii &= ni->ascii;
156 5663 : MT_lock_unset(&b->theaplock);
157 : }
158 : }
159 : }
160 : /* if toff has the initial value of ~0, we insert strings
161 : * individually, otherwise we only copy (insert) offsets */
162 113700 : if (toff == ~(size_t) 0)
163 : v = GDK_VAROFFSET;
164 : else
165 113617 : v = b->tvheap->free - 1;
166 :
167 : /* make sure there is (vertical) space in the offset heap, we
168 : * may also widen thanks to v, set above */
169 122021 : if (GDKupgradevarheap(b, v, oldcnt + cnt < b->batCapacity ? b->batCapacity : oldcnt + cnt, b->batCount) != GDK_SUCCEED) {
170 : return GDK_FAIL;
171 : }
172 :
173 121996 : if (toff == 0 && ni->width == b->twidth && ci->tpe == cand_dense) {
174 : /* we don't need to do any translation of offset
175 : * values, so we can use fast memcpy */
176 113387 : MT_thread_setalgorithm("memcpy offsets");
177 113503 : memcpy(Tloc(b, BATcount(b)), (const char *) ni->base + ((ci->seq - ni->b->hseqbase) << ni->shift), cnt << ni->shift);
178 8609 : } else if (toff != ~(size_t) 0) {
179 : /* we don't need to insert any actual strings since we
180 : * have already made sure that they are all in b's
181 : * string heap at known locations (namely the offset
182 : * in n added to toff), so insert offsets from n after
183 : * adding toff into b */
184 : /* note the use of the "restrict" qualifier here: all
185 : * four pointers below point to the same value, but
186 : * only one of them will actually be used, hence we
187 : * still obey the rule for restrict-qualified
188 : * pointers */
189 200 : const uint8_t *restrict tbp = (const uint8_t *) ni->base;
190 200 : const uint16_t *restrict tsp = (const uint16_t *) ni->base;
191 200 : const uint32_t *restrict tip = (const uint32_t *) ni->base;
192 : #if SIZEOF_VAR_T == 8
193 200 : const uint64_t *restrict tlp = (const uint64_t *) ni->base;
194 : #endif
195 :
196 200 : MT_thread_setalgorithm("copy offset values");
197 201 : r = b->batCount;
198 3526968 : TIMEOUT_LOOP(cnt, qry_ctx) {
199 3526371 : p = canditer_next(ci) - ni->b->hseqbase;
200 3526371 : switch (ni->width) {
201 220 : case 1:
202 220 : v = (var_t) tbp[p] + GDK_VAROFFSET;
203 220 : break;
204 4641 : case 2:
205 4641 : v = (var_t) tsp[p] + GDK_VAROFFSET;
206 4641 : break;
207 3521510 : case 4:
208 3521510 : v = (var_t) tip[p];
209 3521510 : break;
210 : #if SIZEOF_VAR_T == 8
211 0 : case 8:
212 0 : v = (var_t) tlp[p];
213 0 : break;
214 : #endif
215 : default:
216 0 : MT_UNREACHABLE();
217 : }
218 3526371 : v = (var_t) ((size_t) v + toff);
219 3526371 : assert(v >= GDK_VAROFFSET);
220 3526371 : assert((size_t) v < b->tvheap->free);
221 3526371 : switch (b->twidth) {
222 4635 : case 1:
223 4635 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 8));
224 4635 : ((uint8_t *) b->theap->base)[r++] = (uint8_t) (v - GDK_VAROFFSET);
225 4635 : break;
226 875 : case 2:
227 875 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 16));
228 875 : ((uint16_t *) b->theap->base)[r++] = (uint16_t) (v - GDK_VAROFFSET);
229 875 : break;
230 3520861 : case 4:
231 : #if SIZEOF_VAR_T == 8
232 3520861 : assert(v < ((var_t) 1 << 32));
233 : #endif
234 3520861 : ((uint32_t *) b->theap->base)[r++] = (uint32_t) v;
235 3520861 : break;
236 : #if SIZEOF_VAR_T == 8
237 0 : case 8:
238 0 : ((uint64_t *) b->theap->base)[r++] = (uint64_t) v;
239 0 : break;
240 : #endif
241 : default:
242 3526371 : MT_UNREACHABLE();
243 : }
244 : }
245 8409 : } else if (b->tvheap->free < ni->vhfree / 2 ||
246 : GDK_ELIMDOUBLES(b->tvheap)) {
247 : /* if b's string heap is much smaller than n's string
248 : * heap, don't bother checking whether n's string
249 : * values occur in b's string heap; also, if b is
250 : * (still) fully double eliminated, we must continue
251 : * to use the double elimination mechanism */
252 8367 : r = b->batCount;
253 8367 : oid hseq = ni->b->hseqbase;
254 8367 : MT_thread_setalgorithm("insert string values");
255 11422089 : TIMEOUT_LOOP(cnt, qry_ctx) {
256 11404877 : p = canditer_next(ci) - hseq;
257 11246079 : tp = BUNtvar(*ni, p);
258 11262735 : if (tfastins_nocheckVAR(b, r, tp) != GDK_SUCCEED) {
259 : return GDK_FAIL;
260 : }
261 11404913 : r++;
262 : }
263 : } else {
264 : /* Insert values from n individually into b; however,
265 : * we check whether there is a string in b's string
266 : * heap at the same offset as the string is in n's
267 : * string heap (in case b's string heap is a copy of
268 : * n's). If this is the case, we just copy the
269 : * offset, otherwise we insert normally. */
270 42 : r = b->batCount;
271 42 : MT_thread_setalgorithm("insert string values with check");
272 6594 : TIMEOUT_LOOP(cnt, qry_ctx) {
273 6510 : p = canditer_next(ci) - ni->b->hseqbase;
274 6510 : off = BUNtvaroff(*ni, p); /* the offset */
275 6510 : tp = ni->vh->base + off; /* the string */
276 6510 : if (off < b->tvheap->free &&
277 6510 : strcmp(b->tvheap->base + off, tp) == 0) {
278 : /* we found the string at the same
279 : * offset in b's string heap as it was
280 : * in n's string heap, so we don't
281 : * have to insert a new string into b:
282 : * we can just copy the offset */
283 2870 : v = (var_t) off;
284 2870 : switch (b->twidth) {
285 0 : case 1:
286 0 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 8));
287 0 : ((uint8_t *) b->theap->base)[r] = (uint8_t) (v - GDK_VAROFFSET);
288 0 : break;
289 4 : case 2:
290 4 : assert(v - GDK_VAROFFSET < ((var_t) 1 << 16));
291 4 : ((uint16_t *) b->theap->base)[r] = (uint16_t) (v - GDK_VAROFFSET);
292 4 : break;
293 2866 : case 4:
294 : #if SIZEOF_VAR_T == 8
295 2866 : assert(v < ((var_t) 1 << 32));
296 : #endif
297 2866 : ((uint32_t *) b->theap->base)[r] = (uint32_t) v;
298 2866 : break;
299 : #if SIZEOF_VAR_T == 8
300 0 : case 8:
301 0 : ((uint64_t *) b->theap->base)[r] = (uint64_t) v;
302 0 : break;
303 : #endif
304 : default:
305 0 : MT_UNREACHABLE();
306 : }
307 : } else {
308 3640 : if (tfastins_nocheckVAR(b, r, tp) != GDK_SUCCEED) {
309 : return GDK_FAIL;
310 : }
311 : }
312 6510 : r++;
313 : }
314 : }
315 122154 : TIMEOUT_CHECK(qry_ctx, TIMEOUT_HANDLER(GDK_FAIL, qry_ctx));
316 121840 : MT_rwlock_wrlock(&b->thashlock);
317 122131 : MT_lock_set(&b->theaplock);
318 122050 : BATsetcount(b, oldcnt + ci->ncand);
319 122082 : assert(b->batCapacity >= b->batCount);
320 122082 : MT_lock_unset(&b->theaplock);
321 : /* maintain hash */
322 125173 : for (r = oldcnt, cnt = BATcount(b); b->thash && r < cnt; r++) {
323 2989 : HASHappend_locked(b, r, b->tvheap->base + VarHeapVal(Tloc(b, 0), r, b->twidth));
324 : }
325 122184 : BUN nunique = b->thash ? b->thash->nunique : 0;
326 122184 : MT_rwlock_wrunlock(&b->thashlock);
327 122158 : if (nunique != 0) {
328 9 : MT_lock_set(&b->theaplock);
329 9 : b->tunique_est = (double) nunique;
330 9 : MT_lock_unset(&b->theaplock);
331 : }
332 : return GDK_SUCCEED;
333 : }
334 :
335 : static gdk_return
336 513 : append_varsized_bat(BAT *b, BATiter *ni, struct canditer *ci, bool mayshare)
337 : {
338 513 : BUN cnt = ci->ncand, r;
339 513 : oid hseq = ni->b->hseqbase;
340 :
341 : /* only transient bats can use some other bat's vheap */
342 513 : assert(b->batRole == TRANSIENT || b->tvheap->parentid == b->batCacheid);
343 : /* make sure the bats use var_t */
344 513 : assert(b->twidth == ni->width);
345 513 : assert(b->twidth == SIZEOF_VAR_T);
346 513 : if (cnt == 0)
347 : return GDK_SUCCEED;
348 514 : if (cnt > BATcapacity(b) - BATcount(b)) {
349 : /* if needed space exceeds a normal growth extend just
350 : * with what's needed */
351 18 : BUN ncap = BATcount(b) + cnt;
352 18 : BUN grows = BATgrows(b);
353 :
354 18 : if (ncap > grows)
355 : grows = ncap;
356 18 : if (BATextend(b, grows) != GDK_SUCCEED)
357 : return GDK_FAIL;
358 : }
359 514 : if (mayshare &&
360 515 : BATcount(b) == 0 &&
361 233 : b->batRole == TRANSIENT &&
362 148 : ni->restricted == BAT_READ &&
363 148 : b->tvheap != ni->vh) {
364 : /* if b is still empty, in the transient farm, and n
365 : * is read-only, we replace b's vheap with a reference
366 : * to n's */
367 148 : MT_lock_set(&b->theaplock);
368 148 : bat bid = b->tvheap->parentid;
369 148 : HEAPdecref(b->tvheap, true);
370 148 : HEAPincref(ni->vh);
371 148 : b->tvheap = ni->vh;
372 148 : MT_lock_unset(&b->theaplock);
373 148 : BBPretain(ni->vh->parentid);
374 148 : if (bid != b->batCacheid)
375 0 : BBPrelease(bid);
376 : }
377 514 : if (b->tvheap == ni->vh) {
378 : /* if b and n use the same vheap, we only need to copy
379 : * the offsets from n to b */
380 341 : if (ci->tpe == cand_dense) {
381 : /* fast memcpy since we copy a consecutive
382 : * chunk of memory */
383 341 : memcpy(Tloc(b, BATcount(b)),
384 341 : (const var_t *) ni->base + (ci->seq - hseq),
385 341 : cnt << b->tshift);
386 : } else {
387 0 : var_t *restrict dst = (var_t *) Tloc(b, BATcount(b));
388 0 : const var_t *restrict src = (const var_t *) ni->base;
389 0 : while (cnt > 0) {
390 0 : cnt--;
391 0 : *dst++ = src[canditer_next(ci) - hseq];
392 : }
393 : }
394 341 : MT_rwlock_wrlock(&b->thashlock);
395 341 : MT_lock_set(&b->theaplock);
396 341 : BATsetcount(b, BATcount(b) + ci->ncand);
397 341 : MT_lock_unset(&b->theaplock);
398 : /* maintain hash table */
399 341 : for (BUN i = BATcount(b) - ci->ncand;
400 341 : b->thash && i < BATcount(b);
401 0 : i++) {
402 0 : HASHappend_locked(b, i, b->tvheap->base + *(var_t *) Tloc(b, i));
403 : }
404 341 : BUN nunique = b->thash ? b->thash->nunique : 0;
405 341 : MT_rwlock_wrunlock(&b->thashlock);
406 341 : if (nunique != 0) {
407 0 : MT_lock_set(&b->theaplock);
408 0 : b->tunique_est = (double) nunique;
409 0 : MT_lock_unset(&b->theaplock);
410 : }
411 341 : return GDK_SUCCEED;
412 : }
413 : /* b and n do not share their vheap, so we need to copy data */
414 173 : if (b->tvheap->parentid != b->batCacheid) {
415 : /* if b shares its vheap with some other bat, unshare it */
416 18 : Heap *h = GDKmalloc(sizeof(Heap));
417 18 : if (h == NULL) {
418 : return GDK_FAIL;
419 : }
420 36 : *h = (Heap) {
421 18 : .parentid = b->batCacheid,
422 18 : .farmid = BBPselectfarm(b->batRole, b->ttype, varheap),
423 : .refs = ATOMIC_VAR_INIT(1),
424 : };
425 18 : strconcat_len(h->filename, sizeof(h->filename),
426 18 : BBP_physical(b->batCacheid), ".theap", NULL);
427 18 : if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
428 0 : HEAPfree(h, true);
429 0 : GDKfree(h);
430 0 : return GDK_FAIL;
431 : }
432 18 : MT_lock_set(&b->theaplock);
433 18 : Heap *oh = b->tvheap;
434 18 : b->tvheap = h;
435 18 : MT_lock_unset(&b->theaplock);
436 18 : if (oh->parentid != b->batCacheid)
437 18 : BBPrelease(oh->parentid);
438 18 : HEAPdecref(oh, false);
439 : }
440 173 : if (BATcount(b) == 0 &&
441 85 : ci->tpe == cand_dense && ci->ncand == ni->count) {
442 : /* just copy the heaps */
443 85 : MT_lock_set(&b->theaplock);
444 85 : if (HEAPgrow(&b->tvheap, ni->vhfree, false) != GDK_SUCCEED) {
445 0 : MT_lock_unset(&b->theaplock);
446 0 : return GDK_FAIL;
447 : }
448 82 : memcpy(b->theap->base, ni->base, ni->hfree);
449 82 : memcpy(b->tvheap->base, ni->vh->base, ni->vhfree);
450 82 : b->theap->free = ni->hfree;
451 82 : b->theap->dirty = true;
452 82 : b->tvheap->free = ni->vhfree;
453 82 : b->tvheap->dirty = true;
454 82 : BATsetcount(b, ni->count);
455 85 : b->tnil = ni->nil;
456 85 : b->tnonil = ni->nonil;
457 85 : b->tsorted = ni->sorted;
458 85 : b->tnosorted = ni->nosorted;
459 85 : b->trevsorted = ni->revsorted;
460 85 : b->tnorevsorted = ni->norevsorted;
461 85 : b->tkey = ni->key;
462 85 : b->tnokey[0] = ni->nokey[0];
463 85 : b->tnokey[1] = ni->nokey[1];
464 85 : b->tminpos = ni->minpos;
465 85 : b->tmaxpos = ni->maxpos;
466 85 : b->tunique_est = ni->unique_est;
467 85 : MT_lock_unset(&b->theaplock);
468 84 : return GDK_SUCCEED;
469 : }
470 : /* copy data from n to b */
471 : r = BATcount(b);
472 380593 : for (BUN i = 0; i < cnt; i++) {
473 380503 : BUN p = canditer_next(ci) - hseq;
474 380503 : const void *t = BUNtvar(*ni, p);
475 380503 : if (tfastins_nocheckVAR(b, r, t) != GDK_SUCCEED) {
476 : return GDK_FAIL;
477 : }
478 380505 : r++;
479 : }
480 90 : MT_rwlock_wrlock(&b->thashlock);
481 90 : if (b->thash) {
482 0 : r -= cnt;
483 0 : BATiter bi = bat_iterator_nolock(b);
484 0 : for (BUN i = 0; i < cnt; i++) {
485 0 : const void *t = BUNtvar(bi, r);
486 0 : HASHappend_locked(b, r, t);
487 0 : r++;
488 : }
489 : }
490 90 : BUN nunique = b->thash ? b->thash->nunique : 0;
491 90 : MT_lock_set(&b->theaplock);
492 89 : BATsetcount(b, r);
493 90 : if (nunique != 0)
494 0 : b->tunique_est = (double) nunique;
495 90 : MT_lock_unset(&b->theaplock);
496 90 : MT_rwlock_wrunlock(&b->thashlock);
497 90 : return GDK_SUCCEED;
498 : }
499 :
500 : static gdk_return
501 127 : append_msk_bat(BAT *b, BATiter *ni, struct canditer *ci)
502 : {
503 127 : if (ci->ncand == 0)
504 : return GDK_SUCCEED;
505 127 : if (BATextend(b, BATcount(b) + ci->ncand) != GDK_SUCCEED)
506 : return GDK_FAIL;
507 :
508 127 : MT_lock_set(&b->theaplock);
509 :
510 127 : uint32_t boff = b->batCount % 32;
511 127 : uint32_t *bp = (uint32_t *) b->theap->base + b->batCount / 32;
512 127 : b->batCount += ci->ncand;
513 127 : b->theap->dirty = true;
514 127 : b->theap->free = ((b->batCount + 31) / 32) * 4;
515 127 : if (ci->tpe == cand_dense) {
516 127 : const uint32_t *np;
517 127 : uint32_t noff, mask;
518 127 : BUN cnt;
519 127 : noff = (ci->seq - ni->b->hseqbase) % 32;
520 127 : cnt = ci->ncand;
521 127 : np = (const uint32_t *) ni->base + (ci->seq - ni->b->hseqbase) / 32;
522 127 : if (boff == noff) {
523 : /* words of b and n are aligned, so we don't
524 : * need to shift bits around */
525 47 : if (boff + cnt <= 32) {
526 : /* all new bits within one word */
527 41 : if (cnt == 32) {
528 0 : *bp = *np;
529 : } else {
530 41 : mask = ((1U << cnt) - 1) << boff;
531 41 : *bp &= ~mask;
532 41 : *bp |= *np & mask;
533 : }
534 : } else {
535 : /* multiple words of b are affected */
536 6 : if (boff != 0) {
537 : /* first fill up the rest of the first
538 : * word */
539 0 : mask = ~0U << boff;
540 0 : *bp &= ~mask;
541 0 : *bp++ |= *np++ & mask;
542 0 : cnt -= 32 - boff;
543 : }
544 6 : if (cnt >= 32) {
545 : /* copy an integral number of words fast */
546 6 : BUN nw = cnt / 32;
547 6 : memcpy(bp, np, nw*sizeof(int));
548 6 : bp += nw;
549 6 : np += nw;
550 6 : cnt %= 32;
551 : }
552 6 : if (cnt > 0) {
553 : /* do the left over bits */
554 6 : mask = (1U << cnt) - 1;
555 6 : *bp = *np & mask;
556 : }
557 : }
558 80 : } else if (boff > noff) {
559 80 : if (boff + cnt <= 32) {
560 : /* we only need to copy bits from a
561 : * single word of n to a single word
562 : * of b */
563 : /* boff > 0, so cnt < 32, hence the
564 : * shift is ok */
565 71 : mask = (1U << cnt) - 1;
566 71 : *bp &= ~(mask << boff);
567 71 : *bp |= (*np & (mask << noff)) << (boff - noff);
568 : } else {
569 : /* first fill the rest of the last partial
570 : * word of b, so that's 32-boff bits */
571 9 : mask = (1U << (32 - boff)) - 1;
572 9 : *bp &= ~(mask << boff);
573 9 : *bp++ |= (*np & (mask << noff)) << (boff - noff);
574 9 : cnt -= 32 - boff;
575 :
576 : /* set boff and noff to the amount we need to
577 : * shift bits in consecutive words of n around
578 : * to fit into the next word of b; set mask to
579 : * the mask of the bottom bits of n that fit
580 : * in a word of b (and the complement are the
581 : * top bits that go to another word of b) */
582 9 : boff -= noff;
583 9 : noff = 32 - boff;
584 9 : mask = (1U << noff) - 1;
585 141 : while (cnt >= 32) {
586 132 : *bp = (*np++ & ~mask) >> noff;
587 132 : *bp++ |= (*np & mask) << boff;
588 132 : cnt -= 32;
589 : }
590 9 : if (cnt > noff) {
591 : /* the last bits come from two words
592 : * in n */
593 5 : *bp = (*np++ & ~mask) >> noff;
594 5 : cnt -= noff;
595 5 : mask = (1U << cnt) - 1;
596 5 : *bp++ |= (*np & mask) << boff;
597 4 : } else if (cnt > 0) {
598 : /* the last bits come from a single
599 : * word in n */
600 4 : mask = ((1U << cnt) - 1) << noff;
601 4 : *bp = (*np & mask) >> noff;
602 : }
603 : }
604 : } else {
605 : /* boff < noff */
606 0 : if (noff + cnt <= 32) {
607 : /* only need part of the first word of n */
608 0 : assert(cnt < 32); /* noff > 0, so cnt < 32 */
609 0 : mask = (1U << cnt) - 1;
610 0 : *bp &= ~(mask << boff);
611 0 : *bp |= (*np & (mask << noff)) >> (noff - boff);
612 0 : } else if (boff + cnt <= 32) {
613 : /* only need to fill a single word of
614 : * b, but from two of n */
615 0 : if (cnt < 32)
616 0 : *bp &= ~(((1U << cnt) - 1) << boff);
617 : else
618 0 : *bp = 0;
619 0 : mask = ~((1U << noff) - 1);
620 0 : *bp |= (*np++ & mask) >> (noff - boff);
621 0 : cnt -= 32 - noff;
622 0 : mask = (1U << cnt) - 1;
623 0 : *bp |= (*np & mask) << (32 - noff);
624 : } else {
625 0 : if (boff > 0) {
626 : /* fill the rest of the first word of b */
627 0 : cnt -= 32 - boff;
628 0 : *bp &= (1U << boff) - 1;
629 0 : mask = ~((1U << noff) - 1);
630 0 : noff -= boff;
631 0 : boff = 32 - noff;
632 0 : *bp |= (*np++ & mask) >> noff;
633 0 : *bp |= (*np & ((1U << noff) - 1)) << boff;
634 : } else {
635 0 : boff = 32 - noff;
636 : }
637 0 : mask = (1U << noff) - 1;
638 0 : while (cnt >= 32) {
639 0 : *bp = (*np++ & ~mask) >> noff;
640 0 : *bp++ |= (*np & mask) << boff;
641 0 : cnt -= 32;
642 : }
643 0 : if (cnt > 0) {
644 0 : *bp = (*np++ & ~mask) >> noff;
645 0 : if (cnt > noff)
646 0 : *bp++ |= (*np & mask) << boff;
647 : }
648 : }
649 : }
650 : } else {
651 0 : oid o;
652 0 : uint32_t v = boff > 0 ? *bp & ((1U << boff) - 1) : 0;
653 0 : do {
654 0 : for (uint32_t i = boff; i < 32; i++) {
655 0 : o = canditer_next(ci);
656 0 : if (is_oid_nil(o))
657 : break;
658 0 : o -= ni->b->hseqbase;
659 0 : v |= (uint32_t) Tmskval(ni, o - ni->b->hseqbase) << i;
660 : }
661 0 : *bp++ = v;
662 0 : v = 0;
663 0 : boff = 0;
664 0 : } while (!is_oid_nil(o));
665 : }
666 127 : MT_lock_unset(&b->theaplock);
667 127 : return GDK_SUCCEED;
668 : }
669 :
670 : /* Append the contents of BAT n (subject to the optional candidate
671 : * list s) to BAT b. If b is empty, b will get the seqbase of s if it
672 : * was passed in, and else the seqbase of n. */
673 : static gdk_return
674 2297124 : BATappend2(BAT *b, BAT *n, BAT *s, bool force, bool mayshare)
675 : {
676 2297124 : struct canditer ci;
677 2297124 : BUN r;
678 2297124 : oid hseq = n->hseqbase;
679 2297124 : char buf[64];
680 2297124 : lng t0 = 0;
681 2297124 : const ValRecord *prop = NULL;
682 2297124 : ValRecord minprop, maxprop;
683 2297124 : const void *minbound = NULL, *maxbound = NULL;
684 2297124 : int (*atomcmp) (const void *, const void *) = ATOMcompare(b->ttype);
685 2297124 : bool hlocked = false;
686 :
687 2297124 : if (b == NULL || n == NULL || BATcount(n) == 0) {
688 : return GDK_SUCCEED;
689 : }
690 1325765 : assert(b->theap->parentid == b->batCacheid);
691 :
692 1325765 : TRC_DEBUG_IF(ALGO) {
693 0 : t0 = GDKusec();
694 0 : snprintf(buf, sizeof(buf), ALGOBATFMT, ALGOBATPAR(b));
695 : }
696 :
697 1325765 : ALIGNapp(b, force, GDK_FAIL);
698 :
699 3953910 : if (ATOMstorage(ATOMtype(b->ttype)) != ATOMstorage(ATOMtype(n->ttype))) {
700 0 : GDKerror("Incompatible operands ("ALGOBATFMT" vs. "ALGOBATFMT").\n", ALGOBATPAR(b), ALGOBATPAR(n));
701 0 : return GDK_FAIL;
702 : }
703 :
704 1329280 : if (BATttype(b) != BATttype(n) &&
705 : ATOMtype(b->ttype) != ATOMtype(n->ttype)) {
706 0 : TRC_DEBUG(CHECK_, "Interpreting %s as %s.\n",
707 : ATOMname(BATttype(n)), ATOMname(BATttype(b)));
708 : }
709 :
710 1329280 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
711 :
712 1329190 : BATiter ni = bat_iterator(n);
713 :
714 1329137 : canditer_init(&ci, n, s);
715 1329193 : if (ci.ncand == 0) {
716 0 : goto doreturn;
717 : }
718 :
719 1329193 : if (BATcount(b) + ci.ncand > BUN_MAX) {
720 0 : bat_iterator_end(&ni);
721 0 : GDKerror("combined BATs too large\n");
722 0 : return GDK_FAIL;
723 : }
724 :
725 1329193 : if (b->hseqbase + BATcount(b) + ci.ncand >= GDK_oid_max) {
726 0 : bat_iterator_end(&ni);
727 0 : GDKerror("overflow of head value\n");
728 0 : return GDK_FAIL;
729 : }
730 :
731 1329193 : OIDXdestroy(b);
732 1329731 : STRMPdestroy(b); /* TODO: use STRMPappendBitString */
733 1330662 : RTREEdestroy(b);
734 :
735 1328758 : MT_lock_set(&b->theaplock);
736 1328513 : const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
737 1327107 : if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL &&
738 48 : VALcopy(&minprop, prop) != NULL) {
739 48 : minbound = VALptr(&minprop);
740 48 : if (ci.ncand == BATcount(n) &&
741 62 : ni.minpos != BUN_NONE &&
742 14 : atomcmp(BUNtail(ni, ni.minpos), minbound) < 0) {
743 0 : assert(0);
744 : GDKerror("value out of bounds\n");
745 : MT_lock_unset(&b->theaplock);
746 : goto bailout;
747 : }
748 : }
749 1327886 : if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL &&
750 40 : VALcopy(&maxprop, prop) != NULL) {
751 40 : maxbound = VALptr(&maxprop);
752 40 : if (ci.ncand == BATcount(n) &&
753 52 : ni.maxpos != BUN_NONE &&
754 12 : atomcmp(BUNtail(ni, ni.maxpos), maxbound) >= 0) {
755 0 : assert(0);
756 : GDKerror("value out of bounds\n");
757 : MT_lock_unset(&b->theaplock);
758 : goto bailout;
759 : }
760 : }
761 :
762 1325551 : if (BATcount(b) == 0 || b->tmaxpos != BUN_NONE) {
763 399123 : if (ni.maxpos != BUN_NONE) {
764 152389 : BATiter bi = bat_iterator_nolock(b);
765 152389 : if (BATcount(b) == 0 || atomcmp(BUNtail(bi, b->tmaxpos), BUNtail(ni, ni.maxpos)) < 0) {
766 146663 : if (s == NULL) {
767 146529 : b->tmaxpos = BATcount(b) + ni.maxpos;
768 : } else {
769 134 : b->tmaxpos = BUN_NONE;
770 : }
771 : }
772 : } else {
773 246734 : b->tmaxpos = BUN_NONE;
774 : }
775 : }
776 1325569 : if (BATcount(b) == 0 || b->tminpos != BUN_NONE) {
777 398397 : if (ni.minpos != BUN_NONE) {
778 151911 : BATiter bi = bat_iterator_nolock(b);
779 151911 : if (BATcount(b) == 0 || atomcmp(BUNtail(bi, b->tminpos), BUNtail(ni, ni.minpos)) > 0) {
780 145185 : if (s == NULL) {
781 145053 : b->tminpos = BATcount(b) + ni.minpos;
782 : } else {
783 132 : b->tminpos = BUN_NONE;
784 : }
785 : }
786 : } else {
787 246486 : b->tminpos = BUN_NONE;
788 : }
789 : }
790 1325569 : if (ci.ncand > BATcount(b) / gdk_unique_estimate_keep_fraction) {
791 1323754 : b->tunique_est = 0;
792 : }
793 1325569 : MT_lock_unset(&b->theaplock);
794 : /* load hash so that we can maintain it */
795 1330637 : (void) BATcheckhash(b);
796 :
797 1328597 : if (b->ttype == TYPE_void) {
798 : /* b does not have storage, keep it that way if we can */
799 17363 : HASHdestroy(b); /* we're not maintaining the hash here */
800 17363 : MT_lock_set(&b->theaplock);
801 17363 : if (BATtdensebi(&ni) && ci.tpe == cand_dense &&
802 16726 : (BATcount(b) == 0 ||
803 14019 : (BATtdense(b) &&
804 14019 : b->tseqbase + BATcount(b) == n->tseqbase + ci.seq - hseq))) {
805 : /* n is also dense and consecutive with b */
806 16667 : if (BATcount(b) == 0) {
807 2707 : if (minbound && n->tseqbase + ci.seq - hseq < *(const oid *)minbound) {
808 0 : assert(0);
809 : GDKerror("value not within bounds\n");
810 : MT_lock_unset(&b->theaplock);
811 : goto bailout;
812 : }
813 2707 : BATtseqbase(b, n->tseqbase + ci.seq - hseq);
814 : }
815 16666 : if (maxbound && b->tseqbase + BATcount(b) + ci.ncand >= *(const oid *)maxbound) {
816 0 : assert(0);
817 : GDKerror("value not within bounds\n");
818 : MT_lock_unset(&b->theaplock);
819 : goto bailout;
820 : }
821 16666 : BATsetcount(b, BATcount(b) + ci.ncand);
822 16662 : MT_lock_unset(&b->theaplock);
823 16665 : goto doreturn;
824 : }
825 696 : if ((BATcount(b) == 0 || is_oid_nil(b->tseqbase)) &&
826 20 : ni.type == TYPE_void && is_oid_nil(n->tseqbase)) {
827 : /* both b and n are void/nil */
828 0 : if (notnull) {
829 0 : assert(0);
830 : GDKerror("NULL value not within bounds\n");
831 : MT_lock_unset(&b->theaplock);
832 : goto bailout;
833 : }
834 0 : BATtseqbase(b, oid_nil);
835 0 : BATsetcount(b, BATcount(b) + ci.ncand);
836 0 : MT_lock_unset(&b->theaplock);
837 0 : goto doreturn;
838 : }
839 : /* we need to materialize b; allocate enough capacity */
840 696 : MT_lock_unset(&b->theaplock);
841 696 : if (BATmaterialize(b, BATcount(b) + ci.ncand) != GDK_SUCCEED) {
842 0 : goto bailout;
843 : }
844 : }
845 :
846 : /* property setting */
847 1311930 : MT_lock_set(&b->theaplock);
848 1312128 : r = BATcount(b);
849 :
850 1312128 : if (BATcount(b) == 0) {
851 388225 : b->tsorted = ni.sorted;
852 388225 : b->trevsorted = ni.revsorted;
853 388225 : b->tseqbase = oid_nil;
854 388225 : b->tnonil = ni.nonil;
855 388225 : b->tnil = ni.nil && ci.ncand == BATcount(n);
856 388225 : if (ci.tpe == cand_dense) {
857 388077 : b->tnosorted = ci.seq - hseq <= ni.nosorted && ni.nosorted < ci.seq + ci.ncand - hseq ? ni.nosorted + hseq - ci.seq : 0;
858 388077 : b->tnorevsorted = ci.seq - hseq <= ni.norevsorted && ni.norevsorted < ci.seq + ci.ncand - hseq ? ni.norevsorted + hseq - ci.seq : 0;
859 388077 : if (BATtdensebi(&ni)) {
860 1940 : b->tseqbase = n->tseqbase + ci.seq - hseq;
861 : }
862 : } else {
863 148 : b->tnosorted = 0;
864 148 : b->tnorevsorted = 0;
865 : }
866 388225 : b->tkey = ni.key;
867 388225 : if (ci.ncand == BATcount(n)) {
868 387216 : b->tnokey[0] = ni.nokey[0];
869 387216 : b->tnokey[1] = ni.nokey[1];
870 : } else {
871 1009 : b->tnokey[0] = b->tnokey[1] = 0;
872 : }
873 : } else {
874 923903 : BUN last = r - 1;
875 923903 : BATiter bi = bat_iterator_nolock(b);
876 923985 : int xx = ATOMcmp(b->ttype,
877 : BUNtail(ni, ci.seq - hseq),
878 : BUNtail(bi, last));
879 922201 : if (b->tsorted && (!ni.sorted || xx < 0)) {
880 21398 : b->tsorted = false;
881 21398 : b->tnosorted = 0;
882 21398 : b->tseqbase = oid_nil;
883 : }
884 922201 : if (b->trevsorted &&
885 83524 : (!ni.revsorted || xx > 0)) {
886 17775 : b->trevsorted = false;
887 17775 : b->tnorevsorted = 0;
888 : }
889 922201 : if (b->tkey &&
890 70858 : (!(b->tsorted || b->trevsorted) ||
891 56582 : !ni.key || xx == 0)) {
892 19926 : BATkey(b, false);
893 : }
894 922145 : if (b->ttype != TYPE_void && b->tsorted && BATtdense(b) &&
895 764 : (!BATtdensebi(&ni) ||
896 896 : ci.tpe != cand_dense ||
897 448 : 1 + *(oid *) BUNtloc(bi, last) != BUNtoid(n, ci.seq - hseq))) {
898 335 : b->tseqbase = oid_nil;
899 : }
900 922145 : b->tnonil &= ni.nonil;
901 1832255 : b->tnil |= ni.nil && ci.ncand == ni.count;
902 : }
903 1310370 : MT_lock_unset(&b->theaplock);
904 1313677 : if (b->ttype == TYPE_str) {
905 122078 : if (insert_string_bat(b, &ni, &ci, force, mayshare, qry_ctx) != GDK_SUCCEED) {
906 0 : goto bailout;
907 : }
908 1191599 : } else if (ATOMvarsized(b->ttype)) {
909 516 : if (append_varsized_bat(b, &ni, &ci, mayshare) != GDK_SUCCEED) {
910 0 : goto bailout;
911 : }
912 1191083 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
913 : /* no bounds and NOT_NULL property on MSK bats */
914 127 : assert(minbound == NULL && maxbound == NULL && !notnull);
915 127 : if (append_msk_bat(b, &ni, &ci) != GDK_SUCCEED) {
916 0 : goto bailout;
917 : }
918 : } else {
919 1190956 : if (ci.ncand > BATcapacity(b) - BATcount(b)) {
920 : /* if needed space exceeds a normal growth
921 : * extend just with what's needed */
922 10696 : BUN ncap = BATcount(b) + ci.ncand;
923 10696 : BUN grows = BATgrows(b);
924 :
925 10674 : if (ncap > grows)
926 : grows = ncap;
927 10674 : if (BATextend(b, grows) != GDK_SUCCEED) {
928 0 : goto bailout;
929 : }
930 : }
931 1191046 : MT_rwlock_wrlock(&b->thashlock);
932 1191674 : hlocked = true;
933 1191674 : if (b->ttype != TYPE_void &&
934 1191386 : ni.type != TYPE_void &&
935 1189513 : ci.tpe == cand_dense) {
936 : /* use fast memcpy if we can */
937 1189476 : memcpy(Tloc(b, BATcount(b)),
938 1189476 : (const char *) ni.base + ((ci.seq - hseq) << ni.shift),
939 1189476 : ci.ncand << ni.shift);
940 1189489 : for (BUN i = 0; b->thash && i < ci.ncand; i++) {
941 4123 : HASHappend_locked(b, r, Tloc(b, r));
942 13 : r++;
943 : }
944 : } else {
945 2198 : const void *atomnil = ATOMnilptr(b->ttype);
946 725928 : TIMEOUT_LOOP(ci.ncand, qry_ctx) {
947 721509 : BUN p = canditer_next(&ci) - hseq;
948 721141 : const void *t = BUNtail(ni, p);
949 721685 : bool isnil = atomcmp(t, atomnil) == 0;
950 721046 : if (notnull && isnil) {
951 0 : assert(0);
952 : GDKerror("NULL value not within bounds\n");
953 : goto bailout;
954 721046 : } else if (minbound &&
955 721046 : !isnil &&
956 0 : atomcmp(t, minbound) < 0) {
957 0 : assert(0);
958 : GDKerror("value not within bounds\n");
959 : goto bailout;
960 721046 : } else if (maxbound &&
961 0 : !isnil &&
962 0 : atomcmp(t, maxbound) >= 0) {
963 0 : assert(0);
964 : GDKerror("value not within bounds\n");
965 : goto bailout;
966 721046 : } else if (tfastins_nocheck(b, r, t) != GDK_SUCCEED) {
967 0 : goto bailout;
968 : }
969 721504 : if (b->thash)
970 0 : HASHappend_locked(b, r, t);
971 721513 : r++;
972 : }
973 2199 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
974 : }
975 1187564 : BUN nunique;
976 1187564 : nunique = b->thash ? b->thash->nunique : 0;
977 1187564 : MT_lock_set(&b->theaplock);
978 1189641 : BATsetcount(b, b->batCount + ci.ncand);
979 1189611 : if (nunique != 0)
980 5 : b->tunique_est = (double) nunique;
981 1189611 : MT_lock_unset(&b->theaplock);
982 1191578 : assert(hlocked);
983 1191578 : MT_rwlock_wrunlock(&b->thashlock);
984 1191578 : hlocked = false;
985 : }
986 :
987 1330512 : doreturn:
988 1330512 : bat_iterator_end(&ni);
989 1328120 : if (minbound)
990 48 : VALclear(&minprop);
991 1328675 : if (maxbound)
992 40 : VALclear(&maxprop);
993 1328675 : TRC_DEBUG(ALGO, "b=%s,n=" ALGOBATFMT ",s=" ALGOOPTBATFMT
994 : " -> " ALGOBATFMT " (" LLFMT " usec)\n",
995 : buf, ALGOBATPAR(n), ALGOOPTBATPAR(s), ALGOBATPAR(b),
996 : GDKusec() - t0);
997 :
998 : return GDK_SUCCEED;
999 : bailout:
1000 0 : if (hlocked)
1001 0 : MT_rwlock_wrunlock(&b->thashlock);
1002 0 : if (minbound)
1003 0 : VALclear(&minprop);
1004 0 : if (maxbound)
1005 0 : VALclear(&maxprop);
1006 0 : bat_iterator_end(&ni);
1007 0 : return GDK_FAIL;
1008 : }
1009 :
1010 : gdk_return
1011 2305632 : BATappend(BAT *b, BAT *n, BAT *s, bool force)
1012 : {
1013 2305632 : return BATappend2(b, n, s, force, true);
1014 : }
1015 :
1016 : gdk_return
1017 4 : BATdel(BAT *b, BAT *d)
1018 : {
1019 4 : void (*atmdel) (Heap *, var_t *) = BATatoms[b->ttype].atomDel;
1020 4 : MT_lock_set(&b->theaplock);
1021 4 : BATiter bi = bat_iterator_nolock(b);
1022 4 : MT_lock_unset(&b->theaplock);
1023 :
1024 4 : assert(ATOMtype(d->ttype) == TYPE_oid);
1025 4 : assert(d->tsorted);
1026 4 : assert(d->tkey);
1027 4 : if (BATcount(d) == 0)
1028 : return GDK_SUCCEED;
1029 4 : OIDXdestroy(b);
1030 4 : HASHdestroy(b);
1031 4 : PROPdestroy(b);
1032 4 : STRMPdestroy(b);
1033 4 : RTREEdestroy(b);
1034 4 : if (BATtdense(d)) {
1035 2 : oid o = d->tseqbase;
1036 2 : BUN c = BATcount(d);
1037 :
1038 2 : if (o + c <= b->hseqbase)
1039 : return GDK_SUCCEED;
1040 2 : if (o < b->hseqbase) {
1041 0 : c -= b->hseqbase - o;
1042 0 : o = b->hseqbase;
1043 : }
1044 2 : if (o - b->hseqbase < b->batInserted) {
1045 0 : GDKerror("cannot delete committed values\n");
1046 0 : return GDK_FAIL;
1047 : }
1048 2 : if (o + c > b->hseqbase + BATcount(b))
1049 0 : c = b->hseqbase + BATcount(b) - o;
1050 2 : if (c == 0)
1051 : return GDK_SUCCEED;
1052 2 : if (atmdel) {
1053 0 : BUN p = o - b->hseqbase;
1054 0 : BUN q = p + c;
1055 0 : while (p < q) {
1056 0 : (*atmdel)(b->tvheap, (var_t *) BUNtloc(bi, p));
1057 0 : p++;
1058 : }
1059 : }
1060 2 : if (BATtdense(b) && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED)
1061 : return GDK_FAIL;
1062 2 : MT_lock_set(&b->theaplock);
1063 2 : if (o + c < b->hseqbase + BATcount(b)) {
1064 0 : o -= b->hseqbase;
1065 0 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1066 0 : BUN n = BATcount(b) - (o + c);
1067 : /* not very efficient, but first see
1068 : * how much this is used */
1069 0 : for (BUN i = 0; i < n; i++)
1070 0 : mskSetVal(b, o + i,
1071 0 : mskGetVal(b, o + c + i));
1072 : } else {
1073 0 : memmove(Tloc(b, o),
1074 0 : Tloc(b, o + c),
1075 0 : b->twidth * (BATcount(b) - (o + c)));
1076 : }
1077 0 : b->theap->dirty = true;
1078 : // o += b->hseqbase; // if this were to be used again
1079 : }
1080 2 : b->batCount -= c;
1081 : } else {
1082 2 : BATiter di = bat_iterator(d);
1083 2 : const oid *o = (const oid *) di.base;
1084 2 : const oid *s;
1085 2 : BUN c = di.count;
1086 2 : BUN nd = 0;
1087 2 : BUN pos;
1088 2 : char *p = NULL;
1089 :
1090 2 : if (o[c - 1] <= b->hseqbase) {
1091 0 : bat_iterator_end(&di);
1092 0 : return GDK_SUCCEED;
1093 : }
1094 2 : while (*o < b->hseqbase) {
1095 0 : o++;
1096 0 : c--;
1097 : }
1098 2 : if (*o - b->hseqbase < b->batInserted) {
1099 0 : bat_iterator_end(&di);
1100 0 : GDKerror("cannot delete committed values\n");
1101 0 : return GDK_FAIL;
1102 : }
1103 2 : if (BATtdense(b) && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED) {
1104 0 : bat_iterator_end(&di);
1105 0 : return GDK_FAIL;
1106 : }
1107 2 : s = o;
1108 2 : pos = *o - b->hseqbase;
1109 2 : if (ATOMstorage(b->ttype) != TYPE_msk)
1110 2 : p = Tloc(b, pos);
1111 6 : while (c > 0 && *o < b->hseqbase + BATcount(b)) {
1112 4 : size_t n;
1113 4 : if (atmdel)
1114 0 : (*atmdel)(b->tvheap, (var_t *) BUNtloc(bi, *o - b->hseqbase));
1115 4 : o++;
1116 4 : c--;
1117 4 : nd++;
1118 4 : if (c == 0 || *o - b->hseqbase >= BATcount(b))
1119 2 : n = b->hseqbase + BATcount(b) - o[-1] - 1;
1120 2 : else if ((oid) (o - s) < *o - *s)
1121 2 : n = o[0] - o[-1] - 1;
1122 : else
1123 : n = 0;
1124 4 : if (n > 0) {
1125 2 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1126 0 : BUN opos = o[-1] + 1 - b->hseqbase;
1127 : /* not very efficient, but
1128 : * first see how much this is
1129 : * used */
1130 0 : for (BUN i = 0; i < n; i++) {
1131 0 : mskSetVal(b, pos + i,
1132 0 : mskGetVal(b, opos + i));
1133 : }
1134 0 : pos += n;
1135 : } else {
1136 2 : n *= b->twidth;
1137 2 : memmove(p,
1138 2 : Tloc(b, o[-1] + 1 - b->hseqbase),
1139 : n);
1140 2 : p += n;
1141 : }
1142 : s = o;
1143 : }
1144 : }
1145 2 : bat_iterator_end(&di);
1146 2 : MT_lock_set(&b->theaplock);
1147 2 : b->theap->dirty = true;
1148 2 : b->batCount -= nd;
1149 : }
1150 4 : if (b->batCount <= 1) {
1151 : /* some trivial properties */
1152 2 : b->tkey = true;
1153 2 : b->tsorted = b->trevsorted = true;
1154 2 : if (b->batCount == 0) {
1155 2 : b->tnil = false;
1156 2 : b->tnonil = true;
1157 : }
1158 : }
1159 : /* not sure about these anymore */
1160 4 : b->tnosorted = b->tnorevsorted = 0;
1161 4 : b->tnokey[0] = b->tnokey[1] = 0;
1162 4 : b->tminpos = BUN_NONE;
1163 4 : b->tmaxpos = BUN_NONE;
1164 4 : b->tunique_est = 0.0;
1165 4 : MT_lock_unset(&b->theaplock);
1166 :
1167 4 : return GDK_SUCCEED;
1168 : }
1169 :
1170 : /*
1171 : * Replace all values in b with values from n whose location is given by
1172 : * the oid in either p or positions.
1173 : * If positions is used, autoincr specifies whether it is the first of a
1174 : * dense range of positions or whether it is a full-blown array of
1175 : * position.
1176 : * If mayappend is set, the position in p/positions may refer to
1177 : * locations beyond the end of b.
1178 : */
1179 : static gdk_return
1180 120871 : BATappend_or_update(BAT *b, BAT *p, const oid *positions, BAT *n,
1181 : bool mayappend, bool autoincr, bool force)
1182 : {
1183 120871 : lng t0 = GDKusec();
1184 120944 : oid pos = oid_nil;
1185 120944 : BUN nunique = 0;
1186 :
1187 120944 : if (b == NULL || b->ttype == TYPE_void || n == NULL) {
1188 : return GDK_SUCCEED;
1189 : }
1190 : /* either p or positions */
1191 120944 : assert((p == NULL) != (positions == NULL));
1192 120944 : if (p != NULL) {
1193 120768 : if (BATcount(p) != BATcount(n)) {
1194 0 : GDKerror("update BATs not the same size\n");
1195 0 : return GDK_FAIL;
1196 : }
1197 120768 : if (ATOMtype(p->ttype) != TYPE_oid) {
1198 0 : GDKerror("positions BAT not type OID\n");
1199 0 : return GDK_FAIL;
1200 : }
1201 120768 : if (BATtdense(p)) {
1202 112492 : pos = p->tseqbase;
1203 112492 : positions = &pos;
1204 112492 : autoincr = true;
1205 112492 : p = NULL;
1206 8276 : } else if (p->ttype != TYPE_void) {
1207 8274 : positions = (const oid *) Tloc(p, 0);
1208 8274 : autoincr = false;
1209 : } else {
1210 : autoincr = false;
1211 : }
1212 176 : } else if (autoincr) {
1213 176 : pos = *positions;
1214 : }
1215 120944 : if (BATcount(n) == 0) {
1216 : return GDK_SUCCEED;
1217 : }
1218 :
1219 27741 : BATiter ni = bat_iterator(n);
1220 :
1221 27747 : OIDXdestroy(b);
1222 27747 : STRMPdestroy(b);
1223 27752 : RTREEdestroy(b);
1224 : /* load hash so that we can maintain it */
1225 27751 : (void) BATcheckhash(b);
1226 :
1227 27743 : MT_lock_set(&b->theaplock);
1228 27749 : if (!force && (b->batRestricted != BAT_WRITE ||
1229 17 : (ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1)) {
1230 0 : MT_lock_unset(&b->theaplock);
1231 0 : bat_iterator_end(&ni);
1232 0 : GDKerror("access denied to %s, aborting.\n", BATgetId(b));
1233 0 : return GDK_FAIL;
1234 : }
1235 27749 : BATiter bi = bat_iterator_nolock(b);
1236 27749 : if (ni.count > BATcount(b) / gdk_unique_estimate_keep_fraction) {
1237 27122 : b->tunique_est = 0;
1238 : }
1239 :
1240 27749 : b->tsorted = b->trevsorted = false;
1241 27749 : b->tnosorted = b->tnorevsorted = 0;
1242 27749 : b->tseqbase = oid_nil;
1243 27749 : b->tkey = false;
1244 27749 : b->tnokey[0] = b->tnokey[1] = 0;
1245 :
1246 27749 : int (*atomcmp)(const void *, const void *) = ATOMcompare(b->ttype);
1247 27749 : const void *nil = ATOMnilptr(b->ttype);
1248 27749 : oid hseqend = b->hseqbase + BATcount(b);
1249 :
1250 27749 : MT_lock_unset(&b->theaplock);
1251 :
1252 31112 : bool anynil = false;
1253 31112 : bool locked = false;
1254 :
1255 31112 : if (b->tvheap) {
1256 1214747 : for (BUN i = 0; i < ni.count; i++) {
1257 1211986 : oid updid;
1258 1211986 : if (positions) {
1259 1210973 : updid = autoincr ? pos++ : *positions++;
1260 : } else {
1261 1013 : updid = BUNtoid(p, i);
1262 : }
1263 :
1264 1211986 : if (updid < b->hseqbase ||
1265 1211986 : (!mayappend && updid >= hseqend)) {
1266 0 : GDKerror("id out of range\n");
1267 0 : goto bailout;
1268 : }
1269 1211986 : updid -= b->hseqbase;
1270 1211986 : if (!force && updid < b->batInserted) {
1271 0 : GDKerror("updating committed value\n");
1272 0 : goto bailout;
1273 : }
1274 :
1275 1211986 : const void *new = BUNtvar(ni, i);
1276 :
1277 1201913 : if (updid >= BATcount(b)) {
1278 23534 : assert(mayappend);
1279 23534 : if (locked) {
1280 4 : MT_rwlock_wrunlock(&b->thashlock);
1281 4 : locked = false;
1282 : }
1283 23534 : if (b->tminpos != bi.minpos ||
1284 23533 : b->tmaxpos != bi.maxpos) {
1285 1 : MT_lock_set(&b->theaplock);
1286 1 : b->tminpos = bi.minpos;
1287 1 : b->tmaxpos = bi.maxpos;
1288 1 : MT_lock_unset(&b->theaplock);
1289 : }
1290 23534 : if (BATcount(b) < updid &&
1291 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1292 0 : bat_iterator_end(&ni);
1293 0 : return GDK_FAIL;
1294 : }
1295 23534 : if (BUNappend(b, new, force) != GDK_SUCCEED) {
1296 0 : bat_iterator_end(&ni);
1297 0 : return GDK_FAIL;
1298 : }
1299 23534 : bi = bat_iterator_nolock(b);
1300 136834 : continue;
1301 : }
1302 :
1303 : /* it is possible that a previous run was killed
1304 : * after an update (with a mmapped tail file)
1305 : * but before that was committed, then the
1306 : * offset may point outside of the vheap */
1307 1178379 : const void *old = BUNtvaroff(bi, updid) < bi.vhfree ? BUNtvar(bi, updid) : NULL;
1308 :
1309 1178744 : if (old && atomcmp(old, new) == 0) {
1310 : /* replacing with the same value:
1311 : * nothing to do */
1312 113300 : continue;
1313 : }
1314 :
1315 1064395 : bool isnil = atomcmp(new, nil) == 0;
1316 1071318 : anynil |= isnil;
1317 1071318 : MT_lock_set(&b->theaplock);
1318 1071627 : if (old == NULL ||
1319 1071627 : (b->tnil &&
1320 743 : !anynil &&
1321 743 : atomcmp(old, nil) == 0)) {
1322 : /* if old value is nil and no new
1323 : * value is, we're not sure anymore
1324 : * about the nil property, so we must
1325 : * clear it */
1326 741 : b->tnil = false;
1327 : }
1328 1071627 : b->tnonil &= !isnil;
1329 1071627 : b->tnil |= isnil;
1330 1071627 : MT_lock_unset(&b->theaplock);
1331 1071901 : if (bi.maxpos != BUN_NONE) {
1332 3448 : if (!isnil &&
1333 1724 : atomcmp(BUNtvar(bi, bi.maxpos), new) < 0) {
1334 : /* new value is larger than
1335 : * previous largest */
1336 22 : bi.maxpos = updid;
1337 3404 : } else if (old == NULL ||
1338 1714 : (atomcmp(BUNtvar(bi, bi.maxpos), old) == 0 &&
1339 12 : atomcmp(new, old) != 0)) {
1340 : /* old value is equal to
1341 : * largest and new value is
1342 : * smaller, so we don't know
1343 : * anymore which is the
1344 : * largest */
1345 12 : bi.maxpos = BUN_NONE;
1346 : }
1347 : }
1348 1071901 : if (bi.minpos != BUN_NONE) {
1349 3436 : if (!isnil &&
1350 1718 : atomcmp(BUNtvar(bi, bi.minpos), new) > 0) {
1351 : /* new value is smaller than
1352 : * previous smallest */
1353 14 : bi.minpos = updid;
1354 3408 : } else if (old == NULL ||
1355 1726 : (atomcmp(BUNtvar(bi, bi.minpos), old) == 0 &&
1356 22 : atomcmp(new, old) != 0)) {
1357 : /* old value is equal to
1358 : * smallest and new value is
1359 : * larger, so we don't know
1360 : * anymore which is the
1361 : * smallest */
1362 22 : bi.minpos = BUN_NONE;
1363 : }
1364 : }
1365 1071901 : if (!locked) {
1366 2283 : MT_rwlock_wrlock(&b->thashlock);
1367 2283 : locked = true;
1368 : }
1369 1071903 : if (old)
1370 1071903 : HASHdelete_locked(&bi, updid, old);
1371 0 : else if (b->thash) {
1372 0 : doHASHdestroy(b, b->thash);
1373 0 : b->thash = NULL;
1374 : }
1375 :
1376 1071879 : var_t d;
1377 1071879 : switch (b->twidth) {
1378 1058433 : case 1:
1379 1058433 : d = (var_t) ((uint8_t *) b->theap->base)[updid] + GDK_VAROFFSET;
1380 1058433 : break;
1381 10429 : case 2:
1382 10429 : d = (var_t) ((uint16_t *) b->theap->base)[updid] + GDK_VAROFFSET;
1383 10429 : break;
1384 2982 : case 4:
1385 2982 : d = (var_t) ((uint32_t *) b->theap->base)[updid];
1386 2982 : break;
1387 : #if SIZEOF_VAR_T == 8
1388 35 : case 8:
1389 35 : d = (var_t) ((uint64_t *) b->theap->base)[updid];
1390 35 : break;
1391 : #endif
1392 : default:
1393 0 : MT_UNREACHABLE();
1394 : }
1395 1071879 : MT_lock_set(&b->theaplock);
1396 1071799 : gdk_return rc = ATOMreplaceVAR(b, &d, new);
1397 1071736 : MT_lock_unset(&b->theaplock);
1398 1071839 : if (rc != GDK_SUCCEED) {
1399 0 : goto bailout;
1400 : }
1401 1071839 : if (b->twidth < SIZEOF_VAR_T &&
1402 1071893 : (b->twidth <= 2 ? d - GDK_VAROFFSET : d) >= ((size_t) 1 << (8 << b->tshift))) {
1403 : /* doesn't fit in current heap, upgrade it */
1404 20 : if (GDKupgradevarheap(b, d, 0, MAX(updid, b->batCount)) != GDK_SUCCEED) {
1405 0 : goto bailout;
1406 : }
1407 : }
1408 : /* in case ATOMreplaceVAR and/or
1409 : * GDKupgradevarheap replaces a heap, we need to
1410 : * reinitialize the iterator */
1411 : {
1412 : /* save and restore minpos/maxpos */
1413 1071839 : BUN minpos = bi.minpos;
1414 1071839 : BUN maxpos = bi.maxpos;
1415 1071839 : bi = bat_iterator_nolock(b);
1416 1071839 : bi.minpos = minpos;
1417 1071839 : bi.maxpos = maxpos;
1418 : }
1419 1071839 : switch (b->twidth) {
1420 1058375 : case 1:
1421 1058375 : ((uint8_t *) b->theap->base)[updid] = (uint8_t) (d - GDK_VAROFFSET);
1422 1058375 : break;
1423 10445 : case 2:
1424 10445 : ((uint16_t *) b->theap->base)[updid] = (uint16_t) (d - GDK_VAROFFSET);
1425 10445 : break;
1426 2984 : case 4:
1427 2984 : ((uint32_t *) b->theap->base)[updid] = (uint32_t) d;
1428 2984 : break;
1429 : #if SIZEOF_VAR_T == 8
1430 35 : case 8:
1431 35 : ((uint64_t *) b->theap->base)[updid] = (uint64_t) d;
1432 35 : break;
1433 : #endif
1434 : default:
1435 0 : MT_UNREACHABLE();
1436 : }
1437 1071839 : HASHinsert_locked(&bi, updid, new);
1438 :
1439 : }
1440 2761 : if (locked) {
1441 2281 : if (b->thash)
1442 2 : nunique = b->thash->nunique;
1443 2281 : MT_rwlock_wrunlock(&b->thashlock);
1444 2281 : locked = false;
1445 : }
1446 2761 : MT_lock_set(&b->theaplock);
1447 2761 : b->tvheap->dirty = true;
1448 2761 : MT_lock_unset(&b->theaplock);
1449 24984 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
1450 0 : assert(b->thash == NULL);
1451 0 : HASHdestroy(b); /* hash doesn't make sense for msk */
1452 0 : for (BUN i = 0; i < ni.count; i++) {
1453 0 : oid updid;
1454 0 : if (positions) {
1455 0 : updid = autoincr ? pos++ : *positions++;
1456 : } else {
1457 0 : updid = BUNtoid(p, i);
1458 : }
1459 :
1460 0 : if (updid < b->hseqbase ||
1461 0 : (!mayappend && updid >= hseqend)) {
1462 0 : GDKerror("id out of range\n");
1463 0 : bat_iterator_end(&ni);
1464 0 : return GDK_FAIL;
1465 : }
1466 0 : updid -= b->hseqbase;
1467 0 : if (!force && updid < b->batInserted) {
1468 0 : GDKerror("updating committed value\n");
1469 0 : bat_iterator_end(&ni);
1470 0 : return GDK_FAIL;
1471 : }
1472 0 : if (updid >= BATcount(b)) {
1473 0 : assert(mayappend);
1474 0 : if (BATcount(b) < updid &&
1475 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1476 0 : bat_iterator_end(&ni);
1477 0 : return GDK_FAIL;
1478 : }
1479 0 : if (BUNappend(b, BUNtmsk(ni, i), force) != GDK_SUCCEED) {
1480 0 : bat_iterator_end(&ni);
1481 0 : return GDK_FAIL;
1482 : }
1483 0 : continue;
1484 : }
1485 0 : mskSetVal(b, updid, Tmskval(&ni, i));
1486 : }
1487 0 : bi = bat_iterator_nolock(b);
1488 24984 : } else if (autoincr) {
1489 17162 : if (pos < b->hseqbase ||
1490 16306 : (!mayappend && pos + ni.count > hseqend)) {
1491 0 : GDKerror("id out of range\n");
1492 0 : bat_iterator_end(&ni);
1493 0 : return GDK_FAIL;
1494 : }
1495 17162 : pos -= b->hseqbase;
1496 17162 : if (!force && pos < b->batInserted) {
1497 0 : GDKerror("updating committed value\n");
1498 0 : bat_iterator_end(&ni);
1499 0 : return GDK_FAIL;
1500 : }
1501 :
1502 17162 : if (pos >= BATcount(b)) {
1503 442 : assert(mayappend);
1504 442 : bat_iterator_end(&ni);
1505 442 : if (BATcount(b) < pos &&
1506 0 : BUNappendmulti(b, NULL, (BUN) (pos - BATcount(b)), force) != GDK_SUCCEED) {
1507 : return GDK_FAIL;
1508 : }
1509 442 : return BATappend(b, n, NULL, force);
1510 : }
1511 16726 : if (pos + ni.count > BATcount(b) &&
1512 6 : BUNappendmulti(b, NULL, (BUN) (pos + ni.count - BATcount(b)), force) != GDK_SUCCEED) {
1513 0 : bat_iterator_end(&ni);
1514 0 : return GDK_FAIL;
1515 : }
1516 16720 : bi = bat_iterator_nolock(b);
1517 :
1518 : /* we copy all of n, so if there are nils in n we get
1519 : * nils in b (and else we don't know) */
1520 16720 : b->tnil = ni.nil;
1521 : /* we may not copy over all of b, so we only know that
1522 : * there are no nils in b afterward if there weren't
1523 : * any in either b or n to begin with */
1524 16720 : b->tnonil &= ni.nonil;
1525 : /* if there is no hash, we don't start the loop, if
1526 : * there is only a persisted hash, it will get destroyed
1527 : * in the first iteration, after which there is no hash
1528 : * and the loop ends */
1529 16720 : MT_rwlock_wrlock(&b->thashlock);
1530 16727 : locked = true;
1531 16727 : for (BUN i = pos, j = pos + ni.count; i < j && b->thash; i++)
1532 27 : HASHdelete_locked(&bi, i, Tloc(b, i));
1533 16700 : if (ni.type == TYPE_void) {
1534 0 : assert(b->ttype == TYPE_oid);
1535 0 : oid *o = Tloc(b, pos);
1536 0 : if (is_oid_nil(ni.tseq)) {
1537 : /* we may or may not overwrite the old
1538 : * min/max values */
1539 0 : bi.minpos = BUN_NONE;
1540 0 : bi.maxpos = BUN_NONE;
1541 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1542 0 : o[i] = oid_nil;
1543 0 : b->tnil = true;
1544 : } else {
1545 0 : oid v = ni.tseq;
1546 : /* we know min/max of n, so we know
1547 : * the new min/max of b if those of n
1548 : * are smaller/larger than the old */
1549 0 : if (bi.minpos != BUN_NONE) {
1550 0 : if (v <= BUNtoid(b, bi.minpos))
1551 0 : bi.minpos = pos;
1552 0 : else if (pos <= bi.minpos && bi.minpos < pos + ni.count)
1553 0 : bi.minpos = BUN_NONE;
1554 : }
1555 0 : if (complex_cand(n)) {
1556 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1557 0 : o[i] = *(oid *)Tpos(&ni, i);
1558 : /* last value */
1559 0 : v = o[ni.count - 1];
1560 : } else {
1561 0 : for (BUN i = 0, j = ni.count; i < j; i++)
1562 0 : o[i] = v++;
1563 : /* last value added (not one beyond) */
1564 0 : v--;
1565 : }
1566 0 : if (bi.maxpos != BUN_NONE) {
1567 0 : if (v >= BUNtoid(b, bi.maxpos))
1568 0 : bi.maxpos = pos + ni.count - 1;
1569 0 : else if (pos <= bi.maxpos && bi.maxpos < pos + ni.count)
1570 0 : bi.maxpos = BUN_NONE;
1571 : }
1572 : }
1573 : } else {
1574 : /* if the extremes of n are at least as
1575 : * extreme as those of b, we can replace b's
1576 : * min/max, else we don't know what b's new
1577 : * min/max are*/
1578 16992 : if (bi.minpos != BUN_NONE && ni.minpos != BUN_NONE &&
1579 292 : atomcmp(BUNtloc(bi, bi.minpos), BUNtail(ni, ni.minpos)) >= 0) {
1580 173 : bi.minpos = pos + ni.minpos;
1581 : } else {
1582 16527 : bi.minpos = BUN_NONE;
1583 : }
1584 17022 : if (bi.maxpos != BUN_NONE && ni.maxpos != BUN_NONE &&
1585 322 : atomcmp(BUNtloc(bi, bi.maxpos), BUNtail(ni, ni.maxpos)) <= 0) {
1586 234 : bi.maxpos = pos + ni.maxpos;
1587 : } else {
1588 16466 : bi.maxpos = BUN_NONE;
1589 : }
1590 16700 : memcpy(Tloc(b, pos), ni.base,
1591 16700 : ni.count << b->tshift);
1592 : }
1593 : /* either we have a hash that was updated above, or we
1594 : * have no hash; we cannot have the case where there is
1595 : * only a persisted (unloaded) hash since it would have
1596 : * been destroyed above */
1597 16700 : if (b->thash != NULL) {
1598 0 : for (BUN i = pos, j = pos + ni.count; i < j; i++)
1599 0 : HASHinsert_locked(&bi, i, Tloc(b, i));
1600 0 : if (b->thash)
1601 0 : nunique = b->thash->nunique;
1602 : }
1603 16712 : MT_rwlock_wrunlock(&b->thashlock);
1604 16729 : locked = false;
1605 16729 : if (ni.count == BATcount(b)) {
1606 : /* if we replaced all values of b by values
1607 : * from n, we can also copy the min/max
1608 : * properties */
1609 8291 : bi.minpos = ni.minpos;
1610 8291 : bi.maxpos = ni.maxpos;
1611 8291 : if (BATtdensebi(&ni)) {
1612 : /* replaced all of b with a dense sequence */
1613 47 : MT_lock_set(&b->theaplock);
1614 47 : BATtseqbase(b, ni.tseq);
1615 47 : MT_lock_unset(&b->theaplock);
1616 : }
1617 : }
1618 : } else {
1619 155685423 : for (BUN i = 0; i < ni.count; i++) {
1620 155677606 : oid updid;
1621 155677606 : if (positions) {
1622 : /* assert(!autoincr) */
1623 155677606 : updid = *positions++;
1624 : } else {
1625 0 : updid = BUNtoid(p, i);
1626 : }
1627 :
1628 155677606 : if (updid < b->hseqbase ||
1629 155677606 : (!mayappend && updid >= hseqend)) {
1630 0 : GDKerror("id out of range\n");
1631 0 : goto bailout;
1632 : }
1633 155677606 : updid -= b->hseqbase;
1634 155677606 : if (!force && updid < b->batInserted) {
1635 0 : GDKerror("updating committed value\n");
1636 0 : goto bailout;
1637 : }
1638 :
1639 155677606 : const void *new = BUNtloc(ni, i);
1640 :
1641 155677606 : if (updid >= BATcount(b)) {
1642 16059 : assert(mayappend);
1643 16059 : if (locked) {
1644 10 : MT_rwlock_wrunlock(&b->thashlock);
1645 10 : locked = false;
1646 : }
1647 16059 : if (b->tminpos != bi.minpos ||
1648 16057 : b->tmaxpos != bi.maxpos) {
1649 3 : MT_lock_set(&b->theaplock);
1650 3 : b->tminpos = bi.minpos;
1651 3 : b->tmaxpos = bi.maxpos;
1652 3 : MT_lock_unset(&b->theaplock);
1653 : }
1654 16059 : if (BATcount(b) < updid &&
1655 0 : BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
1656 0 : goto bailout;
1657 : }
1658 16059 : if (BUNappend(b, new, force) != GDK_SUCCEED) {
1659 0 : bat_iterator_end(&ni);
1660 0 : return GDK_FAIL;
1661 : }
1662 16059 : bi = bat_iterator_nolock(b);
1663 16059 : continue;
1664 : }
1665 :
1666 155661547 : const void *old = BUNtloc(bi, updid);
1667 155661547 : bool isnil = atomcmp(new, nil) == 0;
1668 152135084 : anynil |= isnil;
1669 152135084 : if (b->tnil &&
1670 1898 : !anynil &&
1671 1898 : atomcmp(old, nil) == 0) {
1672 : /* if old value is nil and no new
1673 : * value is, we're not sure anymore
1674 : * about the nil property, so we must
1675 : * clear it */
1676 1894 : b->tnil = false;
1677 : }
1678 152135084 : b->tnonil &= !isnil;
1679 152135084 : b->tnil |= isnil;
1680 152135084 : if (bi.maxpos != BUN_NONE) {
1681 8864 : if (!isnil &&
1682 4430 : atomcmp(BUNtloc(bi, bi.maxpos), new) < 0) {
1683 : /* new value is larger than
1684 : * previous largest */
1685 69 : bi.maxpos = updid;
1686 4372 : } else if (atomcmp(BUNtloc(bi, bi.maxpos), old) == 0 &&
1687 7 : atomcmp(new, old) != 0) {
1688 : /* old value is equal to
1689 : * largest and new value is
1690 : * smaller, so we don't know
1691 : * anymore which is the
1692 : * largest */
1693 7 : bi.maxpos = BUN_NONE;
1694 : }
1695 : }
1696 152135084 : if (bi.minpos != BUN_NONE) {
1697 8864 : if (!isnil &&
1698 4430 : atomcmp(BUNtloc(bi, bi.minpos), new) > 0) {
1699 : /* new value is smaller than
1700 : * previous smallest */
1701 6 : bi.minpos = updid;
1702 4434 : } else if (atomcmp(BUNtloc(bi, bi.minpos), old) == 0 &&
1703 6 : atomcmp(new, old) != 0) {
1704 : /* old value is equal to
1705 : * smallest and new value is
1706 : * larger, so we don't know
1707 : * anymore which is the
1708 : * smallest */
1709 6 : bi.minpos = BUN_NONE;
1710 : }
1711 : }
1712 :
1713 152135084 : if (!locked) {
1714 7813 : MT_rwlock_wrlock(&b->thashlock);
1715 7813 : locked = true;
1716 : }
1717 152135086 : HASHdelete_locked(&bi, updid, old);
1718 151819474 : switch (b->twidth) {
1719 26795763 : case 1:
1720 26795763 : ((bte *) b->theap->base)[updid] = * (bte *) new;
1721 26795763 : break;
1722 525061 : case 2:
1723 525061 : ((sht *) b->theap->base)[updid] = * (sht *) new;
1724 525061 : break;
1725 20747643 : case 4:
1726 20747643 : ((int *) b->theap->base)[updid] = * (int *) new;
1727 20747643 : break;
1728 98416702 : case 8:
1729 98416702 : ((lng *) b->theap->base)[updid] = * (lng *) new;
1730 98416702 : break;
1731 5334305 : case 16:
1732 : #ifdef HAVE_HGE
1733 5334305 : ((hge *) b->theap->base)[updid] = * (hge *) new;
1734 : #else
1735 : ((uuid *) b->theap->base)[updid] = * (uuid *) new;
1736 : #endif
1737 5334305 : break;
1738 0 : default:
1739 0 : memcpy(BUNtloc(bi, updid), new, ATOMsize(b->ttype));
1740 0 : break;
1741 : }
1742 151819474 : HASHinsert_locked(&bi, updid, new);
1743 : }
1744 7817 : if (locked) {
1745 7807 : if (b->thash)
1746 0 : nunique = b->thash->nunique;
1747 7807 : MT_rwlock_wrunlock(&b->thashlock);
1748 7807 : locked = false;
1749 : }
1750 : }
1751 27307 : bat_iterator_end(&ni);
1752 27288 : MT_lock_set(&b->theaplock);
1753 27300 : if (nunique != 0)
1754 2 : b->tunique_est = (double) nunique;
1755 27300 : b->tminpos = bi.minpos;
1756 27300 : b->tmaxpos = bi.maxpos;
1757 27300 : b->theap->dirty = true;
1758 27300 : MT_lock_unset(&b->theaplock);
1759 27311 : TRC_DEBUG(ALGO,
1760 : "BATreplace(" ALGOBATFMT "," ALGOOPTBATFMT "," ALGOBATFMT ") " LLFMT " usec\n",
1761 : ALGOBATPAR(b), ALGOOPTBATPAR(p), ALGOBATPAR(n),
1762 : GDKusec() - t0);
1763 : return GDK_SUCCEED;
1764 :
1765 0 : bailout:
1766 0 : bat_iterator_end(&ni);
1767 0 : if (locked) {
1768 0 : Hash *h = b->thash;
1769 0 : b->thash = NULL;
1770 0 : MT_rwlock_wrunlock(&b->thashlock);
1771 0 : doHASHdestroy(b, h);
1772 : }
1773 : return GDK_FAIL;
1774 : }
1775 :
1776 : /* replace values from b at locations specified in p with values in n */
1777 : gdk_return
1778 119902 : BATreplace(BAT *b, BAT *p, BAT *n, bool force)
1779 : {
1780 119902 : return BATappend_or_update(b, p, NULL, n, false, false, force);
1781 : }
1782 :
1783 : /* like BATreplace, but p may specify locations beyond the end of b */
1784 : gdk_return
1785 968 : BATupdate(BAT *b, BAT *p, BAT *n, bool force)
1786 : {
1787 968 : return BATappend_or_update(b, p, NULL, n, true, false, force);
1788 : }
1789 :
1790 : #if 0 /* not used */
1791 : /* like BATreplace, but the positions are given by an array of oid values */
1792 : gdk_return
1793 : BATreplacepos(BAT *b, const oid *positions, BAT *n, bool autoincr, bool force)
1794 : {
1795 : return BATappend_or_update(b, NULL, positions, n, false, autoincr, force);
1796 : }
1797 : #endif
1798 :
1799 : /* like BATreplace, but the positions are given by an array of oid
1800 : * values, and they may specify locations beyond the end of b */
1801 : gdk_return
1802 176 : BATupdatepos(BAT *b, const oid *positions, BAT *n, bool autoincr, bool force)
1803 : {
1804 176 : return BATappend_or_update(b, NULL, positions, n, true, autoincr, force);
1805 : }
1806 :
1807 : /*
1808 : * BAT Selections
1809 : * The BAT selectors are among the most heavily used operators.
1810 : * Their efficient implementation is therefore mandatory.
1811 : *
1812 : * BAT slice
1813 : * This function returns a horizontal slice from a BAT. It optimizes
1814 : * execution by avoiding to copy when the BAT is memory mapped (in
1815 : * this case, an independent submap is created) or else when it is
1816 : * read-only, then a VIEW bat is created as a result.
1817 : *
1818 : * If a new copy has to be created, this function takes care to
1819 : * preserve void-columns (in this case, the seqbase has to be
1820 : * recomputed in the result).
1821 : *
1822 : * The selected range is excluding the high value.
1823 : */
1824 : BAT *
1825 11507334 : BATslice(BAT *b, BUN l, BUN h)
1826 : {
1827 11507334 : BUN low = l;
1828 11507334 : BAT *bn = NULL;
1829 :
1830 11507334 : BATcheck(b, NULL);
1831 11507334 : BATiter bi = bat_iterator(b);
1832 11522023 : if (l > bi.count)
1833 : l = bi.count;
1834 11522023 : if (h > bi.count)
1835 : h = bi.count;
1836 11522023 : if (h < l)
1837 : h = l;
1838 :
1839 11522023 : if (complex_cand(b)) {
1840 : /* slicing a candidate list with exceptions */
1841 75 : struct canditer ci;
1842 75 : canditer_init(&ci, NULL, b);
1843 75 : if (b->hseqbase + l >= ci.hseq) {
1844 75 : l = b->hseqbase + l - ci.hseq;
1845 75 : h = b->hseqbase + h - ci.hseq;
1846 : } else {
1847 0 : l = 0;
1848 0 : if (b->hseqbase + h >= ci.hseq)
1849 0 : h = b->hseqbase + h - ci.hseq;
1850 : else
1851 : h = 0;
1852 : }
1853 75 : bn = canditer_slice(&ci, l, h);
1854 75 : goto doreturn;
1855 : }
1856 : /* If the source BAT is readonly, then we can obtain a VIEW
1857 : * that just reuses the memory of the source. */
1858 11521948 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1859 : /* forget about slices for bit masks: we can't deal
1860 : * with difference in alignment, so we'll just make a
1861 : * copy */
1862 0 : bn = COLnew((oid) (b->hseqbase + low), b->ttype, h - l, TRANSIENT);
1863 : /* we use BATappend with a candidate list to easily
1864 : * copy the part of b that we need */
1865 0 : BAT *s = BATdense(0, (oid) (b->hseqbase + low), h - l);
1866 0 : if (bn == NULL ||
1867 0 : s == NULL ||
1868 0 : BATappend(bn, b, s, false) != GDK_SUCCEED) {
1869 0 : BBPreclaim(bn);
1870 0 : BBPreclaim(s);
1871 0 : bn = NULL;
1872 0 : goto doreturn;
1873 : }
1874 0 : BBPunfix(s->batCacheid);
1875 0 : goto doreturn;
1876 : }
1877 11521948 : restrict_t prestricted;
1878 13431619 : if (bi.restricted == BAT_READ && VIEWtparent(b)) {
1879 1904221 : BAT *pb = BBP_desc(VIEWtparent(b));
1880 1904221 : MT_lock_set(&pb->theaplock);
1881 1903901 : prestricted = pb->batRestricted;
1882 1903901 : MT_lock_unset(&pb->theaplock);
1883 : } else {
1884 : prestricted = BAT_WRITE; /* just initialize with anything */
1885 : }
1886 11527398 : if (bi.restricted == BAT_READ &&
1887 11474270 : (!VIEWtparent(b) || prestricted == BAT_READ)) {
1888 11474468 : bn = VIEWcreate(b->hseqbase + low, b, l, h);
1889 11474468 : if (bn == NULL)
1890 : goto doreturn;
1891 : } else {
1892 : /* create a new BAT and put everything into it */
1893 52930 : BUN p = l;
1894 52930 : BUN q = h;
1895 :
1896 52930 : bn = COLnew((oid) (b->hseqbase + low), BATtdensebi(&bi) || (b->ttype == TYPE_oid && h == l) ? TYPE_void : b->ttype, h - l, TRANSIENT);
1897 53025 : if (bn == NULL)
1898 0 : goto doreturn;
1899 :
1900 53025 : if (bn->ttype == TYPE_void) {
1901 36370 : BATsetcount(bn, h - l);
1902 36378 : BATtseqbase(bn, is_oid_nil(bi.tseq) ? oid_nil : h == l ? 0 : (oid) (bi.tseq + low));
1903 16655 : } else if (bn->tvheap == NULL) {
1904 10200 : assert(BATatoms[bn->ttype].atomPut == NULL);
1905 10200 : memcpy(Tloc(bn, 0), (const char *) bi.base + (p << bi.shift),
1906 10200 : (q - p) << bn->tshift);
1907 10200 : bn->theap->dirty = true;
1908 10200 : BATsetcount(bn, h - l);
1909 : } else {
1910 1686875 : for (; p < q; p++) {
1911 1680416 : if (bunfastapp(bn, BUNtail(bi, p)) != GDK_SUCCEED) {
1912 0 : BBPreclaim(bn);
1913 0 : bn = NULL;
1914 0 : goto doreturn;
1915 : }
1916 : }
1917 : }
1918 52935 : bn->theap->dirty = true;
1919 52935 : bn->tsorted = bi.sorted || bn->batCount <= 1;
1920 52935 : bn->trevsorted = bi.revsorted || bn->batCount <= 1;
1921 52935 : bn->tkey = bi.key || bn->batCount <= 1;
1922 52935 : bn->tnonil = bi.nonil;
1923 52935 : bn->tnil = false; /* we don't know */
1924 52935 : if (bi.nosorted > l && bi.nosorted < h && !bn->tsorted)
1925 1637 : bn->tnosorted = bi.nosorted - l;
1926 : else
1927 51298 : bn->tnosorted = 0;
1928 52935 : if (bi.norevsorted > l && bi.norevsorted < h && !bn->trevsorted)
1929 4033 : bn->tnorevsorted = bi.norevsorted - l;
1930 : else
1931 48902 : bn->tnorevsorted = 0;
1932 52935 : if (bi.nokey[0] >= l && bi.nokey[0] < h &&
1933 42864 : bi.nokey[1] >= l && bi.nokey[1] < h &&
1934 418 : bi.nokey[0] != bi.nokey[1] &&
1935 : !bn->tkey) {
1936 418 : bn->tnokey[0] = bi.nokey[0] - l;
1937 418 : bn->tnokey[1] = bi.nokey[1] - l;
1938 : } else {
1939 52517 : bn->tnokey[0] = bn->tnokey[1] = 0;
1940 : }
1941 : }
1942 11528680 : doreturn:
1943 11528680 : bat_iterator_end(&bi);
1944 11529980 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",lo=" BUNFMT ",hi=" BUNFMT " -> "
1945 : ALGOOPTBATFMT "\n",
1946 : ALGOBATPAR(b), l, h, ALGOOPTBATPAR(bn));
1947 : return bn;
1948 : }
1949 :
1950 : #define BAT_ORDERED(TPE) \
1951 : do { \
1952 : const TPE *restrict vals = Tloc(b, 0); \
1953 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
1954 : if (vals[p - 1] > vals[p]) { \
1955 : b->tnosorted = p; \
1956 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
1957 : goto doreturn; \
1958 : } else if (vals[p - 1] < vals[p]) { \
1959 : if (!b->trevsorted && b->tnorevsorted == 0) { \
1960 : b->tnorevsorted = p; \
1961 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b)); \
1962 : } \
1963 : } else if (!b->tkey && b->tnokey[1] == 0) { \
1964 : b->tnokey[0] = p - 1; \
1965 : b->tnokey[1] = p; \
1966 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b)); \
1967 : } \
1968 : } \
1969 : } while (0)
1970 :
1971 : #define BAT_ORDERED_FP(TPE) \
1972 : do { \
1973 : const TPE *restrict vals = Tloc(b, 0); \
1974 : TPE prev = vals[0]; \
1975 : bool prevnil = is_##TPE##_nil(prev); \
1976 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
1977 : TPE next = vals[p]; \
1978 : int cmp = prevnil ? -!(prevnil = is_##TPE##_nil(next)) : (prevnil = is_##TPE##_nil(next)) ? 1 : (prev > next) - (prev < next); \
1979 : prev = next; \
1980 : if (cmp > 0) { \
1981 : b->tnosorted = bi.nosorted = p; \
1982 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
1983 : goto doreturn; \
1984 : } else if (cmp < 0) { \
1985 : if (!b->trevsorted && b->tnorevsorted == 0) { \
1986 : b->tnorevsorted = bi.norevsorted = p; \
1987 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b)); \
1988 : } \
1989 : } else if (!b->tkey && b->tnokey[1] == 0) { \
1990 : b->tnokey[0] = bi.nokey[0] = p - 1; \
1991 : b->tnokey[1] = bi.nokey[1] = p; \
1992 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b)); \
1993 : } \
1994 : } \
1995 : } while (0)
1996 :
1997 : /* Return whether the BAT is ordered or not. If we don't know, invest
1998 : * in a scan and record the results in the bat descriptor. If during
1999 : * the scan we happen to find evidence that the BAT is not reverse
2000 : * sorted, we record the location. */
2001 : bool
2002 2980087 : BATordered(BAT *b)
2003 : {
2004 2980087 : lng t0 = GDKusec();
2005 2980976 : bool sorted;
2006 :
2007 2980976 : MT_lock_set(&b->theaplock);
2008 2981280 : if (b->ttype == TYPE_void || b->tsorted || BATcount(b) == 0) {
2009 420281 : MT_lock_unset(&b->theaplock);
2010 420301 : return true;
2011 : }
2012 2560999 : if (b->tnosorted > 0 || !ATOMlinear(b->ttype)) {
2013 2100628 : MT_lock_unset(&b->theaplock);
2014 2100637 : return false;
2015 : }
2016 :
2017 : /* There are a few reasons why we need a lock here. It may be
2018 : * that multiple threads call this functions at the same time
2019 : * (happens a lot with mitosis/mergetable), but we only need to
2020 : * scan the bat in one thread: the others can reap the rewards
2021 : * when that one thread is done. Also, we need the heap to
2022 : * remain accessible (could have used bat_iterator for that),
2023 : * and, and this is the killer argument, we may need to make
2024 : * changes to the bat descriptor. */
2025 460371 : BATiter bi = bat_iterator_nolock(b);
2026 460371 : if (!b->tsorted && b->tnosorted == 0) {
2027 832305 : switch (ATOMbasetype(b->ttype)) {
2028 84069 : case TYPE_bte:
2029 126957982 : BAT_ORDERED(bte);
2030 : break;
2031 8636 : case TYPE_sht:
2032 1575089 : BAT_ORDERED(sht);
2033 : break;
2034 309573 : case TYPE_int:
2035 119894242 : BAT_ORDERED(int);
2036 : break;
2037 8936 : case TYPE_lng:
2038 62204496 : BAT_ORDERED(lng);
2039 : break;
2040 : #ifdef HAVE_HGE
2041 360 : case TYPE_hge:
2042 8002640 : BAT_ORDERED(hge);
2043 : break;
2044 : #endif
2045 974 : case TYPE_flt:
2046 8007244 : BAT_ORDERED_FP(flt);
2047 : break;
2048 794 : case TYPE_dbl:
2049 8230567 : BAT_ORDERED_FP(dbl);
2050 : break;
2051 : case TYPE_str:
2052 21313355 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2053 21299204 : int c;
2054 21299204 : const char *p1 = BUNtvar(bi, p - 1);
2055 21299049 : const char *p2 = BUNtvar(bi, p);
2056 21299206 : if (p1 == p2)
2057 : c = 0;
2058 2262780 : else if (p1[0] == '\200') {
2059 1738 : if (p2[0] == '\200')
2060 : c = 0;
2061 : else
2062 : c = -1;
2063 2261042 : } else if (p2[0] == '\200')
2064 : c = 1;
2065 : else
2066 2259946 : c = strcmp(p1, p2);
2067 2259946 : if (c > 0) {
2068 32694 : b->tnosorted = bi.nosorted = p;
2069 32694 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2070 32694 : goto doreturn;
2071 21266512 : } else if (c < 0) {
2072 2230001 : assert(!b->trevsorted);
2073 2230001 : if (b->tnorevsorted == 0) {
2074 15923 : b->tnorevsorted = bi.norevsorted = p;
2075 15923 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b));
2076 : }
2077 19036511 : } else if (b->tnokey[1] == 0) {
2078 4164 : assert(!b->tkey);
2079 4164 : b->tnokey[0] = bi.nokey[0] = p - 1;
2080 4164 : b->tnokey[1] = bi.nokey[1] = p;
2081 21266512 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b));
2082 : }
2083 : }
2084 : break;
2085 186 : default: {
2086 186 : int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
2087 2482 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2088 2437 : int c;
2089 2437 : if ((c = cmpf(BUNtail(bi, p - 1), BUNtail(bi, p))) > 0) {
2090 141 : b->tnosorted = bi.nosorted = p;
2091 141 : TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2092 141 : goto doreturn;
2093 2296 : } else if (c < 0) {
2094 2246 : if (!b->trevsorted && b->tnorevsorted == 0) {
2095 90 : b->tnorevsorted = bi.norevsorted = p;
2096 90 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b));
2097 : }
2098 50 : } else if (!b->tkey && b->tnokey[1] == 0) {
2099 8 : b->tnokey[0] = bi.nokey[0] = p - 1;
2100 8 : b->tnokey[1] = bi.nokey[1] = p;
2101 2296 : TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b));
2102 : }
2103 : }
2104 : break;
2105 : }
2106 : }
2107 : /* we only get here if we completed the scan; note that
2108 : * if we didn't record evidence about *reverse*
2109 : * sortedness, we know that the BAT is also reverse
2110 : * sorted; similarly, if we didn't record evidence about
2111 : * keyness, we know the BAT is key */
2112 141387 : b->tsorted = bi.sorted = true;
2113 141387 : TRC_DEBUG(ALGO, "Fixed sorted for " ALGOBATFMT " (" LLFMT " usec)\n", ALGOBATPAR(b), GDKusec() - t0);
2114 141773 : if (!b->trevsorted && b->tnorevsorted == 0) {
2115 58485 : b->trevsorted = bi.revsorted = true;
2116 58485 : TRC_DEBUG(ALGO, "Fixed revsorted for " ALGOBATFMT "\n", ALGOBATPAR(b));
2117 : }
2118 141773 : if (!b->tkey && b->tnokey[1] == 0) {
2119 51647 : b->tkey = bi.key = true;
2120 51647 : TRC_DEBUG(ALGO, "Fixed key for " ALGOBATFMT "\n", ALGOBATPAR(b));
2121 : }
2122 : }
2123 141773 : doreturn:
2124 460760 : sorted = b->tsorted;
2125 460760 : bat pbid = VIEWtparent(b);
2126 460760 : MT_lock_unset(&b->theaplock);
2127 461195 : if (pbid) {
2128 252577 : BAT *pb = BBP_desc(pbid);
2129 252577 : MT_lock_set(&pb->theaplock);
2130 252585 : if (bi.count == BATcount(pb) &&
2131 194659 : bi.h == pb->theap &&
2132 194659 : bi.type == pb->ttype) {
2133 : /* add to knowledge in parent bat */
2134 194655 : pb->tsorted |= bi.sorted;
2135 194655 : if (pb->tnosorted == 0)
2136 194655 : pb->tnosorted = bi.nosorted;
2137 194655 : pb->trevsorted |= bi.revsorted;
2138 194655 : if (pb->tnorevsorted == 0)
2139 26927 : pb->tnorevsorted = bi.norevsorted;
2140 194655 : pb->tkey |= bi.key;
2141 194655 : if (pb->tnokey[1] == 0) {
2142 171857 : pb->tnokey[0] = bi.nokey[0];
2143 171857 : pb->tnokey[1] = bi.nokey[1];
2144 : }
2145 : }
2146 252585 : MT_lock_unset(&pb->theaplock);
2147 : }
2148 : return sorted;
2149 : }
2150 :
2151 : #define BAT_REVORDERED(TPE) \
2152 : do { \
2153 : const TPE *restrict vals = Tloc(b, 0); \
2154 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
2155 : if (vals[p - 1] < vals[p]) { \
2156 : b->tnorevsorted = p; \
2157 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
2158 : goto doreturn; \
2159 : } \
2160 : } \
2161 : } while (0)
2162 :
2163 : #define BAT_REVORDERED_FP(TPE) \
2164 : do { \
2165 : const TPE *restrict vals = Tloc(b, 0); \
2166 : for (BUN q = BATcount(b), p = 1; p < q; p++) { \
2167 : TPE prev = vals[p - 1], next = vals[p]; \
2168 : int cmp = is_flt_nil(prev) ? -!is_flt_nil(next) : is_flt_nil(next) ? 1 : (prev > next) - (prev < next); \
2169 : if (cmp < 0) { \
2170 : b->tnorevsorted = bi.norevsorted = p; \
2171 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
2172 : goto doreturn; \
2173 : } \
2174 : } \
2175 : } while (0)
2176 :
2177 : /* Return whether the BAT is reverse ordered or not. If we don't
2178 : * know, invest in a scan and record the results in the bat
2179 : * descriptor. */
2180 : bool
2181 2853289 : BATordered_rev(BAT *b)
2182 : {
2183 2853289 : lng t0 = GDKusec();
2184 2853840 : bool revsorted;
2185 :
2186 2853840 : if (b == NULL || !ATOMlinear(b->ttype))
2187 : return false;
2188 2853825 : MT_lock_set(&b->theaplock);
2189 2853578 : if (BATcount(b) <= 1 || b->trevsorted) {
2190 254269 : MT_lock_unset(&b->theaplock);
2191 254290 : return true;
2192 : }
2193 2599309 : if (b->ttype == TYPE_void) {
2194 21888 : MT_lock_unset(&b->theaplock);
2195 21896 : return is_oid_nil(b->tseqbase);
2196 : }
2197 2577421 : if (BATtdense(b) || b->tnorevsorted > 0) {
2198 2483827 : MT_lock_unset(&b->theaplock);
2199 2483878 : return false;
2200 : }
2201 93594 : BATiter bi = bat_iterator_nolock(b);
2202 93594 : if (!b->trevsorted && b->tnorevsorted == 0) {
2203 150516 : switch (ATOMbasetype(b->ttype)) {
2204 34606 : case TYPE_bte:
2205 10956709 : BAT_REVORDERED(bte);
2206 : break;
2207 3672 : case TYPE_sht:
2208 3140172 : BAT_REVORDERED(sht);
2209 : break;
2210 32377 : case TYPE_int:
2211 1158568 : BAT_REVORDERED(int);
2212 : break;
2213 4534 : case TYPE_lng:
2214 2686047 : BAT_REVORDERED(lng);
2215 : break;
2216 : #ifdef HAVE_HGE
2217 121 : case TYPE_hge:
2218 989 : BAT_REVORDERED(hge);
2219 : break;
2220 : #endif
2221 511 : case TYPE_flt:
2222 1997 : BAT_REVORDERED_FP(flt);
2223 : break;
2224 321 : case TYPE_dbl:
2225 485509 : BAT_REVORDERED_FP(dbl);
2226 : break;
2227 17452 : default: {
2228 17452 : int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
2229 526841 : for (BUN q = BATcount(b), p = 1; p < q; p++) {
2230 523514 : if (cmpf(BUNtail(bi, p - 1), BUNtail(bi, p)) < 0) {
2231 14124 : b->tnorevsorted = p;
2232 14124 : TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
2233 14124 : goto doreturn;
2234 : }
2235 : }
2236 : break;
2237 : }
2238 : }
2239 25781 : b->trevsorted = bi.revsorted = true;
2240 25781 : TRC_DEBUG(ALGO, "Fixed revsorted for " ALGOBATFMT " (" LLFMT " usec)\n", ALGOBATPAR(b), GDKusec() - t0);
2241 : }
2242 25781 : doreturn:
2243 93593 : revsorted = b->trevsorted;
2244 93593 : bat pbid = VIEWtparent(b);
2245 93593 : MT_lock_unset(&b->theaplock);
2246 93848 : if (pbid) {
2247 29337 : BAT *pb = BBP_desc(pbid);
2248 29337 : MT_lock_set(&pb->theaplock);
2249 29336 : if (bi.count == BATcount(pb) &&
2250 4150 : bi.h == pb->theap &&
2251 4150 : bi.type == pb->ttype) {
2252 : /* add to knowledge in parent bat */
2253 4150 : pb->trevsorted |= bi.revsorted;
2254 4150 : if (pb->tnorevsorted == 0)
2255 4150 : pb->tnorevsorted = bi.norevsorted;
2256 : }
2257 29336 : MT_lock_unset(&pb->theaplock);
2258 : }
2259 : return revsorted;
2260 : }
2261 :
2262 : /* figure out which sort function is to be called
2263 : * stable sort can produce an error (not enough memory available),
2264 : * "quick" sort does not produce errors */
2265 : static gdk_return
2266 3952870 : do_sort(void *restrict h, void *restrict t, const void *restrict base,
2267 : size_t n, int hs, int ts, int tpe, bool reverse, bool nilslast,
2268 : bool stable)
2269 : {
2270 3952870 : if (n <= 1) /* trivially sorted */
2271 : return GDK_SUCCEED;
2272 2753847 : switch (tpe) {
2273 2706002 : case TYPE_bte:
2274 : case TYPE_sht:
2275 : case TYPE_int:
2276 : case TYPE_lng:
2277 : #ifdef HAVE_HGE
2278 : case TYPE_hge:
2279 : #endif
2280 : case TYPE_date:
2281 : case TYPE_daytime:
2282 : case TYPE_timestamp:
2283 2706002 : assert(base == NULL);
2284 2706002 : if (nilslast == reverse && (stable || n > 100))
2285 20327 : return GDKrsort(h, t, n, hs, ts, reverse, false);
2286 : break;
2287 4 : case TYPE_uuid:
2288 4 : assert(base == NULL);
2289 4 : if (nilslast == reverse && (stable || n > 100))
2290 1 : return GDKrsort(h, t, n, hs, ts, reverse, true);
2291 : break;
2292 : default:
2293 : break;
2294 : }
2295 47920 : if (stable) {
2296 27 : if (reverse)
2297 0 : return GDKssort_rev(h, t, base, n, hs, ts, tpe);
2298 : else
2299 27 : return GDKssort(h, t, base, n, hs, ts, tpe);
2300 : } else {
2301 2733492 : GDKqsort(h, t, base, n, hs, ts, tpe, reverse, nilslast);
2302 : }
2303 2733492 : return GDK_SUCCEED;
2304 : }
2305 :
2306 : /* Sort the bat b according to both o and g. The stable and reverse
2307 : * parameters indicate whether the sort should be stable or descending
2308 : * respectively. The parameter b is required, o and g are optional
2309 : * (i.e., they may be NULL).
2310 : *
2311 : * A sorted copy is returned through the sorted parameter, the new
2312 : * ordering is returned through the order parameter, group information
2313 : * is returned through the groups parameter. All three output
2314 : * parameters may be NULL. If they're all NULL, this function does
2315 : * nothing.
2316 : *
2317 : * If o is specified, it is used to first rearrange b according to the
2318 : * order specified in o, after which b is sorted taking g into
2319 : * account.
2320 : *
2321 : * If g is specified, it indicates groups which should be individually
2322 : * ordered. Each row of consecutive equal values in g indicates a
2323 : * group which is sorted according to stable and reverse. g is used
2324 : * after the order in b was rearranged according to o.
2325 : *
2326 : * The outputs order and groups can be used in subsequent calls to
2327 : * this function. This can be used if multiple BATs need to be sorted
2328 : * together. The BATs should then be sorted in order of significance,
2329 : * and each following call should use the original unordered BAT plus
2330 : * the order and groups bat from the previous call. In this case, the
2331 : * sorted BATs are not of much use, so the sorted output parameter
2332 : * does not need to be specified.
2333 : * Apart from error checking and maintaining reference counts, sorting
2334 : * three columns (col1, col2, col3) could look like this with the
2335 : * sorted results in (col1s, col2s, col3s):
2336 : * BATsort(&col1s, &ord1, &grp1, col1, NULL, NULL, false, false, false);
2337 : * BATsort(&col2s, &ord2, &grp2, col2, ord1, grp1, false, false, false);
2338 : * BATsort(&col3s, NULL, NULL, col3, ord2, grp2, false, false, false);
2339 : * Note that the "reverse" parameter can be different for each call.
2340 : */
2341 : gdk_return
2342 29353 : BATsort(BAT **sorted, BAT **order, BAT **groups,
2343 : BAT *b, BAT *o, BAT *g, bool reverse, bool nilslast, bool stable)
2344 : {
2345 29353 : BAT *bn = NULL, *on = NULL, *gn = NULL, *pb = NULL;
2346 29353 : BATiter pbi;
2347 29353 : oid *restrict grps, *restrict ords, prev;
2348 29353 : BUN p, q, r;
2349 29353 : lng t0 = GDKusec();
2350 29356 : bool mkorderidx, orderidxlock = false;
2351 29356 : Heap *oidxh = NULL;
2352 :
2353 : /* we haven't implemented NILs as largest value for stable
2354 : * sort, so NILs come first for ascending and last for
2355 : * descending */
2356 29356 : assert(!stable || reverse == nilslast);
2357 :
2358 29356 : if (b == NULL) {
2359 0 : GDKerror("b must exist\n");
2360 0 : return GDK_FAIL;
2361 : }
2362 29356 : if (stable && reverse != nilslast) {
2363 0 : GDKerror("stable sort cannot have reverse != nilslast\n");
2364 0 : return GDK_FAIL;
2365 : }
2366 29356 : if (!ATOMlinear(b->ttype)) {
2367 0 : GDKerror("type %s cannot be sorted\n", ATOMname(b->ttype));
2368 0 : return GDK_FAIL;
2369 : }
2370 29356 : MT_lock_set(&b->theaplock);
2371 29360 : if (b->ttype == TYPE_void) {
2372 148 : b->tsorted = true;
2373 222 : if (b->trevsorted != (is_oid_nil(b->tseqbase) || b->batCount <= 1)) {
2374 0 : b->trevsorted = !b->trevsorted;
2375 : }
2376 296 : if (b->tkey != (!is_oid_nil(b->tseqbase) || b->batCount <= 1)) {
2377 0 : b->tkey = !b->tkey;
2378 : }
2379 29212 : } else if (b->batCount <= 1) {
2380 9443 : if (!b->tsorted || !b->trevsorted) {
2381 29 : b->tsorted = b->trevsorted = true;
2382 : }
2383 : }
2384 29360 : MT_lock_unset(&b->theaplock);
2385 29361 : if (o != NULL &&
2386 16037 : (ATOMtype(o->ttype) != TYPE_oid || /* oid tail */
2387 16037 : BATcount(o) != BATcount(b) || /* same size as b */
2388 6345 : (o->ttype == TYPE_void && /* no nil tail */
2389 2467 : BATcount(o) != 0 &&
2390 2467 : is_oid_nil(o->tseqbase)))) {
2391 0 : GDKerror("o must have type oid and same size as b\n");
2392 0 : return GDK_FAIL;
2393 : }
2394 29361 : if (g != NULL &&
2395 16037 : (ATOMtype(g->ttype) != TYPE_oid || /* oid tail */
2396 16037 : !g->tsorted || /* sorted */
2397 16037 : BATcount(g) != BATcount(b) || /* same size as b */
2398 6606 : (g->ttype == TYPE_void && /* no nil tail */
2399 2729 : BATcount(g) != 0 &&
2400 2729 : is_oid_nil(g->tseqbase)))) {
2401 1 : GDKerror("g must have type oid, sorted on the tail, "
2402 : "and same size as b\n");
2403 1 : return GDK_FAIL;
2404 : }
2405 29360 : if (sorted == NULL && order == NULL) {
2406 : /* no place to put result, so we're done quickly */
2407 0 : GDKerror("no place to put the result.\n");
2408 0 : return GDK_FAIL;
2409 : }
2410 29360 : if (g == NULL && !stable) {
2411 : /* pre-ordering doesn't make sense if we're not
2412 : * subsorting and the sort is not stable */
2413 13041 : o = NULL;
2414 : }
2415 29360 : if (b->tnonil) {
2416 : /* if there are no nils, placement of nils doesn't
2417 : * matter, so set nilslast such that ordered bits can
2418 : * be used */
2419 20843 : nilslast = reverse;
2420 : }
2421 29360 : pbi = bat_iterator(NULL);
2422 30347 : if (BATcount(b) <= 1 ||
2423 19745 : (reverse == nilslast &&
2424 : (reverse ? b->trevsorted : b->tsorted) &&
2425 5083 : o == NULL && g == NULL &&
2426 2398 : (groups == NULL || BATtkey(b) ||
2427 : (reverse ? b->tsorted : b->trevsorted)))) {
2428 : /* trivially (sub)sorted, and either we don't need to
2429 : * return group information, or we can trivially
2430 : * deduce the groups */
2431 11391 : if (sorted) {
2432 9887 : bn = COLcopy(b, b->ttype, false, TRANSIENT);
2433 9887 : if (bn == NULL)
2434 0 : goto error;
2435 9887 : *sorted = bn;
2436 : }
2437 11391 : if (order) {
2438 10515 : on = BATdense(b->hseqbase, b->hseqbase, BATcount(b));
2439 10520 : if (on == NULL)
2440 0 : goto error;
2441 10520 : *order = on;
2442 : }
2443 11396 : if (groups) {
2444 5948 : if (BATtkey(b)) {
2445 : /* singleton groups */
2446 5513 : gn = BATdense(0, 0, BATcount(b));
2447 5513 : if (gn == NULL)
2448 0 : goto error;
2449 : } else {
2450 : /* single group */
2451 435 : const oid *o = 0;
2452 435 : assert(BATcount(b) == 1 ||
2453 : (b->tsorted && b->trevsorted));
2454 435 : gn = BATconstant(0, TYPE_oid, &o, BATcount(b), TRANSIENT);
2455 436 : if (gn == NULL)
2456 0 : goto error;
2457 : }
2458 5949 : *groups = gn;
2459 : }
2460 11397 : bat_iterator_end(&pbi);
2461 11394 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o="
2462 : ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
2463 : ",reverse=%d,nilslast=%d,stable=%d) = ("
2464 : ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2465 : ALGOOPTBATFMT " -- trivial (" LLFMT
2466 : " usec)\n",
2467 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2468 : ALGOOPTBATPAR(g), reverse, nilslast, stable,
2469 : ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
2470 : ALGOOPTBATPAR(on), GDKusec() - t0);
2471 11394 : return GDK_SUCCEED;
2472 : }
2473 17960 : if (VIEWtparent(b)) {
2474 3567 : pb = BATdescriptor(VIEWtparent(b));
2475 3569 : if (pb != NULL &&
2476 3569 : (b->tbaseoff != pb->tbaseoff ||
2477 2805 : BATcount(b) != BATcount(pb) ||
2478 2585 : b->hseqbase != pb->hseqbase ||
2479 2574 : BATatoms[b->ttype].atomCmp != BATatoms[pb->ttype].atomCmp)) {
2480 995 : BBPunfix(pb->batCacheid);
2481 995 : pb = NULL;
2482 : }
2483 : } else {
2484 : pb = b;
2485 : }
2486 17962 : bat_iterator_end(&pbi);
2487 17962 : pbi = bat_iterator(pb);
2488 : /* when we will create an order index if it doesn't already exist */
2489 17962 : mkorderidx = (g == NULL && !reverse && !nilslast && pb != NULL && (order || !pbi.transient));
2490 17962 : if (g == NULL && !reverse && !nilslast && pb != NULL) {
2491 5790 : (void) BATcheckorderidx(pb);
2492 5790 : MT_lock_set(&pb->batIdxLock);
2493 5790 : if (pb->torderidx) {
2494 59 : if (!stable || ((oid *) pb->torderidx->base)[2]) {
2495 : /* we can use the order index */
2496 59 : oidxh = pb->torderidx;
2497 59 : HEAPincref(oidxh);
2498 : }
2499 : mkorderidx = false;
2500 5731 : } else if (b != pb) {
2501 : /* don't build orderidx on parent bat */
2502 : mkorderidx = false;
2503 4828 : } else if (mkorderidx) {
2504 : /* keep lock when going to create */
2505 4228 : orderidxlock = true;
2506 : }
2507 5790 : if (!orderidxlock)
2508 1562 : MT_lock_unset(&pb->batIdxLock);
2509 : }
2510 17962 : if (g == NULL && o == NULL && !reverse && !nilslast && oidxh != NULL) {
2511 : /* there is an order index that we can use */
2512 59 : on = COLnew(pb->hseqbase, TYPE_oid, pbi.count, TRANSIENT);
2513 59 : if (on == NULL)
2514 0 : goto error;
2515 59 : memcpy(Tloc(on, 0), (oid *) oidxh->base + ORDERIDXOFF, pbi.count * sizeof(oid));
2516 59 : BATsetcount(on, BATcount(b));
2517 59 : HEAPdecref(oidxh, false);
2518 59 : oidxh = NULL;
2519 59 : on->tkey = true;
2520 59 : on->tnil = false;
2521 59 : on->tnonil = true;
2522 59 : on->tsorted = on->trevsorted = false;
2523 59 : on->tseqbase = oid_nil;
2524 59 : if (sorted || groups) {
2525 59 : bn = BATproject(on, b);
2526 59 : if (bn == NULL)
2527 0 : goto error;
2528 59 : bn->tsorted = true;
2529 59 : if (groups) {
2530 4 : if (BATgroup_internal(groups, NULL, NULL, bn, NULL, g, NULL, NULL, true) != GDK_SUCCEED)
2531 0 : goto error;
2532 4 : if (sorted &&
2533 4 : (*groups)->tkey &&
2534 : g == NULL) {
2535 : /* if new groups bat is key
2536 : * and since there is no input
2537 : * groups bat, we know the
2538 : * result bat is key */
2539 4 : bn->tkey = true;
2540 : }
2541 : }
2542 59 : if (sorted)
2543 59 : *sorted = bn;
2544 : else {
2545 0 : BBPunfix(bn->batCacheid);
2546 0 : bn = NULL;
2547 : }
2548 : }
2549 59 : if (order)
2550 9 : *order = on;
2551 : else {
2552 50 : BBPunfix(on->batCacheid);
2553 50 : on = NULL;
2554 : }
2555 59 : bat_iterator_end(&pbi);
2556 59 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o="
2557 : ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
2558 : ",reverse=%d,nilslast=%d,stable=%d) = ("
2559 : ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2560 : ALGOOPTBATFMT " -- orderidx (" LLFMT
2561 : " usec)\n",
2562 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2563 : ALGOOPTBATPAR(g), reverse, nilslast, stable,
2564 : ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
2565 : ALGOOPTBATPAR(on), GDKusec() - t0);
2566 59 : if (pb != NULL && pb != b)
2567 53 : BBPunfix(pb->batCacheid);
2568 59 : return GDK_SUCCEED;
2569 11638 : } else if (oidxh) {
2570 0 : HEAPdecref(oidxh, false);
2571 0 : oidxh = NULL;
2572 : }
2573 17903 : if (o) {
2574 11079 : bn = BATproject(o, b);
2575 11079 : if (bn == NULL)
2576 0 : goto error;
2577 11079 : if (bn->ttype == TYPE_void || isVIEW(bn)) {
2578 4892 : BAT *b2 = COLcopy(bn, ATOMtype(bn->ttype), true, TRANSIENT);
2579 2445 : BBPunfix(bn->batCacheid);
2580 2445 : bn = b2;
2581 : }
2582 11079 : if (pb) {
2583 10637 : bat_iterator_end(&pbi);
2584 10637 : if (pb != b)
2585 1281 : BBPunfix(pb->batCacheid);
2586 10637 : pbi = bat_iterator(NULL);
2587 10637 : pb = NULL;
2588 : }
2589 : } else {
2590 6824 : bn = COLcopy(b, b->ttype, true, TRANSIENT);
2591 : }
2592 17898 : if (bn == NULL)
2593 0 : goto error;
2594 17898 : if (order) {
2595 : /* prepare order bat */
2596 15149 : if (o) {
2597 : /* make copy of input so that we can refine it;
2598 : * copy can be read-only if we take the shortcut
2599 : * below in the case g is "key" */
2600 9443 : on = COLcopy(o, TYPE_oid,
2601 9443 : g == NULL ||
2602 9443 : !(g->tkey || g->ttype == TYPE_void),
2603 : TRANSIENT);
2604 9443 : if (on == NULL)
2605 0 : goto error;
2606 9443 : BAThseqbase(on, b->hseqbase);
2607 9443 : on->tminpos = BUN_NONE;
2608 9443 : on->tmaxpos = BUN_NONE;
2609 : } else {
2610 : /* create new order */
2611 5706 : on = COLnew(b->hseqbase, TYPE_oid, BATcount(bn), TRANSIENT);
2612 5710 : if (on == NULL)
2613 0 : goto error;
2614 5710 : ords = (oid *) Tloc(on, 0);
2615 31048708 : for (p = 0, q = BATcount(bn); p < q; p++)
2616 31042998 : ords[p] = p + b->hseqbase;
2617 5710 : BATsetcount(on, BATcount(bn));
2618 5710 : on->tkey = true;
2619 5710 : on->tnil = false;
2620 5710 : on->tnonil = true;
2621 : }
2622 : /* COLcopy above can create TYPE_void */
2623 15153 : if (on->ttype != TYPE_void) {
2624 14390 : on->tsorted = on->trevsorted = false; /* it won't be sorted */
2625 14390 : on->tseqbase = oid_nil; /* and hence not dense */
2626 14390 : on->tnosorted = on->tnorevsorted = 0;
2627 : }
2628 15153 : *order = on;
2629 15153 : ords = (oid *) Tloc(on, 0);
2630 : } else {
2631 : ords = NULL;
2632 : }
2633 17902 : if (g) {
2634 11079 : if (g->tkey || g->ttype == TYPE_void) {
2635 : /* if g is "key", all groups are size 1, so no
2636 : * subsorting needed */
2637 4979 : if (sorted) {
2638 4593 : *sorted = bn;
2639 : } else {
2640 386 : BBPunfix(bn->batCacheid);
2641 386 : bn = NULL;
2642 : }
2643 4979 : if (order) {
2644 3808 : *order = on;
2645 3808 : if (o) {
2646 : /* we can inherit sortedness
2647 : * after all */
2648 3808 : on->tsorted = o->tsorted;
2649 3808 : on->trevsorted = o->trevsorted;
2650 3808 : if (o->tnosorted)
2651 56 : on->tnosorted = o->tnosorted;
2652 3808 : if (o->tnorevsorted)
2653 74 : on->tnorevsorted = o->tnorevsorted;
2654 : } else {
2655 : /* we didn't rearrange, so
2656 : * still sorted */
2657 0 : on->tsorted = true;
2658 0 : on->trevsorted = false;
2659 : }
2660 3808 : if (BATcount(on) <= 1) {
2661 0 : on->tsorted = true;
2662 0 : on->trevsorted = true;
2663 : }
2664 : }
2665 4979 : if (groups) {
2666 2879 : gn = COLcopy(g, g->ttype, false, TRANSIENT);
2667 2879 : if (gn == NULL)
2668 0 : goto error;
2669 2879 : *groups = gn;
2670 : }
2671 4979 : bat_iterator_end(&pbi);
2672 4979 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
2673 : ",o=" ALGOOPTBATFMT ",g=" ALGOBATFMT
2674 : ",reverse=%d,nilslast=%d,stable=%d"
2675 : ") = (" ALGOOPTBATFMT ","
2676 : ALGOOPTBATFMT "," ALGOOPTBATFMT
2677 : " -- key group (" LLFMT " usec)\n",
2678 : ALGOBATPAR(b), ALGOOPTBATPAR(o),
2679 : ALGOBATPAR(g), reverse, nilslast,
2680 : stable, ALGOOPTBATPAR(bn),
2681 : ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on),
2682 : GDKusec() - t0);
2683 4979 : if (pb != NULL && pb != b)
2684 0 : BBPunfix(pb->batCacheid);
2685 4979 : return GDK_SUCCEED;
2686 : }
2687 6100 : assert(g->ttype == TYPE_oid);
2688 6100 : grps = (oid *) Tloc(g, 0);
2689 6100 : prev = grps[0];
2690 6100 : if (BATmaterialize(bn, BUN_NONE) != GDK_SUCCEED)
2691 0 : goto error;
2692 45652015 : for (r = 0, p = 1, q = BATcount(g); p < q; p++) {
2693 45645915 : if (grps[p] != prev) {
2694 : /* sub sort [r,p) */
2695 7580911 : if (do_sort(Tloc(bn, r),
2696 3640375 : ords ? ords + r : NULL,
2697 3940536 : bn->tvheap ? bn->tvheap->base : NULL,
2698 3940536 : p - r, bn->twidth, ords ? sizeof(oid) : 0,
2699 3940536 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)
2700 0 : goto error;
2701 3940536 : r = p;
2702 3940536 : prev = grps[p];
2703 : }
2704 : }
2705 : /* sub sort [r,q) */
2706 11735 : if (do_sort(Tloc(bn, r),
2707 5635 : ords ? ords + r : NULL,
2708 6100 : bn->tvheap ? bn->tvheap->base : NULL,
2709 6100 : p - r, bn->twidth, ords ? sizeof(oid) : 0,
2710 6100 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)
2711 0 : goto error;
2712 : /* if single group (r==0) the result is (rev)sorted,
2713 : * otherwise (maybe) not */
2714 6100 : bn->tsorted = r == 0 && !reverse && !nilslast;
2715 12165 : bn->trevsorted = r == 0 && reverse && nilslast;
2716 : } else {
2717 6823 : Heap *m = NULL;
2718 : /* only invest in creating an order index if the BAT
2719 : * is persistent */
2720 6823 : if (mkorderidx) {
2721 4226 : assert(orderidxlock);
2722 4226 : if ((m = createOIDXheap(pb, stable)) != NULL &&
2723 : ords == NULL) {
2724 0 : ords = (oid *) m->base + ORDERIDXOFF;
2725 0 : if (o && o->ttype != TYPE_void)
2726 0 : memcpy(ords, Tloc(o, 0), BATcount(o) * sizeof(oid));
2727 0 : else if (o)
2728 0 : for (p = 0, q = BATcount(o); p < q; p++)
2729 0 : ords[p] = p + o->tseqbase;
2730 : else
2731 0 : for (p = 0, q = BATcount(b); p < q; p++)
2732 0 : ords[p] = p + b->hseqbase;
2733 : }
2734 : }
2735 6825 : if ((reverse != nilslast ||
2736 12971 : (reverse ? !bn->trevsorted : !bn->tsorted)) &&
2737 12466 : (BATmaterialize(bn, BUN_NONE) != GDK_SUCCEED ||
2738 6233 : do_sort(Tloc(bn, 0),
2739 : ords,
2740 6233 : bn->tvheap ? bn->tvheap->base : NULL,
2741 6233 : BATcount(bn), bn->twidth, ords ? sizeof(oid) : 0,
2742 6233 : bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)) {
2743 0 : if (m != NULL) {
2744 0 : HEAPfree(m, true);
2745 0 : GDKfree(m);
2746 : }
2747 0 : goto error;
2748 : }
2749 6824 : bn->tsorted = !reverse && !nilslast;
2750 6824 : bn->trevsorted = reverse && nilslast;
2751 6824 : if (m != NULL) {
2752 4227 : assert(orderidxlock);
2753 4227 : if (pb->torderidx == NULL) {
2754 4227 : if (ords != (oid *) m->base + ORDERIDXOFF) {
2755 4227 : memcpy((oid *) m->base + ORDERIDXOFF,
2756 : ords,
2757 4227 : pbi.count * sizeof(oid));
2758 : }
2759 4227 : pb->torderidx = m;
2760 4227 : persistOIDX(pb);
2761 : } else {
2762 0 : HEAPfree(m, true);
2763 0 : GDKfree(m);
2764 : }
2765 : }
2766 : }
2767 12922 : if (orderidxlock) {
2768 4225 : MT_lock_unset(&pb->batIdxLock);
2769 4228 : orderidxlock = false;
2770 : }
2771 12925 : bn->theap->dirty = true;
2772 12925 : bn->tnosorted = 0;
2773 12925 : bn->tnorevsorted = 0;
2774 12925 : bn->tnokey[0] = bn->tnokey[1] = 0;
2775 12925 : bn->tminpos = BUN_NONE;
2776 12925 : bn->tmaxpos = BUN_NONE;
2777 12925 : if (groups) {
2778 7929 : if (BATgroup_internal(groups, NULL, NULL, bn, NULL, g, NULL, NULL, true) != GDK_SUCCEED)
2779 0 : goto error;
2780 7929 : if ((*groups)->tkey &&
2781 910 : (g == NULL || (g->tsorted && g->trevsorted))) {
2782 : /* if new groups bat is key and the input
2783 : * group bat has a single value (both sorted
2784 : * and revsorted), we know the result bat is
2785 : * key */
2786 1111 : bn->tkey = true;
2787 : }
2788 : }
2789 :
2790 12925 : bat_iterator_end(&pbi);
2791 12923 : if (sorted)
2792 9700 : *sorted = bn;
2793 : else {
2794 3223 : BBPunfix(bn->batCacheid);
2795 3223 : bn = NULL;
2796 : }
2797 :
2798 12923 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o=" ALGOOPTBATFMT
2799 : ",g=" ALGOOPTBATFMT ",reverse=%d,nilslast=%d,"
2800 : "stable=%d) = (" ALGOOPTBATFMT "," ALGOOPTBATFMT ","
2801 : ALGOOPTBATFMT " -- %ssort (" LLFMT " usec)\n",
2802 : ALGOBATPAR(b), ALGOOPTBATPAR(o), ALGOOPTBATPAR(g),
2803 : reverse, nilslast, stable, ALGOOPTBATPAR(bn),
2804 : ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on),
2805 : g ? "grouped " : "", GDKusec() - t0);
2806 12923 : if (pb && pb != b)
2807 1240 : BBPunfix(pb->batCacheid);
2808 : return GDK_SUCCEED;
2809 :
2810 0 : error:
2811 0 : bat_iterator_end(&pbi);
2812 0 : if (orderidxlock)
2813 0 : MT_lock_unset(&pb->batIdxLock);
2814 0 : if (oidxh)
2815 0 : HEAPdecref(oidxh, false);
2816 0 : BBPreclaim(bn);
2817 0 : if (pb && pb != b)
2818 0 : BBPunfix(pb->batCacheid);
2819 0 : BBPreclaim(on);
2820 0 : if (sorted)
2821 0 : *sorted = NULL;
2822 0 : if (order)
2823 0 : *order = NULL;
2824 0 : if (groups)
2825 0 : *groups = NULL;
2826 : return GDK_FAIL;
2827 : }
2828 :
2829 : /* return a new BAT of length n with seqbase hseq, and the constant v
2830 : * in the tail */
2831 : BAT *
2832 1694188 : BATconstant(oid hseq, int tailtype, const void *v, BUN n, role_t role)
2833 : {
2834 1694188 : BAT *bn;
2835 1694188 : void *restrict p;
2836 1694188 : BUN i;
2837 1694188 : lng t0 = 0;
2838 :
2839 1694188 : TRC_DEBUG_IF(ALGO) t0 = GDKusec();
2840 1694188 : if (v == NULL)
2841 : return NULL;
2842 1694188 : bn = COLnew(hseq, tailtype, n, role);
2843 1698425 : if (bn != NULL && n > 0) {
2844 84913 : p = Tloc(bn, 0);
2845 84913 : switch (ATOMstorage(tailtype)) {
2846 7 : case TYPE_void:
2847 7 : v = &oid_nil;
2848 7 : BATtseqbase(bn, oid_nil);
2849 7 : break;
2850 0 : case TYPE_msk:
2851 0 : if (*(msk*)v) {
2852 0 : memset(p, 0xFF, 4 * ((n + 31) / 32));
2853 0 : if (n & 31) {
2854 0 : uint32_t *m = p;
2855 0 : m[n / 32] &= (1U << (n % 32)) - 1;
2856 : }
2857 : } else
2858 0 : memset(p, 0x00, 4 * ((n + 31) / 32));
2859 : break;
2860 10627 : case TYPE_bte:
2861 10627 : memset(p, *(bte*)v, n);
2862 10627 : break;
2863 : case TYPE_sht:
2864 7161269 : for (i = 0; i < n; i++)
2865 7142827 : ((sht *) p)[i] = *(sht *) v;
2866 : break;
2867 : case TYPE_int:
2868 : case TYPE_flt:
2869 : assert(sizeof(int) == sizeof(flt));
2870 214440495 : for (i = 0; i < n; i++)
2871 214434439 : ((int *) p)[i] = *(int *) v;
2872 : break;
2873 : case TYPE_lng:
2874 : case TYPE_dbl:
2875 : assert(sizeof(lng) == sizeof(dbl));
2876 232254316 : for (i = 0; i < n; i++)
2877 232221418 : ((lng *) p)[i] = *(lng *) v;
2878 : break;
2879 : #ifdef HAVE_HGE
2880 : case TYPE_hge:
2881 24506539 : for (i = 0; i < n; i++)
2882 24504971 : ((hge *) p)[i] = *(hge *) v;
2883 : break;
2884 : #endif
2885 : case TYPE_uuid:
2886 200047 : for (i = 0; i < n; i++)
2887 200038 : ((uuid *) p)[i] = *(uuid *) v;
2888 : break;
2889 15158 : case TYPE_str:
2890 : /* insert the first value, then just copy the
2891 : * offset lots of times */
2892 15158 : if (tfastins_nocheck(bn, 0, v) != GDK_SUCCEED) {
2893 0 : BBPreclaim(bn);
2894 0 : return NULL;
2895 : }
2896 15166 : char val[sizeof(var_t)];
2897 15166 : memcpy(val, Tloc(bn, 0), bn->twidth);
2898 15166 : if (bn->twidth == 1 && n > 1) {
2899 : /* single byte value: we have a
2900 : * function for that */
2901 7924 : memset(Tloc(bn, 1), val[0], n - 1);
2902 : } else {
2903 7242 : char *p = Tloc(bn, 0);
2904 7262 : for (i = 1; i < n; i++) {
2905 20 : p += bn->twidth;
2906 20 : memcpy(p, val, bn->twidth);
2907 : }
2908 : }
2909 : break;
2910 : default:
2911 333710 : for (i = 0; i < n; i++)
2912 333560 : if (tfastins_nocheck(bn, i, v) != GDK_SUCCEED) {
2913 0 : BBPreclaim(bn);
2914 0 : return NULL;
2915 : }
2916 : break;
2917 : }
2918 84923 : bn->theap->dirty = true;
2919 84923 : bn->tnil = n >= 1 && ATOMnilptr(tailtype) && (*ATOMcompare(tailtype))(v, ATOMnilptr(tailtype)) == 0;
2920 84872 : BATsetcount(bn, n);
2921 84987 : bn->tsorted = bn->trevsorted = ATOMlinear(tailtype);
2922 84987 : bn->tnonil = !bn->tnil;
2923 84987 : bn->tkey = BATcount(bn) <= 1;
2924 : }
2925 1698499 : TRC_DEBUG(ALGO, "-> " ALGOOPTBATFMT " " LLFMT "usec\n",
2926 : ALGOOPTBATPAR(bn), GDKusec() - t0);
2927 : return bn;
2928 : }
2929 :
2930 : /*
2931 : * BAT Aggregates
2932 : *
2933 : * We retain the size() and card() aggregate results in the column
2934 : * descriptor. We would like to have such functionality in an
2935 : * extensible way for many aggregates, for DD (1) we do not want to
2936 : * change the binary BAT format on disk and (2) aggr and size are the
2937 : * most relevant aggregates.
2938 : *
2939 : * It is all hacked into the aggr[3] records; three adjacent integers
2940 : * that were left over in the column record. We refer to these as if
2941 : * it where an int aggr[3] array. The below routines set and retrieve
2942 : * the aggregate values from the tail of the BAT, as many
2943 : * aggregate-manipulating BAT functions work on tail.
2944 : *
2945 : * The rules are as follows: aggr[0] contains the alignment ID of the
2946 : * column (if set i.e. nonzero). Hence, if this value is nonzero and
2947 : * equal to b->talign, the precomputed aggregate values in
2948 : * aggr[GDK_AGGR_SIZE] and aggr[GDK_AGGR_CARD] hold. However, only one
2949 : * of them may be set at the time. This is encoded by the value
2950 : * int_nil, which cannot occur in these two aggregates.
2951 : *
2952 : * This was now extended to record the property whether we know there
2953 : * is a nil value present by mis-using the highest bits of both
2954 : * GDK_AGGR_SIZE and GDK_AGGR_CARD.
2955 : */
2956 :
2957 : void
2958 45139864 : PROPdestroy_nolock(BAT *b)
2959 : {
2960 45139864 : PROPrec *p = b->tprops;
2961 45139864 : PROPrec *n;
2962 :
2963 45139864 : b->tprops = NULL;
2964 45142578 : while (p) {
2965 4230 : n = p->next;
2966 4230 : assert(p->id != (enum prop_t) 20);
2967 4230 : VALclear(&p->v);
2968 4230 : GDKfree(p);
2969 4230 : p = n;
2970 : }
2971 45138348 : }
2972 :
2973 : void
2974 426 : PROPdestroy(BAT *b)
2975 : {
2976 426 : MT_lock_set(&b->theaplock);
2977 426 : PROPdestroy_nolock(b);
2978 426 : MT_lock_unset(&b->theaplock);
2979 426 : }
2980 :
2981 : ValPtr
2982 213233581 : BATgetprop_nolock(BAT *b, enum prop_t idx)
2983 : {
2984 213233581 : PROPrec *p;
2985 :
2986 213233581 : p = b->tprops;
2987 213238595 : while (p && p->id != idx)
2988 5014 : p = p->next;
2989 213233581 : return p ? &p->v : NULL;
2990 : }
2991 :
2992 : void
2993 398212 : BATrmprop_nolock(BAT *b, enum prop_t idx)
2994 : {
2995 398212 : PROPrec *prop = b->tprops, *prev = NULL;
2996 :
2997 398447 : while (prop) {
2998 398320 : if (prop->id == idx) {
2999 398085 : if (prev)
3000 106 : prev->next = prop->next;
3001 : else
3002 397979 : b->tprops = prop->next;
3003 398085 : VALclear(&prop->v);
3004 398085 : GDKfree(prop);
3005 398085 : return;
3006 : }
3007 235 : prev = prop;
3008 235 : prop = prop->next;
3009 : }
3010 : }
3011 :
3012 : ValPtr
3013 402354 : BATsetprop_nolock(BAT *b, enum prop_t idx, int type, const void *v)
3014 : {
3015 402354 : PROPrec *p;
3016 :
3017 402354 : p = b->tprops;
3018 409387 : while (p && p->id != idx)
3019 7033 : p = p->next;
3020 402354 : if (p == NULL) {
3021 402347 : if ((p = GDKmalloc(sizeof(PROPrec))) == NULL) {
3022 : /* properties are hints, so if we can't create
3023 : * one we ignore the error */
3024 0 : GDKclrerr();
3025 0 : return NULL;
3026 : }
3027 402347 : p->id = idx;
3028 402347 : p->next = b->tprops;
3029 402347 : p->v.vtype = 0;
3030 402347 : b->tprops = p;
3031 : } else {
3032 7 : VALclear(&p->v);
3033 : }
3034 402354 : if (VALinit(&p->v, type, v) == NULL) {
3035 : /* failed to initialize, so remove property */
3036 0 : BATrmprop_nolock(b, idx);
3037 0 : GDKclrerr();
3038 0 : p = NULL;
3039 : }
3040 0 : return p ? &p->v : NULL;
3041 : }
3042 :
3043 : ValPtr
3044 3269124 : BATgetprop(BAT *b, enum prop_t idx)
3045 : {
3046 3269124 : ValPtr p;
3047 :
3048 3269124 : MT_lock_set(&b->theaplock);
3049 3272078 : p = BATgetprop_nolock(b, idx);
3050 3271953 : MT_lock_unset(&b->theaplock);
3051 3272924 : return p;
3052 : }
3053 :
3054 : ValPtr
3055 4040 : BATsetprop(BAT *b, enum prop_t idx, int type, const void *v)
3056 : {
3057 4040 : ValPtr p;
3058 4040 : MT_lock_set(&b->theaplock);
3059 4040 : p = BATsetprop_nolock(b, idx, type, v);
3060 4040 : MT_lock_unset(&b->theaplock);
3061 4040 : return p;
3062 : }
3063 :
3064 : void
3065 2 : BATrmprop(BAT *b, enum prop_t idx)
3066 : {
3067 2 : MT_lock_set(&b->theaplock);
3068 2 : BATrmprop_nolock(b, idx);
3069 2 : MT_lock_unset(&b->theaplock);
3070 2 : }
3071 :
3072 : /*
3073 : * The BATcount_no_nil function counts all BUN in a BAT that have a
3074 : * non-nil tail value.
3075 : * This function does not fail (the callers currently don't check for failure).
3076 : */
3077 : BUN
3078 2248 : BATcount_no_nil(BAT *b, BAT *s)
3079 : {
3080 2248 : BUN cnt = 0;
3081 2248 : const void *restrict p, *restrict nil;
3082 2248 : const char *restrict base;
3083 2248 : int t;
3084 2248 : int (*cmp)(const void *, const void *);
3085 2248 : struct canditer ci;
3086 2248 : oid hseq;
3087 :
3088 2248 : BATcheck(b, 0);
3089 :
3090 2248 : hseq = b->hseqbase;
3091 2248 : canditer_init(&ci, b, s);
3092 2250 : BATiter bi = bat_iterator(b);
3093 2248 : if (bi.nonil) {
3094 2010 : bat_iterator_end(&bi);
3095 2011 : return ci.ncand;
3096 : }
3097 238 : p = bi.base;
3098 238 : t = ATOMbasetype(bi.type);
3099 238 : switch (t) {
3100 0 : case TYPE_void:
3101 0 : cnt = ci.ncand * BATtdensebi(&bi);
3102 0 : break;
3103 0 : case TYPE_msk:
3104 0 : cnt = ci.ncand;
3105 0 : break;
3106 16 : case TYPE_bte:
3107 32 : CAND_LOOP(&ci)
3108 16 : cnt += !is_bte_nil(((const bte *) p)[canditer_next(&ci) - hseq]);
3109 : break;
3110 0 : case TYPE_sht:
3111 0 : CAND_LOOP(&ci)
3112 0 : cnt += !is_sht_nil(((const sht *) p)[canditer_next(&ci) - hseq]);
3113 : break;
3114 112 : case TYPE_int:
3115 3894960 : CAND_LOOP(&ci)
3116 3894848 : cnt += !is_int_nil(((const int *) p)[canditer_next(&ci) - hseq]);
3117 : break;
3118 85 : case TYPE_lng:
3119 156873 : CAND_LOOP(&ci)
3120 156788 : cnt += !is_lng_nil(((const lng *) p)[canditer_next(&ci) - hseq]);
3121 : break;
3122 : #ifdef HAVE_HGE
3123 0 : case TYPE_hge:
3124 0 : CAND_LOOP(&ci)
3125 0 : cnt += !is_hge_nil(((const hge *) p)[canditer_next(&ci) - hseq]);
3126 : break;
3127 : #endif
3128 0 : case TYPE_flt:
3129 0 : CAND_LOOP(&ci)
3130 0 : cnt += !is_flt_nil(((const flt *) p)[canditer_next(&ci) - hseq]);
3131 : break;
3132 0 : case TYPE_dbl:
3133 0 : CAND_LOOP(&ci)
3134 0 : cnt += !is_dbl_nil(((const dbl *) p)[canditer_next(&ci) - hseq]);
3135 : break;
3136 0 : case TYPE_uuid:
3137 0 : CAND_LOOP(&ci)
3138 0 : cnt += !is_uuid_nil(((const uuid *) p)[canditer_next(&ci) - hseq]);
3139 : break;
3140 25 : case TYPE_str:
3141 25 : base = bi.vh->base;
3142 25 : switch (bi.width) {
3143 23 : case 1:
3144 4805 : CAND_LOOP(&ci)
3145 4782 : cnt += base[(var_t) ((const uint8_t *) p)[canditer_next(&ci) - hseq] + GDK_VAROFFSET] != '\200';
3146 : break;
3147 1 : case 2:
3148 144 : CAND_LOOP(&ci)
3149 143 : cnt += base[(var_t) ((const uint16_t *) p)[canditer_next(&ci) - hseq] + GDK_VAROFFSET] != '\200';
3150 : break;
3151 1 : case 4:
3152 168 : CAND_LOOP(&ci)
3153 167 : cnt += base[(var_t) ((const uint32_t *) p)[canditer_next(&ci) - hseq]] != '\200';
3154 : break;
3155 : #if SIZEOF_VAR_T == 8
3156 0 : case 8:
3157 0 : CAND_LOOP(&ci)
3158 0 : cnt += base[(var_t) ((const uint64_t *) p)[canditer_next(&ci) - hseq]] != '\200';
3159 : break;
3160 : #endif
3161 : default:
3162 0 : MT_UNREACHABLE();
3163 : }
3164 : break;
3165 0 : default:
3166 0 : nil = ATOMnilptr(t);
3167 0 : cmp = ATOMcompare(t);
3168 0 : if (nil == NULL) {
3169 0 : cnt = ci.ncand;
3170 0 : } else if (b->tvheap) {
3171 0 : base = b->tvheap->base;
3172 0 : CAND_LOOP(&ci)
3173 0 : cnt += (*cmp)(nil, base + ((const var_t *) p)[canditer_next(&ci) - hseq]) != 0;
3174 : } else {
3175 0 : CAND_LOOP(&ci)
3176 0 : cnt += (*cmp)(BUNtloc(bi, canditer_next(&ci) - hseq), nil) != 0;
3177 : }
3178 : break;
3179 : }
3180 238 : if (cnt == bi.count) {
3181 35 : MT_lock_set(&b->theaplock);
3182 35 : if (cnt == BATcount(b) && bi.h == b->theap) {
3183 : /* we learned something */
3184 35 : b->tnonil = true;
3185 35 : assert(!b->tnil);
3186 35 : b->tnil = false;
3187 : }
3188 35 : bat pbid = VIEWtparent(b);
3189 35 : MT_lock_unset(&b->theaplock);
3190 35 : if (pbid) {
3191 21 : BAT *pb = BATdescriptor(pbid);
3192 21 : if (pb) {
3193 21 : MT_lock_set(&pb->theaplock);
3194 21 : if (cnt == BATcount(pb) &&
3195 0 : bi.h == pb->theap &&
3196 0 : !pb->tnonil) {
3197 0 : pb->tnonil = true;
3198 0 : assert(!pb->tnil);
3199 0 : pb->tnil = false;
3200 : }
3201 21 : MT_lock_unset(&pb->theaplock);
3202 21 : BBPunfix(pb->batCacheid);
3203 : }
3204 : }
3205 : }
3206 238 : bat_iterator_end(&bi);
3207 238 : return cnt;
3208 : }
|