Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * @a M. L. Kersten, P. Boncz, N. Nes
15 : * @* BAT Module
16 : * In this Chapter we describe the BAT implementation in more detail.
17 : * The routines mentioned are primarily meant to simplify the library
18 : * implementation.
19 : *
20 : * @+ BAT Construction
21 : * BATs are implemented in several blocks of memory, prepared for disk
22 : * storage and easy shipment over a network.
23 : *
24 : * The BAT starts with a descriptor, which indicates the required BAT
25 : * library version and the BAT administration details. In particular,
26 : * it describes the binary relationship maintained and the location of
27 : * fields required for storage.
28 : *
29 : * The general layout of the BAT in this implementation is as follows.
30 : * Each BAT comes with a heap for the loc-size buns and, optionally,
31 : * with heaps to manage the variable-sized data items of both
32 : * dimensions. The buns are assumed to be stored as loc-size objects.
33 : * This is essentially an array of structs to store the associations.
34 : * The size is determined at BAT creation time using an upper bound on
35 : * the number of elements to be accommodated. In case of overflow,
36 : * its storage space is extended automatically.
37 : *
38 : * The capacity of a BAT places an upper limit on the number of BUNs
39 : * to be stored initially. The actual space set aside may be quite
40 : * large. Moreover, the size is aligned to int boundaries to speedup
41 : * access and avoid some machine limitations.
42 : *
43 : * Initialization of the variable parts rely on type specific routines
44 : * called atomHeap.
45 : */
46 : #include "monetdb_config.h"
47 : #include "gdk.h"
48 : #include "gdk_private.h"
49 : #include "mutils.h"
50 :
51 : #ifdef ALIGN
52 : #undef ALIGN
53 : #endif
54 : #define ALIGN(n,b) ((b)?(b)*(1+(((n)-1)/(b))):n)
55 :
56 : #define ATOMneedheap(tpe) (BATatoms[tpe].atomHeap != NULL)
57 :
58 : BAT *
59 23380398 : BATcreatedesc(oid hseq, int tt, bool heapnames, role_t role, uint16_t width)
60 : {
61 23380398 : bat bid;
62 23380398 : BAT *bn;
63 23380398 : Heap *h = NULL, *vh = NULL;
64 :
65 : /*
66 : * Alloc space for the BAT and its dependent records.
67 : */
68 23380398 : assert(tt >= 0);
69 :
70 23380398 : if (heapnames) {
71 12019286 : if ((h = GDKmalloc(sizeof(Heap))) == NULL) {
72 : return NULL;
73 : }
74 24156520 : *h = (Heap) {
75 12081322 : .farmid = BBPselectfarm(role, tt, offheap),
76 : .dirty = true,
77 : .refs = ATOMIC_VAR_INIT(1),
78 : };
79 :
80 12075198 : if (ATOMneedheap(tt)) {
81 1044625 : if ((vh = GDKmalloc(sizeof(Heap))) == NULL) {
82 0 : GDKfree(h);
83 0 : return NULL;
84 : }
85 1052487 : *vh = (Heap) {
86 1052158 : .farmid = BBPselectfarm(role, tt, varheap),
87 : .dirty = true,
88 : .refs = ATOMIC_VAR_INIT(1),
89 : };
90 : }
91 : }
92 :
93 23444172 : bid = BBPallocbat(tt);
94 23511211 : if (bid == 0) {
95 0 : GDKfree(h);
96 0 : GDKfree(vh);
97 0 : return NULL;
98 : }
99 23511211 : bn = BBP_desc(bid);
100 :
101 : /*
102 : * Fill in basic column info
103 : */
104 46970865 : *bn = (BAT) {
105 : .batCacheid = bid,
106 : .hseqbase = hseq,
107 :
108 : .ttype = tt,
109 : .tkey = true,
110 : .tnonil = true,
111 : .tnil = false,
112 23459654 : .tsorted = ATOMlinear(tt),
113 : .trevsorted = ATOMlinear(tt),
114 23459654 : .tascii = tt == TYPE_str,
115 : .tseqbase = oid_nil,
116 : .tminpos = BUN_NONE,
117 : .tmaxpos = BUN_NONE,
118 : .tunique_est = 0.0,
119 :
120 : .batRole = role,
121 : .batTransient = true,
122 : .batRestricted = BAT_WRITE,
123 : .theap = h,
124 : .tvheap = vh,
125 23511211 : .creator_tid = MT_getpid(),
126 : };
127 :
128 23459654 : if (bn->theap) {
129 12054604 : bn->theap->parentid = bn->batCacheid;
130 12054604 : const char *nme = BBP_physical(bn->batCacheid);
131 12054604 : settailname(bn->theap, nme, tt, width);
132 :
133 12030768 : if (bn->tvheap) {
134 1038660 : bn->tvheap->parentid = bn->batCacheid;
135 1038660 : strconcat_len(bn->tvheap->filename,
136 : sizeof(bn->tvheap->filename),
137 : nme, ".theap", NULL);
138 : }
139 : }
140 23448286 : char name[MT_NAME_LEN];
141 23448286 : snprintf(name, sizeof(name), "heaplock%d", bn->batCacheid); /* fits */
142 23448286 : MT_lock_init(&bn->theaplock, name);
143 23413529 : snprintf(name, sizeof(name), "BATlock%d", bn->batCacheid); /* fits */
144 23413529 : MT_lock_init(&bn->batIdxLock, name);
145 23465922 : snprintf(name, sizeof(name), "hashlock%d", bn->batCacheid); /* fits */
146 23465922 : MT_rwlock_init(&bn->thashlock, name);
147 23401126 : return bn;
148 : }
149 :
150 : uint8_t
151 24026491 : ATOMelmshift(int sz)
152 : {
153 24026491 : uint8_t sh;
154 24026491 : int i = sz >> 1;
155 :
156 44505481 : for (sh = 0; i != 0; sh++) {
157 20478990 : i >>= 1;
158 : }
159 24026491 : return sh;
160 : }
161 :
162 :
163 : void
164 12049033 : BATsetdims(BAT *b, uint16_t width)
165 : {
166 12049033 : b->twidth = b->ttype == TYPE_str ? width > 0 ? width : 1 : ATOMsize(b->ttype);
167 12049033 : b->tshift = ATOMelmshift(b->twidth);
168 12049033 : assert_shift_width(b->tshift, b->twidth);
169 12049033 : }
170 :
171 : const char *
172 1260 : BATtailname(const BAT *b)
173 : {
174 1260 : if (b->ttype == TYPE_str) {
175 340 : switch (b->twidth) {
176 250 : case 1:
177 250 : return "tail1";
178 75 : case 2:
179 75 : return "tail2";
180 15 : case 4:
181 : #if SIZEOF_VAR_T == 8
182 15 : return "tail4";
183 : case 8:
184 : #endif
185 : break;
186 : default:
187 0 : MT_UNREACHABLE();
188 : }
189 : }
190 : return "tail";
191 : }
192 :
193 : void
194 12117688 : settailname(Heap *restrict tail, const char *restrict physnme, int tt, int width)
195 : {
196 12117688 : if (tt == TYPE_str) {
197 1072505 : switch (width) {
198 1015713 : case 0:
199 : case 1:
200 1015713 : strconcat_len(tail->filename,
201 : sizeof(tail->filename), physnme,
202 : ".tail1", NULL);
203 1015713 : return;
204 52182 : case 2:
205 52182 : strconcat_len(tail->filename,
206 : sizeof(tail->filename), physnme,
207 : ".tail2", NULL);
208 52182 : return;
209 4610 : case 4:
210 : #if SIZEOF_VAR_T == 8
211 4610 : strconcat_len(tail->filename,
212 : sizeof(tail->filename), physnme,
213 : ".tail4", NULL);
214 4610 : return;
215 : case 8:
216 : #endif
217 : break;
218 : default:
219 0 : MT_UNREACHABLE();
220 : }
221 : }
222 11045183 : strconcat_len(tail->filename, sizeof(tail->filename), physnme,
223 : ".tail", NULL);
224 : }
225 :
226 : /*
227 : * @- BAT allocation
228 : * Allocate BUN heap and variable-size atomheaps (see e.g. strHeap).
229 : * We now initialize new BATs with their heapname such that the
230 : * modified HEAPalloc/HEAPextend primitives can possibly use memory
231 : * mapped files as temporary heap storage.
232 : *
233 : * In case of huge bats, we want HEAPalloc to write a file to disk,
234 : * and memory map it. To make this possible, we must provide it with
235 : * filenames.
236 : */
237 : BAT *
238 12034412 : COLnew2(oid hseq, int tt, BUN cap, role_t role, uint16_t width)
239 : {
240 12034412 : BAT *bn;
241 :
242 12034412 : assert(cap <= BUN_MAX);
243 12034412 : assert(hseq <= oid_nil);
244 12034412 : ERRORcheck((tt < 0) || (tt > GDKatomcnt), "tt error\n", NULL);
245 :
246 : /* round up to multiple of BATTINY */
247 12034412 : if (cap < BUN_MAX - BATTINY)
248 12016237 : cap = (cap + BATTINY - 1) & ~(BATTINY - 1);
249 12034412 : if (ATOMstorage(tt) == TYPE_msk) {
250 152454 : if (cap < 8*BATTINY)
251 : cap = 8*BATTINY;
252 : else
253 50763 : cap = (cap + 31) & ~(BUN)31;
254 11881958 : } else if (cap < BATTINY)
255 : cap = BATTINY;
256 : /* limit the size */
257 3503254 : if (cap > BUN_MAX)
258 : cap = BUN_MAX;
259 :
260 12034412 : bn = BATcreatedesc(hseq, tt, true, role, width);
261 12037213 : if (bn == NULL)
262 : return NULL;
263 :
264 12037213 : BATsetdims(bn, width);
265 12024398 : bn->batCapacity = cap;
266 :
267 12024398 : if (ATOMstorage(tt) == TYPE_msk)
268 152419 : cap /= 8; /* 8 values per byte */
269 :
270 : /* alloc the main heaps */
271 12024398 : if (tt && HEAPalloc(bn->theap, cap, bn->twidth) != GDK_SUCCEED) {
272 0 : goto bailout;
273 : }
274 :
275 12067573 : if (bn->tvheap && width == 0 && ATOMheap(tt, bn->tvheap, cap) != GDK_SUCCEED) {
276 0 : HEAPfree(bn->theap, true);
277 0 : goto bailout;
278 : }
279 12061141 : DELTAinit(bn);
280 12056778 : if (BBPcacheit(bn, true) != GDK_SUCCEED) {
281 : /* cannot happen, function always returns success */
282 0 : goto bailout;
283 : }
284 12098955 : TRC_DEBUG(ALGO, "-> " ALGOBATFMT "\n", ALGOBATPAR(bn));
285 : return bn;
286 0 : bailout:
287 0 : BBPclear(bn->batCacheid);
288 0 : return NULL;
289 : }
290 :
291 : BAT *
292 11762941 : COLnew(oid hseq, int tt, BUN cap, role_t role)
293 : {
294 11762941 : return COLnew2(hseq, tt, cap, role, 0);
295 : }
296 :
297 : BAT *
298 5608183 : BATdense(oid hseq, oid tseq, BUN cnt)
299 : {
300 5608183 : BAT *bn;
301 :
302 5608183 : bn = COLnew(hseq, TYPE_void, 0, TRANSIENT);
303 5618463 : if (bn != NULL) {
304 5618463 : BATtseqbase(bn, tseq);
305 5616009 : BATsetcount(bn, cnt);
306 5615748 : TRC_DEBUG(ALGO, OIDFMT "," OIDFMT "," BUNFMT
307 : "-> " ALGOBATFMT "\n", hseq, tseq, cnt,
308 : ALGOBATPAR(bn));
309 : }
310 5615748 : return bn;
311 : }
312 :
313 : BAT *
314 0 : BATattach(int tt, const char *heapfile, role_t role)
315 : {
316 0 : BAT *bn;
317 0 : char *p;
318 0 : size_t m;
319 0 : FILE *f;
320 :
321 0 : ERRORcheck(tt <= 0 , "bad tail type (<=0)\n", NULL);
322 0 : ERRORcheck(ATOMvarsized(tt) && ATOMstorage(tt) != TYPE_str, "bad tail type (varsized and not str)\n", NULL);
323 0 : ERRORcheck(heapfile == NULL, "bad heapfile name\n", NULL);
324 :
325 0 : if ((f = MT_fopen(heapfile, "rb")) == NULL) {
326 0 : GDKsyserror("BATattach: cannot open %s\n", heapfile);
327 0 : return NULL;
328 : }
329 0 : if (ATOMstorage(tt) == TYPE_str) {
330 0 : size_t n;
331 0 : char *s;
332 0 : int c, u;
333 :
334 0 : if ((bn = COLnew(0, tt, 0, role)) == NULL) {
335 0 : fclose(f);
336 0 : return NULL;
337 : }
338 0 : m = 4096;
339 0 : n = 0;
340 0 : u = 0;
341 0 : s = p = GDKmalloc(m);
342 0 : if (p == NULL) {
343 0 : fclose(f);
344 0 : BBPreclaim(bn);
345 0 : return NULL;
346 : }
347 0 : while ((c = getc(f)) != EOF) {
348 0 : if (n == m) {
349 0 : m += 4096;
350 0 : s = GDKrealloc(p, m);
351 0 : if (s == NULL) {
352 0 : GDKfree(p);
353 0 : BBPreclaim(bn);
354 0 : fclose(f);
355 0 : return NULL;
356 : }
357 0 : p = s;
358 0 : s = p + n;
359 : }
360 0 : if (c == '\n' && n > 0 && s[-1] == '\r') {
361 : /* deal with CR-LF sequence */
362 0 : s[-1] = c;
363 : } else {
364 0 : *s++ = c;
365 0 : n++;
366 : }
367 0 : if (u) {
368 0 : if ((c & 0xC0) == 0x80)
369 0 : u--;
370 : else
371 0 : goto notutf8;
372 0 : } else if ((c & 0xF8) == 0xF0)
373 : u = 3;
374 0 : else if ((c & 0xF0) == 0xE0)
375 : u = 2;
376 0 : else if ((c & 0xE0) == 0xC0)
377 : u = 1;
378 0 : else if ((c & 0x80) == 0x80)
379 0 : goto notutf8;
380 0 : else if (c == 0) {
381 0 : if (BUNappend(bn, p, false) != GDK_SUCCEED) {
382 0 : BBPreclaim(bn);
383 0 : fclose(f);
384 0 : GDKfree(p);
385 0 : return NULL;
386 : }
387 : s = p;
388 : n = 0;
389 : }
390 : }
391 0 : fclose(f);
392 0 : GDKfree(p);
393 0 : if (n > 0) {
394 0 : BBPreclaim(bn);
395 0 : GDKerror("last string is not null-terminated\n");
396 0 : return NULL;
397 : }
398 : } else {
399 0 : struct stat st;
400 0 : int atomsize;
401 0 : BUN cap;
402 0 : lng n;
403 :
404 0 : if (fstat(fileno(f), &st) < 0) {
405 0 : GDKsyserror("BATattach: cannot stat %s\n", heapfile);
406 0 : fclose(f);
407 0 : return NULL;
408 : }
409 0 : atomsize = ATOMsize(tt);
410 0 : if (st.st_size % atomsize != 0) {
411 0 : fclose(f);
412 0 : GDKerror("heapfile size not integral number of atoms\n");
413 0 : return NULL;
414 : }
415 0 : if (ATOMstorage(tt) == TYPE_msk ?
416 : (st.st_size > (off_t) (BUN_MAX / 8)) :
417 0 : ((size_t) (st.st_size / atomsize) > (size_t) BUN_MAX)) {
418 0 : fclose(f);
419 0 : GDKerror("heapfile too large\n");
420 0 : return NULL;
421 : }
422 0 : cap = (BUN) (ATOMstorage(tt) == TYPE_msk ?
423 0 : st.st_size * 8 :
424 0 : st.st_size / atomsize);
425 0 : bn = COLnew(0, tt, cap, role);
426 0 : if (bn == NULL) {
427 0 : fclose(f);
428 0 : return NULL;
429 : }
430 0 : p = Tloc(bn, 0);
431 0 : n = (lng) st.st_size;
432 0 : while (n > 0 && (m = fread(p, 1, (size_t) MIN(1024*1024, n), f)) > 0) {
433 0 : p += m;
434 0 : n -= m;
435 : }
436 0 : fclose(f);
437 0 : if (n > 0) {
438 0 : GDKerror("couldn't read the complete file\n");
439 0 : BBPreclaim(bn);
440 0 : return NULL;
441 : }
442 0 : BATsetcount(bn, cap);
443 0 : bn->tnonil = cap == 0;
444 0 : bn->tnil = false;
445 0 : bn->tseqbase = oid_nil;
446 0 : if (cap > 1) {
447 0 : bn->tsorted = false;
448 0 : bn->trevsorted = false;
449 0 : bn->tkey = false;
450 : } else {
451 0 : bn->tsorted = ATOMlinear(tt);
452 0 : bn->trevsorted = ATOMlinear(tt);
453 0 : bn->tkey = true;
454 : }
455 : }
456 : return bn;
457 :
458 0 : notutf8:
459 0 : fclose(f);
460 0 : BBPreclaim(bn);
461 0 : GDKfree(p);
462 0 : GDKerror("input is not UTF-8\n");
463 0 : return NULL;
464 : }
465 :
466 : /*
467 : * If the BAT runs out of storage for BUNS it will reallocate space.
468 : * For memory mapped BATs we simple extend the administration after
469 : * having an assurance that the BAT still can be safely stored away.
470 : */
471 : BUN
472 21515 : BATgrows(BAT *b)
473 : {
474 21515 : BUN oldcap, newcap;
475 :
476 21515 : BATcheck(b, 0);
477 :
478 21515 : newcap = oldcap = BATcapacity(b);
479 21515 : if (newcap < BATTINY)
480 : newcap = 2 * BATTINY;
481 21585 : else if (newcap < 10 * BATTINY)
482 20316 : newcap = 4 * newcap;
483 1269 : else if (newcap < 50 * BATTINY)
484 762 : newcap = 2 * newcap;
485 507 : else if ((double) newcap * BATMARGIN <= (double) BUN_MAX)
486 506 : newcap = (BUN) ((double) newcap * BATMARGIN);
487 : else
488 : newcap = BUN_MAX;
489 21585 : if (newcap == oldcap) {
490 0 : if (newcap <= BUN_MAX - 10)
491 0 : newcap += 10;
492 : else
493 : newcap = BUN_MAX;
494 : }
495 21515 : if (ATOMstorage(b->ttype) == TYPE_msk) /* round up to multiple of 32 */
496 2 : newcap = (newcap + 31) & ~(BUN)31;
497 : return newcap;
498 : }
499 :
500 : /*
501 : * The routine should ensure that the BAT keeps its location in the
502 : * BAT buffer.
503 : *
504 : * Overflow in the other heaps are dealt with in the atom routines.
505 : * Here we merely copy their references into the new administration
506 : * space.
507 : */
508 : gdk_return
509 181790 : BATextend(BAT *b, BUN newcap)
510 : {
511 181790 : size_t theap_size;
512 181790 : gdk_return rc = GDK_SUCCEED;
513 :
514 181790 : assert(newcap <= BUN_MAX);
515 181790 : BATcheck(b, GDK_FAIL);
516 : /*
517 : * The main issue is to properly predict the new BAT size.
518 : * storage overflow. The assumption taken is that capacity
519 : * overflow is rare. It is changed only when the position of
520 : * the next available BUN surpasses the free area marker. Be
521 : * aware that the newcap should be greater than the old value,
522 : * otherwise you may easily corrupt the administration of
523 : * malloc.
524 : */
525 181790 : if (newcap <= BATcapacity(b)) {
526 : return GDK_SUCCEED;
527 : }
528 :
529 31977 : if (ATOMstorage(b->ttype) == TYPE_msk) {
530 1098 : newcap = (newcap + 31) & ~(BUN)31; /* round up to multiple of 32 */
531 1098 : theap_size = (size_t) (newcap / 8); /* in bytes */
532 : } else {
533 30879 : theap_size = (size_t) newcap << b->tshift;
534 : }
535 :
536 31977 : MT_lock_set(&b->theaplock);
537 32172 : if (b->theap->base) {
538 32172 : TRC_DEBUG(HEAP, "HEAPgrow in BATextend %s %zu %zu\n",
539 : b->theap->filename, b->theap->size, theap_size);
540 32172 : rc = HEAPgrow(&b->theap, theap_size, b->batRestricted == BAT_READ);
541 32227 : if (rc == GDK_SUCCEED)
542 32227 : b->batCapacity = newcap;
543 : } else {
544 0 : b->batCapacity = newcap;
545 : }
546 32227 : MT_lock_unset(&b->theaplock);
547 :
548 32224 : return rc;
549 : }
550 :
551 :
552 :
553 : /*
554 : * @+ BAT destruction
555 : * BATclear quickly removes all elements from a BAT. It must respect
556 : * the transaction rules; so stable elements must be moved to the
557 : * "deleted" section of the BAT (they cannot be fully deleted
558 : * yet). For the elements that really disappear, we must free
559 : * heapspace. As an optimization, in the case of no stable elements, we quickly empty
560 : * the heaps by copying a standard small empty image over them.
561 : */
562 : gdk_return
563 423 : BATclear(BAT *b, bool force)
564 : {
565 423 : BUN p, q;
566 :
567 423 : BATcheck(b, GDK_FAIL);
568 :
569 423 : if (!force && b->batInserted > 0) {
570 0 : GDKerror("cannot clear committed BAT\n");
571 0 : return GDK_FAIL;
572 : }
573 :
574 423 : TRC_DEBUG(ALGO, ALGOBATFMT "\n", ALGOBATPAR(b));
575 :
576 : /* kill all search accelerators */
577 423 : HASHdestroy(b);
578 423 : IMPSdestroy(b);
579 423 : OIDXdestroy(b);
580 423 : STRMPdestroy(b);
581 423 : RTREEdestroy(b);
582 423 : PROPdestroy(b);
583 :
584 423 : bat tvp = 0;
585 :
586 : /* we must dispose of all inserted atoms */
587 423 : MT_lock_set(&b->theaplock);
588 423 : if (force && BATatoms[b->ttype].atomDel == NULL) {
589 416 : assert(b->tvheap == NULL || b->tvheap->parentid == b->batCacheid);
590 : /* no stable elements: we do a quick heap clean */
591 : /* need to clean heap which keeps data even though the
592 : BUNs got removed. This means reinitialize when
593 : free > 0
594 : */
595 416 : if (b->tvheap && b->tvheap->free > 0) {
596 21 : Heap *th = GDKmalloc(sizeof(Heap));
597 :
598 21 : if (th == NULL) {
599 0 : MT_lock_unset(&b->theaplock);
600 0 : return GDK_FAIL;
601 : }
602 21 : *th = (Heap) {
603 21 : .farmid = b->tvheap->farmid,
604 21 : .parentid = b->tvheap->parentid,
605 : .dirty = true,
606 21 : .hasfile = b->tvheap->hasfile,
607 : .refs = ATOMIC_VAR_INIT(1),
608 : };
609 21 : strcpy_len(th->filename, b->tvheap->filename, sizeof(th->filename));
610 21 : if (ATOMheap(b->ttype, th, 0) != GDK_SUCCEED) {
611 0 : MT_lock_unset(&b->theaplock);
612 0 : return GDK_FAIL;
613 : }
614 21 : tvp = b->tvheap->parentid;
615 21 : HEAPdecref(b->tvheap, false);
616 21 : b->tvheap = th;
617 : }
618 : } else {
619 : /* do heap-delete of all inserted atoms */
620 7 : void (*tatmdel)(Heap*,var_t*) = BATatoms[b->ttype].atomDel;
621 :
622 : /* TYPE_str has no del method, so we shouldn't get here */
623 7 : assert(tatmdel == NULL || b->twidth == sizeof(var_t));
624 0 : if (tatmdel) {
625 0 : BATiter bi = bat_iterator_nolock(b);
626 :
627 0 : for (p = b->batInserted, q = BATcount(b); p < q; p++)
628 0 : (*tatmdel)(b->tvheap, (var_t*) BUNtloc(bi,p));
629 0 : b->tvheap->dirty = true;
630 : }
631 : }
632 :
633 423 : b->batInserted = 0;
634 423 : b->batCount = 0;
635 423 : if (b->ttype == TYPE_void)
636 0 : b->batCapacity = 0;
637 423 : b->theap->free = 0;
638 423 : BAThseqbase(b, 0);
639 423 : BATtseqbase(b, ATOMtype(b->ttype) == TYPE_oid ? 0 : oid_nil);
640 423 : b->theap->dirty = true;
641 423 : b->tnonil = true;
642 423 : b->tnil = false;
643 423 : b->tsorted = b->trevsorted = ATOMlinear(b->ttype);
644 423 : b->tnosorted = b->tnorevsorted = 0;
645 423 : b->tkey = true;
646 423 : b->tnokey[0] = b->tnokey[1] = 0;
647 423 : b->tminpos = b->tmaxpos = BUN_NONE;
648 423 : b->tunique_est = 0;
649 423 : MT_lock_unset(&b->theaplock);
650 423 : if (tvp != 0 && tvp != b->batCacheid)
651 0 : BBPrelease(tvp);
652 : return GDK_SUCCEED;
653 : }
654 :
655 : /* free a cached BAT; leave the bat descriptor cached */
656 : void
657 566548 : BATfree(BAT *b)
658 : {
659 566548 : if (b == NULL)
660 : return;
661 :
662 : /* deallocate all memory for a bat */
663 566548 : MT_rwlock_rdlock(&b->thashlock);
664 566548 : BUN nunique = BUN_NONE;
665 566548 : if (b->thash && b->thash != (Hash *) 1) {
666 1980 : nunique = b->thash->nunique;
667 : }
668 566548 : MT_rwlock_rdunlock(&b->thashlock);
669 566548 : HASHfree(b);
670 566548 : IMPSfree(b);
671 566548 : OIDXfree(b);
672 566548 : STRMPfree(b);
673 566548 : RTREEfree(b);
674 566548 : MT_lock_set(&b->theaplock);
675 566548 : if (nunique != BUN_NONE) {
676 1980 : b->tunique_est = (double) nunique;
677 : }
678 : /* wait until there are no other references to the heap; a
679 : * reference is possible in e.g. BBPsync that uses a
680 : * bat_iterator directly on the BBP_desc, i.e. without fix */
681 566548 : while (b->theap && (ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1) {
682 0 : MT_lock_unset(&b->theaplock);
683 0 : MT_sleep_ms(1);
684 566548 : MT_lock_set(&b->theaplock);
685 : }
686 566548 : if (b->theap) {
687 566548 : assert((ATOMIC_GET(&b->theap->refs) & HEAPREFS) == 1);
688 566548 : assert(b->theap->parentid == b->batCacheid);
689 566548 : HEAPfree(b->theap, false);
690 : }
691 : /* wait until there are no other references to the heap; a
692 : * reference is possible in e.g. BBPsync that uses a
693 : * bat_iterator directly on the BBP_desc, i.e. without fix */
694 30226 : while (b->tvheap && (ATOMIC_GET(&b->tvheap->refs) & HEAPREFS) > 1) {
695 0 : MT_lock_unset(&b->theaplock);
696 0 : MT_sleep_ms(1);
697 566548 : MT_lock_set(&b->theaplock);
698 : }
699 566548 : if (b->tvheap) {
700 30226 : assert((ATOMIC_GET(&b->tvheap->refs) & HEAPREFS) == 1);
701 30226 : assert(b->tvheap->parentid == b->batCacheid);
702 30226 : HEAPfree(b->tvheap, false);
703 : }
704 566548 : MT_lock_unset(&b->theaplock);
705 : }
706 :
707 : /* free a cached BAT descriptor */
708 : void
709 23525006 : BATdestroy(BAT *b)
710 : {
711 23525006 : if (b->tvheap) {
712 29066 : GDKfree(b->tvheap);
713 : }
714 23525006 : PROPdestroy_nolock(b);
715 23507511 : MT_lock_destroy(&b->theaplock);
716 23528620 : MT_lock_destroy(&b->batIdxLock);
717 23537003 : MT_rwlock_destroy(&b->thashlock);
718 23536905 : if (b->theap) {
719 562694 : GDKfree(b->theap);
720 : }
721 23546746 : if (b->oldtail) {
722 0 : ATOMIC_AND(&b->oldtail->refs, ~DELAYEDREMOVE);
723 : /* the bat has not been committed, so we cannot remove
724 : * the old tail file */
725 0 : HEAPdecref(b->oldtail, false);
726 0 : b->oldtail = NULL;
727 : }
728 23546746 : *b = (BAT) {
729 : .batCacheid = 0,
730 : };
731 23546746 : }
732 :
733 : /*
734 : * @+ BAT copying
735 : *
736 : * BAT copying is an often used operation. So it deserves attention.
737 : * When making a copy of a BAT, the following aspects are of
738 : * importance:
739 : *
740 : * - the requested head and tail types. The purpose of the copy may be
741 : * to slightly change these types (e.g. void <-> oid). We may also
742 : * remap between types as long as they share the same
743 : * ATOMstorage(type), i.e. the types have the same physical
744 : * implementation. We may even want to allow 'dirty' trick such as
745 : * viewing a flt-column suddenly as int.
746 : *
747 : * To allow such changes, the desired column-types is a
748 : * parameter of COLcopy.
749 : *
750 : * - access mode. If we want a read-only copy of a read-only BAT, a
751 : * VIEW may do (in this case, the user may be after just an
752 : * independent BAT header and id). This is indicated by the
753 : * parameter (writable = FALSE).
754 : *
755 : * In other cases, we really want an independent physical copy
756 : * (writable = TRUE). Changing the mode to BAT_WRITE will be a
757 : * zero-cost operation if the BAT was copied with (writable = TRUE).
758 : *
759 : * In GDK, the result is a BAT that is BAT_WRITE iff (writable ==
760 : * TRUE).
761 : *
762 : * In these cases the copy becomes a logical view on the original,
763 : * which ensures that the original cannot be modified or destroyed
764 : * (which could affect the shared heaps).
765 : */
766 : static bool
767 431 : wrongtype(int t1, int t2)
768 : {
769 : /* check if types are compatible. be extremely forgiving */
770 431 : if (t1 != TYPE_void) {
771 431 : t1 = ATOMtype(ATOMstorage(t1));
772 431 : t2 = ATOMtype(ATOMstorage(t2));
773 431 : if (t1 != t2) {
774 364 : if (ATOMvarsized(t1) ||
775 364 : ATOMvarsized(t2) ||
776 364 : t1 == TYPE_msk || t2 == TYPE_msk ||
777 364 : ATOMsize(t1) != ATOMsize(t2))
778 0 : return true;
779 : }
780 : }
781 : return false;
782 : }
783 :
784 : /*
785 : * There are four main implementation cases:
786 : * (1) we are allowed to return a view (zero effort),
787 : * (2) the result is void,void (zero effort),
788 : * (3) we can copy the heaps (memcopy, or even VM page sharing)
789 : * (4) we must insert BUN-by-BUN into the result (fallback)
790 : * The latter case is still optimized for the case that the result
791 : * is bat[void,T] for a simple fixed-size type T. In that case we
792 : * do inline array[T] inserts.
793 : */
794 : BAT *
795 48301 : COLcopy(BAT *b, int tt, bool writable, role_t role)
796 : {
797 48301 : bool slowcopy = false;
798 48301 : BAT *bn = NULL;
799 48301 : BATiter bi;
800 48301 : char strhash[GDK_STRHASHSIZE];
801 :
802 48301 : BATcheck(b, NULL);
803 :
804 : /* maybe a bit ugly to change the requested bat type?? */
805 48301 : if (b->ttype == TYPE_void && !writable)
806 48301 : tt = TYPE_void;
807 :
808 48301 : if (tt != b->ttype && wrongtype(tt, b->ttype)) {
809 0 : GDKerror("wrong tail-type requested\n");
810 0 : return NULL;
811 : }
812 :
813 : /* in case of a string bat, we save the string heap hash table
814 : * while we have the lock so that we can restore it in the copy;
815 : * this is because during our operation, a parallel thread could
816 : * be adding strings to the vheap which would modify the hash
817 : * table and that would result in buckets containing values
818 : * beyond the original vheap that we're copying */
819 48301 : MT_lock_set(&b->theaplock);
820 48336 : BAT *pb = NULL, *pvb = NULL;
821 48336 : if (b->theap->parentid != b->batCacheid) {
822 10791 : pb = BBP_desc(b->theap->parentid);
823 10791 : MT_lock_set(&pb->theaplock);
824 : }
825 48343 : if (b->tvheap &&
826 16448 : b->tvheap->parentid != b->batCacheid &&
827 11499 : b->tvheap->parentid != b->theap->parentid) {
828 10834 : pvb = BBP_desc(b->tvheap->parentid);
829 10834 : MT_lock_set(&pvb->theaplock);
830 : }
831 48341 : bi = bat_iterator_nolock(b);
832 48341 : if (ATOMstorage(b->ttype) == TYPE_str && b->tvheap->free >= GDK_STRHASHSIZE)
833 12195 : memcpy(strhash, b->tvheap->base, GDK_STRHASHSIZE);
834 :
835 48341 : bat_iterator_incref(&bi);
836 48340 : if (pvb)
837 10838 : MT_lock_unset(&pvb->theaplock);
838 48341 : if (pb)
839 10791 : MT_lock_unset(&pb->theaplock);
840 48347 : MT_lock_unset(&b->theaplock);
841 :
842 : /* first try case (1); create a view, possibly with different
843 : * atom-types */
844 48333 : if (!writable &&
845 48333 : role == TRANSIENT &&
846 20283 : bi.restricted == BAT_READ &&
847 16159 : ATOMstorage(b->ttype) != TYPE_msk && /* no view on TYPE_msk */
848 16159 : (bi.h == NULL ||
849 16159 : bi.h->parentid == b->batCacheid ||
850 3624 : BBP_desc(bi.h->parentid)->batRestricted == BAT_READ)) {
851 16159 : bn = VIEWcreate(b->hseqbase, b, 0, BUN_MAX);
852 16167 : if (bn == NULL) {
853 0 : goto bunins_failed;
854 : }
855 16167 : if (tt != bn->ttype) {
856 64 : bn->ttype = tt;
857 64 : if (bn->tvheap && !ATOMvarsized(tt)) {
858 0 : if (bn->tvheap->parentid != bn->batCacheid)
859 0 : BBPrelease(bn->tvheap->parentid);
860 0 : HEAPdecref(bn->tvheap, false);
861 0 : bn->tvheap = NULL;
862 : }
863 64 : bn->tseqbase = ATOMtype(tt) == TYPE_oid ? bi.tseq : oid_nil;
864 : }
865 16167 : bat_iterator_end(&bi);
866 16167 : return bn;
867 : } else {
868 : /* check whether we need case (4); BUN-by-BUN copy (by
869 : * setting slowcopy to true) */
870 32174 : if (ATOMsize(tt) != ATOMsize(bi.type)) {
871 : /* oops, void materialization */
872 : slowcopy = true;
873 31808 : } else if (bi.h && bi.h->parentid != b->batCacheid &&
874 7166 : BATcapacity(BBP_desc(bi.h->parentid)) > bi.count + bi.count) {
875 : /* reduced slice view: do not copy too much
876 : * garbage */
877 : slowcopy = true;
878 24832 : } else if (bi.vh && bi.vh->parentid != b->batCacheid &&
879 10264 : BATcount(BBP_desc(bi.vh->parentid)) > bi.count + bi.count) {
880 : /* reduced vheap view: do not copy too much
881 : * garbage; this really is a heuristic since the
882 : * vheap could be used completely, even if the
883 : * offset heap is only (less than) half the size
884 : * of the parent's offset heap */
885 9529 : slowcopy = true;
886 : }
887 :
888 32174 : bn = COLnew2(b->hseqbase, tt, bi.count, role, bi.width);
889 32141 : if (bn == NULL) {
890 0 : goto bunins_failed;
891 : }
892 32141 : if (bn->tvheap != NULL && bn->tvheap->base == NULL) {
893 : /* this combination can happen since the last
894 : * argument of COLnew2 not being zero triggers a
895 : * skip in the allocation of the tvheap */
896 12434 : if (ATOMheap(bn->ttype, bn->tvheap, bn->batCapacity) != GDK_SUCCEED) {
897 0 : goto bunins_failed;
898 : }
899 : }
900 :
901 32142 : if (tt == TYPE_void) {
902 : /* case (2): a void,void result => nothing to
903 : * copy! */
904 1128 : bn->theap->free = 0;
905 31014 : } else if (!slowcopy) {
906 : /* case (3): just copy the heaps */
907 21503 : if (bn->tvheap && HEAPextend(bn->tvheap, bi.vhfree, true) != GDK_SUCCEED) {
908 0 : goto bunins_failed;
909 : }
910 21514 : memcpy(bn->theap->base, bi.base, bi.hfree);
911 21514 : bn->theap->free = bi.hfree;
912 21514 : bn->theap->dirty = true;
913 21514 : if (bn->tvheap) {
914 9549 : memcpy(bn->tvheap->base, bi.vh->base, bi.vhfree);
915 9549 : bn->tvheap->free = bi.vhfree;
916 9549 : bn->tvheap->dirty = true;
917 9549 : bn->tascii = bi.ascii;
918 9549 : if (ATOMstorage(b->ttype) == TYPE_str && bi.vhfree >= GDK_STRHASHSIZE)
919 8718 : memcpy(bn->tvheap->base, strhash, GDK_STRHASHSIZE);
920 : }
921 :
922 : /* make sure we use the correct capacity */
923 21514 : if (ATOMstorage(bn->ttype) == TYPE_msk)
924 0 : bn->batCapacity = (BUN) (bn->theap->size * 8);
925 21514 : else if (bn->ttype)
926 21514 : bn->batCapacity = (BUN) (bn->theap->size >> bn->tshift);
927 : else
928 0 : bn->batCapacity = 0;
929 19035 : } else if (tt != TYPE_void || ATOMextern(tt)) {
930 : /* case (4): one-by-one BUN insert (really slow) */
931 9511 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
932 :
933 7775208 : TIMEOUT_LOOP_IDX_DECL(p, bi.count, qry_ctx) {
934 7755705 : const void *t = BUNtail(bi, p);
935 :
936 7750209 : if (bunfastapp_nocheck(bn, t) != GDK_SUCCEED) {
937 0 : goto bunins_failed;
938 : }
939 : }
940 9529 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed, qry_ctx));
941 9524 : bn->theap->dirty |= bi.count > 0;
942 : } else if (tt != TYPE_void && bi.type == TYPE_void) {
943 : /* case (4): optimized for unary void
944 : * materialization */
945 : oid cur = bi.tseq, *dst = (oid *) Tloc(bn, 0);
946 : const oid inc = !is_oid_nil(cur);
947 :
948 : bn->theap->free = bi.count * sizeof(oid);
949 : bn->theap->dirty |= bi.count > 0;
950 : for (BUN p = 0; p < bi.count; p++) {
951 : dst[p] = cur;
952 : cur += inc;
953 : }
954 : } else if (ATOMstorage(bi.type) == TYPE_msk) {
955 : /* convert number of bits to number of bytes,
956 : * and round the latter up to a multiple of
957 : * 4 (copy in units of 4 bytes) */
958 : bn->theap->free = ((bi.count + 31) / 32) * 4;
959 : bn->theap->dirty |= bi.count > 0;
960 : memcpy(Tloc(bn, 0), bi.base, bn->theap->free);
961 : } else {
962 : /* case (4): optimized for simple array copy */
963 : bn->theap->free = bi.count << bn->tshift;
964 : bn->theap->dirty |= bi.count > 0;
965 : memcpy(Tloc(bn, 0), bi.base, bn->theap->free);
966 : }
967 : /* copy all properties (size+other) from the source bat */
968 32166 : BATsetcount(bn, bi.count);
969 : }
970 : /* set properties (note that types may have changed in the copy) */
971 63949 : if (ATOMtype(tt) == ATOMtype(bi.type)) {
972 32155 : if (ATOMtype(tt) == TYPE_oid) {
973 8025 : BATtseqbase(bn, bi.tseq);
974 : } else {
975 24130 : BATtseqbase(bn, oid_nil);
976 : }
977 32102 : BATkey(bn, bi.key);
978 32091 : bn->tsorted = bi.sorted;
979 32091 : bn->trevsorted = bi.revsorted;
980 32091 : bn->tnorevsorted = bi.norevsorted;
981 32091 : if (bi.nokey[0] != bi.nokey[1]) {
982 2685 : bn->tnokey[0] = bi.nokey[0];
983 2685 : bn->tnokey[1] = bi.nokey[1];
984 : } else {
985 29406 : bn->tnokey[0] = bn->tnokey[1] = 0;
986 : }
987 32091 : bn->tnosorted = bi.nosorted;
988 32091 : bn->tnonil = bi.nonil;
989 32091 : bn->tnil = bi.nil;
990 32091 : bn->tminpos = bi.minpos;
991 32091 : bn->tmaxpos = bi.maxpos;
992 32091 : bn->tunique_est = bi.unique_est;
993 3 : } else if (ATOMstorage(tt) == ATOMstorage(b->ttype) &&
994 3 : ATOMcompare(tt) == ATOMcompare(b->ttype)) {
995 3 : BUN h = bi.count;
996 3 : bn->tsorted = bi.sorted;
997 3 : bn->trevsorted = bi.revsorted;
998 3 : BATkey(bn, bi.key);
999 3 : bn->tnonil = bi.nonil;
1000 3 : bn->tnil = bi.nil;
1001 3 : if (bi.nosorted > 0 && bi.nosorted < h)
1002 1 : bn->tnosorted = bi.nosorted;
1003 : else
1004 2 : bn->tnosorted = 0;
1005 3 : if (bi.norevsorted > 0 && bi.norevsorted < h)
1006 3 : bn->tnorevsorted = bi.norevsorted;
1007 : else
1008 0 : bn->tnorevsorted = 0;
1009 3 : if (bi.nokey[0] < h &&
1010 3 : bi.nokey[1] < h &&
1011 : bi.nokey[0] != bi.nokey[1]) {
1012 0 : bn->tnokey[0] = bi.nokey[0];
1013 0 : bn->tnokey[1] = bi.nokey[1];
1014 : } else {
1015 3 : bn->tnokey[0] = bn->tnokey[1] = 0;
1016 : }
1017 3 : bn->tminpos = bi.minpos;
1018 3 : bn->tmaxpos = bi.maxpos;
1019 3 : bn->tunique_est = bi.unique_est;
1020 : } else {
1021 0 : bn->tsorted = bn->trevsorted = false; /* set based on count later */
1022 0 : bn->tnonil = bn->tnil = false;
1023 0 : bn->tkey = false;
1024 0 : bn->tnosorted = bn->tnorevsorted = 0;
1025 0 : bn->tnokey[0] = bn->tnokey[1] = 0;
1026 : }
1027 32094 : if (BATcount(bn) <= 1) {
1028 5984 : bn->tsorted = ATOMlinear(b->ttype);
1029 5984 : bn->trevsorted = ATOMlinear(b->ttype);
1030 5984 : bn->tkey = true;
1031 : }
1032 32094 : bat_iterator_end(&bi);
1033 32163 : if (!writable)
1034 4116 : bn->batRestricted = BAT_READ;
1035 32163 : TRC_DEBUG(ALGO, ALGOBATFMT " -> " ALGOBATFMT "\n",
1036 : ALGOBATPAR(b), ALGOBATPAR(bn));
1037 : return bn;
1038 0 : bunins_failed:
1039 0 : bat_iterator_end(&bi);
1040 0 : BBPreclaim(bn);
1041 : return NULL;
1042 : }
1043 :
1044 : /* Append an array of values of length count to the bat. For
1045 : * fixed-sized values, `values' is an array of values, for
1046 : * variable-sized values, `values' is an array of pointers to values.
1047 : * If values equals NULL, count times nil will be appended. */
1048 : gdk_return
1049 54569071 : BUNappendmulti(BAT *b, const void *values, BUN count, bool force)
1050 : {
1051 54569071 : BUN p;
1052 54569071 : BUN nunique = 0;
1053 :
1054 54569071 : BATcheck(b, GDK_FAIL);
1055 :
1056 54569071 : assert(!VIEWtparent(b));
1057 :
1058 54569071 : if (count == 0)
1059 : return GDK_SUCCEED;
1060 :
1061 54565404 : TRC_DEBUG(ALGO, ALGOBATFMT " appending " BUNFMT " values%s\n", ALGOBATPAR(b), count, values ? "" : " (all nil)");
1062 :
1063 54556875 : p = BATcount(b); /* insert at end */
1064 54556875 : if (p == BUN_MAX || BATcount(b) + count >= BUN_MAX) {
1065 0 : GDKerror("bat too large\n");
1066 0 : return GDK_FAIL;
1067 : }
1068 :
1069 54556875 : ALIGNapp(b, force, GDK_FAIL);
1070 : /* load hash so that we can maintain it */
1071 54599167 : (void) BATcheckhash(b);
1072 :
1073 54593880 : if (b->ttype == TYPE_void && BATtdense(b)) {
1074 0 : const oid *ovals = values;
1075 0 : bool dense = b->batCount == 0 || (ovals != NULL && b->tseqbase + 1 == ovals[0]);
1076 0 : if (ovals) {
1077 0 : for (BUN i = 1; dense && i < count; i++) {
1078 0 : dense = ovals[i - 1] + 1 == ovals[i];
1079 : }
1080 : }
1081 0 : if (dense) {
1082 0 : MT_lock_set(&b->theaplock);
1083 0 : if (b->batCount == 0)
1084 0 : b->tseqbase = ovals ? ovals[0] : oid_nil;
1085 0 : BATsetcount(b, BATcount(b) + count);
1086 0 : MT_lock_unset(&b->theaplock);
1087 0 : return GDK_SUCCEED;
1088 : } else {
1089 : /* we need to materialize b; allocate enough capacity */
1090 0 : if (BATmaterialize(b, BATcount(b) + count) != GDK_SUCCEED)
1091 : return GDK_FAIL;
1092 : }
1093 : }
1094 :
1095 54593880 : if (unshare_varsized_heap(b) != GDK_SUCCEED) {
1096 : return GDK_FAIL;
1097 : }
1098 :
1099 54578831 : if (BATcount(b) + count > BATcapacity(b)) {
1100 : /* if needed space exceeds a normal growth extend just
1101 : * with what's needed */
1102 10606 : BUN ncap = BATcount(b) + count;
1103 10606 : BUN grows = BATgrows(b);
1104 :
1105 10602 : if (ncap > grows)
1106 : grows = ncap;
1107 10602 : gdk_return rc = BATextend(b, grows);
1108 10611 : if (rc != GDK_SUCCEED)
1109 : return rc;
1110 : }
1111 :
1112 54578836 : const void *t = b->ttype == TYPE_msk ? &(msk){false} : ATOMnilptr(b->ttype);
1113 54578836 : MT_lock_set(&b->theaplock);
1114 54601371 : BATiter bi = bat_iterator_nolock(b);
1115 54601371 : const ValRecord *prop;
1116 54601371 : ValRecord minprop, maxprop;
1117 54601371 : const void *minbound = NULL, *maxbound = NULL;
1118 54601382 : if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL &&
1119 11 : VALcopy(&minprop, prop) != NULL)
1120 11 : minbound = VALptr(&minprop);
1121 54503403 : if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL &&
1122 10 : VALcopy(&maxprop, prop) != NULL)
1123 10 : maxbound = VALptr(&maxprop);
1124 54563067 : const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
1125 54593273 : MT_lock_unset(&b->theaplock);
1126 54623034 : MT_rwlock_wrlock(&b->thashlock);
1127 54622632 : if (values && b->ttype) {
1128 54614163 : int (*atomcmp) (const void *, const void *) = ATOMcompare(b->ttype);
1129 54614163 : const void *atomnil = ATOMnilptr(b->ttype);
1130 54614163 : const void *minvalp = NULL, *maxvalp = NULL;
1131 54614163 : if (b->tvheap) {
1132 21995453 : if (bi.minpos != BUN_NONE)
1133 19110418 : minvalp = BUNtvar(bi, bi.minpos);
1134 21989834 : if (bi.maxpos != BUN_NONE)
1135 19105134 : maxvalp = BUNtvar(bi, bi.maxpos);
1136 21993494 : const void *vbase = b->tvheap->base;
1137 44000464 : for (BUN i = 0; i < count; i++) {
1138 22011887 : t = ((void **) values)[i];
1139 22011887 : bool isnil = atomcmp(t, atomnil) == 0;
1140 22006536 : gdk_return rc;
1141 22006536 : if (notnull && isnil) {
1142 0 : assert(0);
1143 : GDKerror("NULL value not within bounds\n");
1144 : rc = GDK_FAIL;
1145 22006536 : } else if (minbound &&
1146 22006536 : !isnil &&
1147 0 : atomcmp(t, minbound) < 0) {
1148 0 : assert(0);
1149 : GDKerror("value not within bounds\n");
1150 : rc = GDK_FAIL;
1151 22006536 : } else if (maxbound &&
1152 0 : !isnil &&
1153 0 : atomcmp(t, maxbound) >= 0) {
1154 0 : assert(0);
1155 : GDKerror("value not within bounds\n");
1156 : rc = GDK_FAIL;
1157 : } else {
1158 22006536 : rc = tfastins_nocheckVAR(b, p, t);
1159 : }
1160 22006956 : if (rc != GDK_SUCCEED) {
1161 0 : MT_rwlock_wrunlock(&b->thashlock);
1162 0 : if (minbound)
1163 0 : VALclear(&minprop);
1164 0 : if (maxbound)
1165 0 : VALclear(&maxprop);
1166 0 : return rc;
1167 : }
1168 22006956 : if (vbase != b->tvheap->base) {
1169 : /* tvheap changed location, so
1170 : * pointers may need to be
1171 : * updated (not if they were
1172 : * initialized from t below, but
1173 : * we don't know) */
1174 3390 : BUN minpos = bi.minpos;
1175 3390 : BUN maxpos = bi.maxpos;
1176 3390 : MT_lock_set(&b->theaplock);
1177 3390 : bi = bat_iterator_nolock(b);
1178 3390 : MT_lock_unset(&b->theaplock);
1179 3390 : bi.minpos = minpos;
1180 3390 : bi.maxpos = maxpos;
1181 3390 : vbase = b->tvheap->base;
1182 3390 : if (bi.minpos != BUN_NONE)
1183 2643 : minvalp = BUNtvar(bi, bi.minpos);
1184 3390 : if (bi.maxpos != BUN_NONE)
1185 2642 : maxvalp = BUNtvar(bi, bi.maxpos);
1186 : }
1187 22006956 : if (!isnil) {
1188 20333640 : if (p == 0) {
1189 202996 : bi.minpos = bi.maxpos = 0;
1190 202996 : minvalp = maxvalp = t;
1191 : } else {
1192 39195115 : if (bi.minpos != BUN_NONE &&
1193 19064641 : atomcmp(minvalp, t) > 0) {
1194 82446 : bi.minpos = p;
1195 82446 : minvalp = t;
1196 : }
1197 39195868 : if (bi.maxpos != BUN_NONE &&
1198 19065210 : atomcmp(maxvalp, t) < 0) {
1199 2247378 : bi.maxpos = p;
1200 2247378 : maxvalp = t;
1201 : }
1202 : }
1203 : } else {
1204 1673316 : b->tnil = true;
1205 1673316 : b->tnonil = false;
1206 : }
1207 22006970 : p++;
1208 : }
1209 21988577 : if (minbound)
1210 0 : VALclear(&minprop);
1211 21989352 : if (maxbound)
1212 0 : VALclear(&maxprop);
1213 21985536 : if (b->thash) {
1214 5811 : p -= count;
1215 11622 : for (BUN i = 0; i < count; i++) {
1216 5811 : t = ((void **) values)[i];
1217 5811 : HASHappend_locked(b, p, t);
1218 5811 : p++;
1219 : }
1220 5811 : nunique = b->thash ? b->thash->nunique : 0;
1221 : }
1222 32618710 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
1223 7881 : bi.minpos = bi.maxpos = BUN_NONE;
1224 7881 : minvalp = maxvalp = NULL;
1225 7881 : b->tnil = false;
1226 7881 : b->tnonil = true;
1227 15762 : for (BUN i = 0; i < count; i++) {
1228 7881 : t = (void *) ((char *) values + (i << b->tshift));
1229 7881 : mskSetVal(b, p, *(msk *) t);
1230 7881 : p++;
1231 : }
1232 : } else {
1233 32610829 : if (bi.minpos != BUN_NONE)
1234 31213789 : minvalp = BUNtloc(bi, bi.minpos);
1235 32610829 : if (bi.maxpos != BUN_NONE)
1236 31260514 : maxvalp = BUNtloc(bi, bi.maxpos);
1237 65262012 : for (BUN i = 0; i < count; i++) {
1238 32658782 : t = (void *) ((char *) values + (i << b->tshift));
1239 32658782 : gdk_return rc = tfastins_nocheckFIX(b, p, t);
1240 32652170 : if (rc != GDK_SUCCEED) {
1241 0 : MT_rwlock_wrunlock(&b->thashlock);
1242 0 : return rc;
1243 : }
1244 32652170 : if (b->thash) {
1245 482125 : HASHappend_locked(b, p, t);
1246 : }
1247 32652170 : if (atomcmp(t, atomnil) != 0) {
1248 31751015 : if (p == 0) {
1249 370674 : bi.minpos = bi.maxpos = 0;
1250 370674 : minvalp = maxvalp = t;
1251 : } else {
1252 62620632 : if (bi.minpos != BUN_NONE &&
1253 31238590 : atomcmp(minvalp, t) > 0) {
1254 40184 : bi.minpos = p;
1255 40184 : minvalp = t;
1256 : }
1257 62666736 : if (bi.maxpos != BUN_NONE &&
1258 31285456 : atomcmp(maxvalp, t) < 0) {
1259 7667580 : bi.maxpos = p;
1260 7667580 : maxvalp = t;
1261 : }
1262 : }
1263 : } else {
1264 899229 : b->tnil = true;
1265 899229 : b->tnonil = false;
1266 : }
1267 32651183 : p++;
1268 : }
1269 32603230 : nunique = b->thash ? b->thash->nunique : 0;
1270 : }
1271 : } else {
1272 : /* inserting nils, unless it's msk */
1273 18531 : for (BUN i = 0; i < count; i++) {
1274 10061 : gdk_return rc = tfastins_nocheck(b, p, t);
1275 10062 : if (rc != GDK_SUCCEED) {
1276 0 : MT_rwlock_wrunlock(&b->thashlock);
1277 0 : return rc;
1278 : }
1279 10062 : if (b->thash) {
1280 0 : HASHappend_locked(b, p, t);
1281 : }
1282 10062 : p++;
1283 : }
1284 8470 : nunique = b->thash ? b->thash->nunique : 0;
1285 8470 : b->tnil = b->ttype != TYPE_msk;
1286 8470 : b->tnonil = false;
1287 : }
1288 54605117 : MT_lock_set(&b->theaplock);
1289 54582467 : b->tminpos = bi.minpos;
1290 54582467 : b->tmaxpos = bi.maxpos;
1291 54582467 : if (count > BATcount(b) / gdk_unique_estimate_keep_fraction)
1292 15981535 : b->tunique_est = 0;
1293 :
1294 54582467 : if (b->ttype == TYPE_oid) {
1295 : /* spend extra effort on oid (possible candidate list) */
1296 534652 : if (values == NULL || is_oid_nil(((oid *) values)[0])) {
1297 158 : b->tsorted = false;
1298 158 : b->trevsorted = false;
1299 158 : b->tkey = false;
1300 158 : b->tseqbase = oid_nil;
1301 : } else {
1302 534494 : if (b->batCount == 0) {
1303 6336 : b->tsorted = true;
1304 6336 : b->trevsorted = true;
1305 6336 : b->tkey = true;
1306 6336 : b->tseqbase = count == 1 ? ((oid *) values)[0] : oid_nil;
1307 : } else {
1308 528158 : if (!is_oid_nil(b->tseqbase) &&
1309 320689 : (count > 1 ||
1310 320689 : b->tseqbase + b->batCount != ((oid *) values)[0]))
1311 2368 : b->tseqbase = oid_nil;
1312 528158 : if (b->tsorted && !is_oid_nil(((oid *) b->theap->base)[b->batCount - 1]) && ((oid *) b->theap->base)[b->batCount - 1] > ((oid *) values)[0]) {
1313 51 : b->tsorted = false;
1314 51 : if (b->tnosorted == 0)
1315 51 : b->tnosorted = b->batCount;
1316 : }
1317 528158 : if (b->trevsorted && !is_oid_nil(((oid *) values)[0]) && ((oid *) b->theap->base)[b->batCount - 1] < ((oid *) values)[0]) {
1318 5527 : b->trevsorted = false;
1319 5527 : if (b->tnorevsorted == 0)
1320 5527 : b->tnorevsorted = b->batCount;
1321 : }
1322 528158 : if (b->tkey) {
1323 525538 : if (((oid *) b->theap->base)[b->batCount - 1] == ((oid *) values)[0]) {
1324 17 : b->tkey = false;
1325 17 : if (b->tnokey[1] == 0) {
1326 17 : b->tnokey[0] = b->batCount - 1;
1327 17 : b->tnokey[1] = b->batCount;
1328 : }
1329 525521 : } else if (!b->tsorted && !b->trevsorted)
1330 37 : b->tkey = false;
1331 : }
1332 : }
1333 534494 : for (BUN i = 1; i < count; i++) {
1334 0 : if (is_oid_nil(((oid *) values)[i])) {
1335 0 : b->tsorted = false;
1336 0 : b->trevsorted = false;
1337 0 : b->tkey = false;
1338 0 : b->tseqbase = oid_nil;
1339 0 : break;
1340 : }
1341 0 : if (((oid *) values)[i - 1] == ((oid *) values)[i]) {
1342 0 : b->tkey = false;
1343 0 : if (b->tnokey[1] == 0) {
1344 0 : b->tnokey[0] = b->batCount + i - 1;
1345 0 : b->tnokey[1] = b->batCount + i;
1346 : }
1347 0 : } else if (((oid *) values)[i - 1] > ((oid *) values)[i]) {
1348 0 : b->tsorted = false;
1349 0 : if (b->tnosorted == 0)
1350 0 : b->tnosorted = b->batCount + i;
1351 0 : if (!b->trevsorted)
1352 0 : b->tkey = false;
1353 : } else {
1354 0 : if (((oid *) values)[i - 1] + 1 != ((oid *) values)[i])
1355 0 : b->tseqbase = oid_nil;
1356 0 : b->trevsorted = false;
1357 0 : if (b->tnorevsorted == 0)
1358 0 : b->tnorevsorted = b->batCount + i;
1359 0 : if (!b->tsorted)
1360 0 : b->tkey = false;
1361 : }
1362 : }
1363 : }
1364 54047815 : } else if (!ATOMlinear(b->ttype)) {
1365 7881 : b->tsorted = b->trevsorted = b->tkey = false;
1366 54039934 : } else if (b->batCount == 0) {
1367 574214 : if (values == NULL) {
1368 0 : b->tsorted = b->trevsorted = true;
1369 0 : b->tkey = count == 1;
1370 0 : b->tunique_est = 1;
1371 : } else {
1372 574214 : int c;
1373 574214 : switch (count) {
1374 573893 : case 1:
1375 573893 : b->tsorted = b->trevsorted = b->tkey = true;
1376 573893 : b->tunique_est = 1;
1377 573893 : break;
1378 122 : case 2:
1379 122 : if (b->tvheap)
1380 41 : c = ATOMcmp(b->ttype,
1381 : ((void **) values)[0],
1382 : ((void **) values)[1]);
1383 : else
1384 81 : c = ATOMcmp(b->ttype,
1385 : values,
1386 : (char *) values + b->twidth);
1387 122 : b->tsorted = c <= 0;
1388 122 : b->tnosorted = !b->tsorted;
1389 122 : b->trevsorted = c >= 0;
1390 122 : b->tnorevsorted = !b->trevsorted;
1391 122 : b->tkey = c != 0;
1392 122 : b->tnokey[0] = 0;
1393 122 : b->tnokey[1] = !b->tkey;
1394 122 : b->tunique_est = (double) (1 + b->tkey);
1395 122 : break;
1396 199 : default:
1397 199 : b->tsorted = b->trevsorted = b->tkey = false;
1398 199 : break;
1399 : }
1400 : }
1401 53465720 : } else if (b->batCount == 1 && count == 1) {
1402 427417 : bi = bat_iterator_nolock(b);
1403 427417 : t = b->ttype == TYPE_msk ? &(msk){false} : ATOMnilptr(b->ttype);
1404 427417 : if (values != NULL) {
1405 424843 : if (b->tvheap)
1406 159671 : t = ((void **) values)[0];
1407 : else
1408 : t = values;
1409 : }
1410 427417 : int c = ATOMcmp(b->ttype, BUNtail(bi, 0), t);
1411 423858 : b->tsorted = c <= 0;
1412 423858 : b->tnosorted = !b->tsorted;
1413 423858 : b->trevsorted = c >= 0;
1414 423858 : b->tnorevsorted = !b->trevsorted;
1415 423858 : b->tkey = c != 0;
1416 423858 : b->tnokey[0] = 0;
1417 423858 : b->tnokey[1] = !b->tkey;
1418 423858 : b->tunique_est = (double) (1 + b->tkey);
1419 : } else {
1420 53038303 : b->tsorted = b->trevsorted = b->tkey = false;
1421 : }
1422 54578908 : BATsetcount(b, p);
1423 54578966 : if (nunique != 0)
1424 487936 : b->tunique_est = (double) nunique;
1425 54578966 : MT_lock_unset(&b->theaplock);
1426 54616017 : MT_rwlock_wrunlock(&b->thashlock);
1427 :
1428 54613262 : IMPSdestroy(b); /* no support for inserts in imprints yet */
1429 54622318 : OIDXdestroy(b);
1430 54619366 : STRMPdestroy(b); /* TODO: use STRMPappendBitstring */
1431 54613430 : RTREEdestroy(b);
1432 54613430 : return GDK_SUCCEED;
1433 : }
1434 :
1435 : /* Append a single value to the bat. */
1436 : gdk_return
1437 39761957 : BUNappend(BAT *b, const void *t, bool force)
1438 : {
1439 39761957 : return BUNappendmulti(b, b->ttype && b->tvheap ? (const void *) &t : (const void *) t, 1, force);
1440 : }
1441 :
1442 : gdk_return
1443 4 : BUNdelete(BAT *b, oid o)
1444 : {
1445 4 : BUN p;
1446 4 : BATiter bi = bat_iterator_nolock(b);
1447 4 : const void *val;
1448 4 : bool locked = false;
1449 4 : BUN nunique;
1450 :
1451 4 : assert(!is_oid_nil(b->hseqbase) || BATcount(b) == 0);
1452 4 : if (o < b->hseqbase || o >= b->hseqbase + BATcount(b)) {
1453 : /* value already not there */
1454 : return GDK_SUCCEED;
1455 : }
1456 4 : assert(BATcount(b) > 0); /* follows from "if" above */
1457 4 : p = o - b->hseqbase;
1458 4 : if (p < b->batInserted) {
1459 0 : GDKerror("cannot delete committed value\n");
1460 0 : return GDK_FAIL;
1461 : }
1462 4 : TRC_DEBUG(ALGO, ALGOBATFMT " deleting oid " OIDFMT "\n", ALGOBATPAR(b), o);
1463 : /* load hash so that we can maintain it */
1464 4 : (void) BATcheckhash(b);
1465 :
1466 4 : val = BUNtail(bi, p);
1467 : /* writing the values should be locked, reading could be done
1468 : * unlocked (since we're the only thread that should be changing
1469 : * anything) */
1470 4 : MT_lock_set(&b->theaplock);
1471 4 : if (b->tmaxpos == p)
1472 1 : b->tmaxpos = BUN_NONE;
1473 4 : if (b->tminpos == p)
1474 0 : b->tminpos = BUN_NONE;
1475 4 : MT_lock_unset(&b->theaplock);
1476 4 : nunique = HASHdelete(&bi, p, val);
1477 4 : ATOMdel(b->ttype, b->tvheap, (var_t *) BUNtloc(bi, p));
1478 4 : if (p != BATcount(b) - 1 &&
1479 2 : (b->ttype != TYPE_void || BATtdense(b))) {
1480 : /* replace to-be-delete BUN with last BUN; materialize
1481 : * void column before doing so */
1482 2 : if (b->ttype == TYPE_void &&
1483 0 : BATmaterialize(b, BUN_NONE) != GDK_SUCCEED)
1484 : return GDK_FAIL;
1485 2 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1486 0 : msk mval = mskGetVal(b, BATcount(b) - 1);
1487 0 : assert(b->thash == NULL);
1488 0 : mskSetVal(b, p, mval);
1489 : /* don't leave garbage */
1490 0 : mskClr(b, BATcount(b) - 1);
1491 : } else {
1492 2 : val = Tloc(b, BATcount(b) - 1);
1493 2 : nunique = HASHdelete(&bi, BATcount(b) - 1, val);
1494 2 : memcpy(Tloc(b, p), val, b->twidth);
1495 2 : nunique = HASHinsert(&bi, p, val);
1496 2 : MT_lock_set(&b->theaplock);
1497 2 : locked = true;
1498 2 : if (b->tminpos == BATcount(b) - 1)
1499 0 : b->tminpos = p;
1500 2 : if (b->tmaxpos == BATcount(b) - 1)
1501 1 : b->tmaxpos = p;
1502 : }
1503 : /* no longer sorted */
1504 2 : if (!locked) {
1505 0 : MT_lock_set(&b->theaplock);
1506 0 : locked = true;
1507 : }
1508 2 : b->tsorted = b->trevsorted = false;
1509 2 : b->theap->dirty = true;
1510 : }
1511 4 : if (!locked)
1512 2 : MT_lock_set(&b->theaplock);
1513 4 : if (b->tnosorted >= p)
1514 0 : b->tnosorted = 0;
1515 4 : if (b->tnorevsorted >= p)
1516 1 : b->tnorevsorted = 0;
1517 4 : b->batCount--;
1518 4 : if (nunique != 0)
1519 0 : b->tunique_est = (double) nunique;
1520 4 : else if (BATcount(b) < gdk_unique_estimate_keep_fraction)
1521 4 : b->tunique_est = 0;
1522 4 : if (b->batCount <= 1) {
1523 : /* some trivial properties */
1524 0 : b->tkey = true;
1525 0 : b->tsorted = b->trevsorted = true;
1526 0 : b->tnosorted = b->tnorevsorted = 0;
1527 0 : if (b->batCount == 0) {
1528 0 : b->tnil = false;
1529 0 : b->tnonil = true;
1530 : }
1531 : }
1532 4 : MT_lock_unset(&b->theaplock);
1533 4 : IMPSdestroy(b);
1534 4 : OIDXdestroy(b);
1535 4 : return GDK_SUCCEED;
1536 : }
1537 :
1538 : /* @- BUN replace
1539 : * The last operation in this context is BUN replace. It assumes that
1540 : * the header denotes a key. The old value association is destroyed
1541 : * (if it exists in the first place) and the new value takes its
1542 : * place.
1543 : *
1544 : * In order to make updates on void columns workable; replaces on them
1545 : * are always done in-place. Performing them without bun-movements
1546 : * greatly simplifies the problem. The 'downside' is that when
1547 : * transaction management has to be performed, replaced values should
1548 : * be saved explicitly.
1549 : */
1550 : static gdk_return
1551 1044280 : BUNinplacemulti(BAT *b, const oid *positions, const void *values, BUN count, bool force, bool autoincr)
1552 : {
1553 1044280 : BUN prv, nxt;
1554 1044280 : const void *val;
1555 1044280 : int (*atomcmp) (const void *, const void *) = ATOMcompare(b->ttype);
1556 1044280 : const void *atomnil = ATOMnilptr(b->ttype);
1557 :
1558 1044280 : MT_lock_set(&b->theaplock);
1559 1043473 : BUN last = BATcount(b) - 1;
1560 1043473 : BATiter bi = bat_iterator_nolock(b);
1561 : /* zap alignment info */
1562 1043473 : if (!force && (b->batRestricted != BAT_WRITE ||
1563 561151 : ((ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1))) {
1564 0 : MT_lock_unset(&b->theaplock);
1565 0 : GDKerror("access denied to %s, aborting.\n",
1566 : BATgetId(b));
1567 0 : assert(0);
1568 : return GDK_FAIL;
1569 : }
1570 1043473 : TRC_DEBUG(ALGO, ALGOBATFMT " replacing " BUNFMT " values\n", ALGOBATPAR(b), count);
1571 1037711 : if (b->ttype == TYPE_void) {
1572 0 : PROPdestroy(b);
1573 0 : b->tminpos = BUN_NONE;
1574 0 : b->tmaxpos = BUN_NONE;
1575 0 : b->tunique_est = 0.0;
1576 1037711 : } else if (count > BATcount(b) / gdk_unique_estimate_keep_fraction) {
1577 382689 : b->tunique_est = 0;
1578 : }
1579 1037711 : const ValRecord *prop;
1580 1037711 : ValRecord minprop, maxprop;
1581 1037711 : const void *minbound = NULL, *maxbound = NULL;
1582 1037711 : if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL &&
1583 0 : VALcopy(&minprop, prop) != NULL)
1584 0 : minbound = VALptr(&minprop);
1585 1038075 : if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL &&
1586 0 : VALcopy(&maxprop, prop) != NULL)
1587 0 : maxbound = VALptr(&maxprop);
1588 1038944 : const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
1589 1046762 : MT_lock_unset(&b->theaplock);
1590 : /* load hash so that we can maintain it */
1591 1050835 : (void) BATcheckhash(b);
1592 1050402 : MT_rwlock_wrlock(&b->thashlock);
1593 2101337 : for (BUN i = 0; i < count; i++) {
1594 1051148 : BUN p = autoincr ? positions[0] - b->hseqbase + i : positions[i] - b->hseqbase;
1595 1051101 : const void *t = b->ttype && b->tvheap ?
1596 1190924 : ((const void **) values)[i] :
1597 911372 : (const void *) ((const char *) values + (i << b->tshift));
1598 1051148 : bool isnil = atomnil && atomcmp(t, atomnil) == 0;
1599 1049393 : if (notnull && isnil) {
1600 0 : assert(0);
1601 : GDKerror("NULL value not within bounds\n");
1602 : MT_rwlock_wrunlock(&b->thashlock);
1603 : goto bailout;
1604 1049393 : } else if (!isnil &&
1605 0 : ((minbound &&
1606 1001346 : atomcmp(t, minbound) < 0) ||
1607 0 : (maxbound &&
1608 0 : atomcmp(t, maxbound) >= 0))) {
1609 0 : assert(0);
1610 : GDKerror("value not within bounds\n");
1611 : MT_rwlock_wrunlock(&b->thashlock);
1612 : goto bailout;
1613 : }
1614 :
1615 : /* retrieve old value, but if this comes from the
1616 : * logger, we need to deal with offsets that point
1617 : * outside of the valid vheap */
1618 1049036 : if (b->ttype == TYPE_void) {
1619 0 : val = BUNtpos(bi, p);
1620 1049036 : } else if (bi.type == TYPE_msk) {
1621 118 : val = BUNtmsk(bi, p);
1622 1048918 : } else if (b->tvheap) {
1623 138986 : size_t off = BUNtvaroff(bi, p);
1624 139522 : if (off < bi.vhfree)
1625 139696 : val = bi.vh->base + off;
1626 : else
1627 : val = NULL; /* bad offset */
1628 : } else {
1629 909932 : val = BUNtloc(bi, p);
1630 : }
1631 :
1632 1049746 : if (val) {
1633 1049746 : if (atomcmp(val, t) == 0)
1634 233079 : continue; /* nothing to do */
1635 817521 : if (!isnil &&
1636 165943 : b->tnil &&
1637 165944 : atomcmp(val, atomnil) == 0) {
1638 : /* if old value is nil and new value
1639 : * isn't, we're not sure anymore about
1640 : * the nil property, so we must clear
1641 : * it */
1642 165783 : MT_lock_set(&b->theaplock);
1643 165784 : b->tnil = false;
1644 165784 : MT_lock_unset(&b->theaplock);
1645 : }
1646 817525 : if (b->ttype != TYPE_void) {
1647 817050 : if (bi.maxpos != BUN_NONE) {
1648 526970 : if (!isnil && atomcmp(BUNtail(bi, bi.maxpos), t) < 0) {
1649 : /* new value is larger
1650 : * than previous
1651 : * largest */
1652 56168 : bi.maxpos = p;
1653 470800 : } else if (bi.maxpos == p && atomcmp(BUNtail(bi, bi.maxpos), t) != 0) {
1654 : /* old value is equal to
1655 : * largest and new value
1656 : * is smaller or nil (see
1657 : * above), so we don't
1658 : * know anymore which is
1659 : * the largest */
1660 499 : bi.maxpos = BUN_NONE;
1661 : }
1662 : }
1663 817048 : if (bi.minpos != BUN_NONE) {
1664 366380 : if (!isnil && atomcmp(BUNtail(bi, bi.minpos), t) > 0) {
1665 : /* new value is smaller
1666 : * than previous
1667 : * smallest */
1668 423 : bi.minpos = p;
1669 365957 : } else if (bi.minpos == p && atomcmp(BUNtail(bi, bi.minpos), t) != 0) {
1670 : /* old value is equal to
1671 : * smallest and new value
1672 : * is larger or nil (see
1673 : * above), so we don't
1674 : * know anymore which is
1675 : * the largest */
1676 618 : bi.minpos = BUN_NONE;
1677 : }
1678 : }
1679 : }
1680 817523 : HASHdelete_locked(&bi, p, val); /* first delete old value from hash */
1681 : } else {
1682 : /* out of range old value, so the properties and
1683 : * hash cannot be trusted */
1684 0 : PROPdestroy(b);
1685 0 : Hash *hs = b->thash;
1686 0 : if (hs) {
1687 0 : b->thash = NULL;
1688 0 : doHASHdestroy(b, hs);
1689 : }
1690 0 : MT_lock_set(&b->theaplock);
1691 0 : bi.minpos = BUN_NONE;
1692 0 : bi.maxpos = BUN_NONE;
1693 0 : b->tunique_est = 0.0;
1694 0 : MT_lock_unset(&b->theaplock);
1695 : }
1696 816235 : OIDXdestroy(b);
1697 817341 : IMPSdestroy(b);
1698 817866 : STRMPdestroy(b);
1699 817647 : RTREEdestroy(b);
1700 :
1701 817727 : if (b->tvheap && b->ttype) {
1702 66818 : var_t _d;
1703 66818 : ptr _ptr;
1704 66818 : _ptr = BUNtloc(bi, p);
1705 66818 : switch (b->twidth) {
1706 8751 : case 1:
1707 8751 : _d = (var_t) * (uint8_t *) _ptr + GDK_VAROFFSET;
1708 8751 : break;
1709 53214 : case 2:
1710 53214 : _d = (var_t) * (uint16_t *) _ptr + GDK_VAROFFSET;
1711 53214 : break;
1712 4853 : case 4:
1713 4853 : _d = (var_t) * (uint32_t *) _ptr;
1714 4853 : break;
1715 : #if SIZEOF_VAR_T == 8
1716 0 : case 8:
1717 0 : _d = (var_t) * (uint64_t *) _ptr;
1718 0 : break;
1719 : #endif
1720 : default:
1721 0 : MT_UNREACHABLE();
1722 : }
1723 66818 : MT_lock_set(&b->theaplock);
1724 66842 : if (ATOMreplaceVAR(b, &_d, t) != GDK_SUCCEED) {
1725 0 : MT_lock_unset(&b->theaplock);
1726 0 : MT_rwlock_wrunlock(&b->thashlock);
1727 0 : goto bailout;
1728 : }
1729 66938 : MT_lock_unset(&b->theaplock);
1730 66548 : if (b->twidth < SIZEOF_VAR_T &&
1731 66991 : (b->twidth <= 2 ? _d - GDK_VAROFFSET : _d) >= ((size_t) 1 << (8 << b->tshift))) {
1732 : /* doesn't fit in current heap, upgrade it */
1733 84 : if (GDKupgradevarheap(b, _d, 0, bi.count) != GDK_SUCCEED) {
1734 0 : MT_rwlock_wrunlock(&b->thashlock);
1735 0 : goto bailout;
1736 : }
1737 : }
1738 : /* reinitialize iterator after possible heap upgrade */
1739 : {
1740 : /* save and restore minpos/maxpos */
1741 66548 : BUN minpos = bi.minpos;
1742 66548 : BUN maxpos = bi.maxpos;
1743 66548 : bi = bat_iterator_nolock(b);
1744 66548 : bi.minpos = minpos;
1745 66548 : bi.maxpos = maxpos;
1746 : }
1747 66548 : _ptr = BUNtloc(bi, p);
1748 66548 : switch (b->twidth) {
1749 8676 : case 1:
1750 8676 : * (uint8_t *) _ptr = (uint8_t) (_d - GDK_VAROFFSET);
1751 8676 : break;
1752 53010 : case 2:
1753 53010 : * (uint16_t *) _ptr = (uint16_t) (_d - GDK_VAROFFSET);
1754 53010 : break;
1755 4862 : case 4:
1756 4862 : * (uint32_t *) _ptr = (uint32_t) _d;
1757 4862 : break;
1758 : #if SIZEOF_VAR_T == 8
1759 0 : case 8:
1760 0 : * (uint64_t *) _ptr = (uint64_t) _d;
1761 0 : break;
1762 : #endif
1763 : default:
1764 66548 : MT_UNREACHABLE();
1765 : }
1766 750909 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
1767 118 : mskSetVal(b, p, * (msk *) t);
1768 : } else {
1769 750791 : assert(BATatoms[b->ttype].atomPut == NULL);
1770 750791 : switch (ATOMsize(b->ttype)) {
1771 : case 0: /* void */
1772 : break;
1773 13081 : case 1:
1774 13081 : ((bte *) b->theap->base)[p] = * (bte *) t;
1775 13081 : break;
1776 6789 : case 2:
1777 6789 : ((sht *) b->theap->base)[p] = * (sht *) t;
1778 6789 : break;
1779 168956 : case 4:
1780 168956 : ((int *) b->theap->base)[p] = * (int *) t;
1781 168956 : break;
1782 561965 : case 8:
1783 561965 : ((lng *) b->theap->base)[p] = * (lng *) t;
1784 561965 : break;
1785 0 : case 16:
1786 : #ifdef HAVE_HGE
1787 0 : ((hge *) b->theap->base)[p] = * (hge *) t;
1788 : #else
1789 : ((uuid *) b->theap->base)[p] = * (uuid *) t;
1790 : #endif
1791 0 : break;
1792 0 : default:
1793 0 : memcpy(BUNtloc(bi, p), t, ATOMsize(b->ttype));
1794 0 : break;
1795 : }
1796 : }
1797 :
1798 817457 : HASHinsert_locked(&bi, p, t); /* insert new value into hash */
1799 :
1800 815884 : prv = p > 0 ? p - 1 : BUN_NONE;
1801 815884 : nxt = p < last ? p + 1 : BUN_NONE;
1802 :
1803 815884 : MT_lock_set(&b->theaplock);
1804 817036 : if (b->tsorted) {
1805 2956 : if (prv != BUN_NONE &&
1806 1167 : atomcmp(t, BUNtail(bi, prv)) < 0) {
1807 26 : b->tsorted = false;
1808 26 : b->tnosorted = p;
1809 2488 : } else if (nxt != BUN_NONE &&
1810 725 : atomcmp(t, BUNtail(bi, nxt)) > 0) {
1811 648 : b->tsorted = false;
1812 648 : b->tnosorted = nxt;
1813 1115 : } else if (b->ttype != TYPE_void && BATtdense(b)) {
1814 0 : if (prv != BUN_NONE &&
1815 0 : 1 + * (oid *) BUNtloc(bi, prv) != * (oid *) t) {
1816 0 : b->tseqbase = oid_nil;
1817 0 : } else if (nxt != BUN_NONE &&
1818 0 : * (oid *) BUNtloc(bi, nxt) != 1 + * (oid *) t) {
1819 0 : b->tseqbase = oid_nil;
1820 0 : } else if (prv == BUN_NONE &&
1821 0 : nxt == BUN_NONE) {
1822 0 : b->tseqbase = * (oid *) t;
1823 : }
1824 : }
1825 815247 : } else if (b->tnosorted >= p)
1826 3316 : b->tnosorted = 0;
1827 817036 : if (b->trevsorted) {
1828 1377 : if (prv != BUN_NONE &&
1829 406 : atomcmp(t, BUNtail(bi, prv)) > 0) {
1830 52 : b->trevsorted = false;
1831 52 : b->tnorevsorted = p;
1832 1057 : } else if (nxt != BUN_NONE &&
1833 138 : atomcmp(t, BUNtail(bi, nxt)) < 0) {
1834 27 : b->trevsorted = false;
1835 27 : b->tnorevsorted = nxt;
1836 : }
1837 816065 : } else if (b->tnorevsorted >= p)
1838 1696 : b->tnorevsorted = 0;
1839 817036 : if (((b->ttype != TYPE_void) & b->tkey) && b->batCount > 1) {
1840 938 : BATkey(b, false);
1841 816098 : } else if (!b->tkey && (b->tnokey[0] == p || b->tnokey[1] == p))
1842 865 : b->tnokey[0] = b->tnokey[1] = 0;
1843 817036 : if (b->tnonil && ATOMstorage(b->ttype) != TYPE_msk)
1844 592867 : b->tnonil = t && atomcmp(t, atomnil) != 0;
1845 1050107 : MT_lock_unset(&b->theaplock);
1846 : }
1847 1049518 : BUN nunique = b->thash ? b->thash->nunique : 0;
1848 1049518 : MT_rwlock_wrunlock(&b->thashlock);
1849 1050693 : MT_lock_set(&b->theaplock);
1850 1047877 : if (nunique != 0)
1851 21248 : b->tunique_est = (double) nunique;
1852 1047877 : b->tminpos = bi.minpos;
1853 1047877 : b->tmaxpos = bi.maxpos;
1854 1047877 : b->theap->dirty = true;
1855 1047877 : if (b->tvheap)
1856 138910 : b->tvheap->dirty = true;
1857 1047877 : MT_lock_unset(&b->theaplock);
1858 :
1859 1048565 : return GDK_SUCCEED;
1860 :
1861 0 : bailout:
1862 0 : if (minbound)
1863 0 : VALclear(&minprop);
1864 0 : if (maxbound)
1865 0 : VALclear(&maxprop);
1866 : return GDK_FAIL;
1867 : }
1868 :
1869 : /* Replace multiple values given by their positions with the given values. */
1870 : gdk_return
1871 564803 : BUNreplacemulti(BAT *b, const oid *positions, const void *values, BUN count, bool force)
1872 : {
1873 564803 : BATcheck(b, GDK_FAIL);
1874 :
1875 564803 : if (b->ttype == TYPE_void && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED)
1876 : return GDK_FAIL;
1877 :
1878 564803 : return BUNinplacemulti(b, positions, values, count, force, false);
1879 : }
1880 :
1881 : /* Replace multiple values starting from a given position with the given
1882 : * values. */
1883 : gdk_return
1884 479246 : BUNreplacemultiincr(BAT *b, oid position, const void *values, BUN count, bool force)
1885 : {
1886 479246 : BATcheck(b, GDK_FAIL);
1887 :
1888 479246 : if (b->ttype == TYPE_void && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED)
1889 : return GDK_FAIL;
1890 :
1891 479246 : return BUNinplacemulti(b, &position, values, count, force, true);
1892 : }
1893 :
1894 : gdk_return
1895 564803 : BUNreplace(BAT *b, oid id, const void *t, bool force)
1896 : {
1897 564803 : return BUNreplacemulti(b, &id, b->ttype && b->tvheap ? (const void *) &t : t, 1, force);
1898 : }
1899 :
1900 : /* very much like BUNreplace, but this doesn't make any changes if the
1901 : * tail column is void */
1902 : gdk_return
1903 3543 : void_inplace(BAT *b, oid id, const void *val, bool force)
1904 : {
1905 3543 : assert(id >= b->hseqbase && id < b->hseqbase + BATcount(b));
1906 3543 : if (id < b->hseqbase || id >= b->hseqbase + BATcount(b)) {
1907 : GDKerror("id out of range\n");
1908 : return GDK_FAIL;
1909 : }
1910 3543 : if (b->ttype == TYPE_void)
1911 : return GDK_SUCCEED;
1912 3543 : return BUNinplacemulti(b, &id, b->ttype && b->tvheap ? (const void *) &val : (const void *) val, 1, force, false);
1913 : }
1914 :
1915 : /*
1916 : * @- BUN Lookup
1917 : * Location of a BUN using a value should use the available indexes to
1918 : * speed up access. If indexes are lacking then a hash index is
1919 : * constructed under the assumption that 1) multiple access to the BAT
1920 : * can be expected and 2) building the hash is only slightly more
1921 : * expensive than the full linear scan. BUN_NONE is returned if no
1922 : * such element could be found. In those cases where the type is
1923 : * known and a hash index is available, one should use the inline
1924 : * functions to speed-up processing.
1925 : */
1926 : static BUN
1927 0 : slowfnd(BAT *b, const void *v)
1928 : {
1929 0 : BATiter bi = bat_iterator(b);
1930 0 : BUN p, q;
1931 0 : int (*cmp)(const void *, const void *) = ATOMcompare(bi.type);
1932 :
1933 0 : BATloop(b, p, q) {
1934 0 : if ((*cmp)(v, BUNtail(bi, p)) == 0) {
1935 0 : bat_iterator_end(&bi);
1936 0 : return p;
1937 : }
1938 : }
1939 0 : bat_iterator_end(&bi);
1940 0 : return BUN_NONE;
1941 : }
1942 :
1943 : static BUN
1944 0 : mskfnd(BAT *b, msk v)
1945 : {
1946 0 : BUN p, q;
1947 :
1948 0 : if (v) {
1949 : /* find a 1 value */
1950 0 : for (p = 0, q = (BATcount(b) + 31) / 32; p < q; p++) {
1951 0 : if (((uint32_t *) b->theap->base)[p] != 0) {
1952 : /* there's at least one 1 bit */
1953 0 : return p * 32 + candmask_lobit(((uint32_t *) b->theap->base)[p]);
1954 : }
1955 : }
1956 : } else {
1957 : /* find a 0 value */
1958 0 : for (p = 0, q = (BATcount(b) + 31) / 32; p < q; p++) {
1959 0 : if (((uint32_t *) b->theap->base)[p] != ~0U) {
1960 : /* there's at least one 0 bit */
1961 0 : return p * 32 + candmask_lobit(~((uint32_t *) b->theap->base)[p]);
1962 : }
1963 : }
1964 : }
1965 : return BUN_NONE;
1966 : }
1967 :
1968 : BUN
1969 1616083 : BUNfnd(BAT *b, const void *v)
1970 : {
1971 1616083 : BUN r = BUN_NONE;
1972 1616083 : BATiter bi;
1973 :
1974 1616083 : BATcheck(b, BUN_NONE);
1975 1616083 : if (!v || BATcount(b) == 0)
1976 : return r;
1977 1204051 : if (complex_cand(b)) {
1978 0 : struct canditer ci;
1979 0 : canditer_init(&ci, NULL, b);
1980 0 : return canditer_search(&ci, * (const oid *) v, false);
1981 : }
1982 1204051 : if (BATtvoid(b))
1983 675957 : return BUNfndVOID(b, v);
1984 528094 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1985 0 : return mskfnd(b, *(msk*)v);
1986 : }
1987 528094 : if (!BATcheckhash(b)) {
1988 54078 : if (BATordered(b) || BATordered_rev(b))
1989 53744 : return SORTfnd(b, v);
1990 : }
1991 474333 : if (BAThash(b) == GDK_SUCCEED) {
1992 474345 : bi = bat_iterator(b); /* outside of hashlock */
1993 474356 : MT_rwlock_rdlock(&b->thashlock);
1994 474356 : if (b->thash == NULL) {
1995 0 : MT_rwlock_rdunlock(&b->thashlock);
1996 0 : bat_iterator_end(&bi);
1997 0 : goto hashfnd_failed;
1998 : }
1999 948516 : switch (ATOMbasetype(bi.type)) {
2000 5 : case TYPE_bte:
2001 5 : HASHloop_bte(bi, b->thash, r, v)
2002 : break;
2003 : break;
2004 0 : case TYPE_sht:
2005 0 : HASHloop_sht(bi, b->thash, r, v)
2006 : break;
2007 : break;
2008 32 : case TYPE_int:
2009 32 : HASHloop_int(bi, b->thash, r, v)
2010 : break;
2011 : break;
2012 0 : case TYPE_flt:
2013 0 : HASHloop_flt(bi, b->thash, r, v)
2014 : break;
2015 : break;
2016 0 : case TYPE_dbl:
2017 0 : HASHloop_dbl(bi, b->thash, r, v)
2018 : break;
2019 : break;
2020 179 : case TYPE_lng:
2021 179 : HASHloop_lng(bi, b->thash, r, v)
2022 : break;
2023 : break;
2024 : #ifdef HAVE_HGE
2025 0 : case TYPE_hge:
2026 0 : HASHloop_hge(bi, b->thash, r, v)
2027 : break;
2028 : break;
2029 : #endif
2030 0 : case TYPE_uuid:
2031 0 : HASHloop_uuid(bi, b->thash, r, v)
2032 : break;
2033 : break;
2034 474139 : case TYPE_str:
2035 658222 : HASHloop_str(bi, b->thash, r, v)
2036 : break;
2037 : break;
2038 1 : default:
2039 3 : HASHloop(bi, b->thash, r, v)
2040 : break;
2041 : break;
2042 : }
2043 474356 : MT_rwlock_rdunlock(&b->thashlock);
2044 474356 : bat_iterator_end(&bi);
2045 474356 : return r;
2046 : }
2047 0 : hashfnd_failed:
2048 : /* can't build hash table, search the slow way */
2049 0 : GDKclrerr();
2050 0 : return slowfnd(b, v);
2051 : }
2052 :
2053 : /*
2054 : * @+ BAT Property Management
2055 : *
2056 : * The function BATcount returns the number of active elements in a
2057 : * BAT. Counting is type independent. It can be implemented quickly,
2058 : * because the system ensures a dense BUN list.
2059 : */
2060 : void
2061 2981545 : BATsetcapacity(BAT *b, BUN cnt)
2062 : {
2063 2981545 : b->batCapacity = cnt;
2064 2981545 : assert(b->batCount <= cnt);
2065 2981545 : }
2066 :
2067 : /* Set the batCount value for the bat and also set some dependent
2068 : * properties. This function should be called only when it is save from
2069 : * concurrent use (e.g. when theaplock is being held). */
2070 : void
2071 79880322 : BATsetcount(BAT *b, BUN cnt)
2072 : {
2073 : /* head column is always VOID, and some head properties never change */
2074 79880322 : assert(!is_oid_nil(b->hseqbase));
2075 79880322 : assert(cnt <= BUN_MAX);
2076 :
2077 79880322 : b->batCount = cnt;
2078 79880322 : if (b->theap->parentid == b->batCacheid) {
2079 76845450 : b->theap->dirty |= b->ttype != TYPE_void && cnt > 0;
2080 76845450 : b->theap->free = tailsize(b, cnt);
2081 : }
2082 79880322 : if (b->ttype == TYPE_void)
2083 12100845 : b->batCapacity = cnt;
2084 79880322 : if (cnt <= 1) {
2085 17561094 : b->tsorted = b->trevsorted = ATOMlinear(b->ttype);
2086 17561094 : b->tnosorted = b->tnorevsorted = 0;
2087 : }
2088 : /* if the BAT was made smaller, we need to zap some values */
2089 79880322 : if (b->tnosorted >= BATcount(b))
2090 15788501 : b->tnosorted = 0;
2091 79880322 : if (b->tnorevsorted >= BATcount(b))
2092 15580920 : b->tnorevsorted = 0;
2093 79880322 : if (b->tnokey[0] >= BATcount(b) || b->tnokey[1] >= BATcount(b)) {
2094 15595842 : b->tnokey[0] = 0;
2095 15595842 : b->tnokey[1] = 0;
2096 : }
2097 79880322 : if (b->ttype == TYPE_void) {
2098 12125857 : b->tsorted = true;
2099 12125857 : if (is_oid_nil(b->tseqbase)) {
2100 6234749 : b->tkey = cnt <= 1;
2101 6234749 : b->trevsorted = true;
2102 6234749 : b->tnil = true;
2103 6234749 : b->tnonil = false;
2104 : } else {
2105 5891108 : b->tkey = true;
2106 5891108 : b->trevsorted = cnt <= 1;
2107 5891108 : b->tnil = false;
2108 5891108 : b->tnonil = true;
2109 : }
2110 : }
2111 79880322 : assert(b->batCapacity >= cnt);
2112 79880322 : }
2113 :
2114 : /*
2115 : * The key and name properties can be changed at any time. Keyed
2116 : * dimensions are automatically supported by an auxiliary hash-based
2117 : * access structure to speed up searching. Turning off the key
2118 : * integrity property does not cause the index to disappear. It can
2119 : * still be used to speed-up retrieval. The routine BATkey sets the
2120 : * key property of the association head.
2121 : */
2122 : gdk_return
2123 53768 : BATkey(BAT *b, bool flag)
2124 : {
2125 53768 : BATcheck(b, GDK_FAIL);
2126 53768 : if (b->ttype == TYPE_void) {
2127 1134 : if (BATtdense(b) && !flag) {
2128 0 : GDKerror("dense column must be unique.\n");
2129 0 : return GDK_FAIL;
2130 : }
2131 1134 : if (is_oid_nil(b->tseqbase) && flag && b->batCount > 1) {
2132 0 : GDKerror("void column cannot be unique.\n");
2133 0 : return GDK_FAIL;
2134 : }
2135 : }
2136 53768 : b->tkey = flag;
2137 53768 : if (!flag) {
2138 38815 : b->tseqbase = oid_nil;
2139 : } else
2140 14953 : b->tnokey[0] = b->tnokey[1] = 0;
2141 53768 : gdk_return rc = GDK_SUCCEED;
2142 53768 : if (flag && VIEWtparent(b)) {
2143 : /* if a view is key, then so is the parent if the two
2144 : * are aligned */
2145 2 : BAT *bp = BATdescriptor(VIEWtparent(b));
2146 2 : if (bp != NULL) {
2147 2 : MT_lock_set(&bp->theaplock);
2148 4 : if (BATcount(b) == BATcount(bp) &&
2149 2 : ATOMtype(BATttype(b)) == ATOMtype(BATttype(bp)) &&
2150 2 : !BATtkey(bp) &&
2151 0 : ((BATtvoid(b) && BATtvoid(bp) && b->tseqbase == bp->tseqbase) ||
2152 : BATcount(b) == 0))
2153 0 : rc = BATkey(bp, true);
2154 2 : MT_lock_unset(&bp->theaplock);
2155 2 : BBPunfix(bp->batCacheid);
2156 : }
2157 : }
2158 : return rc;
2159 : }
2160 :
2161 : void
2162 2913580 : BAThseqbase(BAT *b, oid o)
2163 : {
2164 2913580 : if (b != NULL) {
2165 2913580 : assert(o <= GDK_oid_max); /* i.e., not oid_nil */
2166 2913580 : assert(o + BATcount(b) <= GDK_oid_max);
2167 2913580 : b->hseqbase = o;
2168 : }
2169 2913580 : }
2170 :
2171 : void
2172 7220405 : BATtseqbase(BAT *b, oid o)
2173 : {
2174 7220405 : assert(o <= oid_nil);
2175 7220405 : if (b == NULL)
2176 : return;
2177 7220405 : assert(is_oid_nil(o) || o + BATcount(b) <= GDK_oid_max);
2178 7220405 : if (ATOMtype(b->ttype) == TYPE_oid) {
2179 6117856 : b->tseqbase = o;
2180 :
2181 : /* adapt keyness */
2182 6117856 : if (BATtvoid(b)) {
2183 6091109 : b->tsorted = true;
2184 6091109 : if (is_oid_nil(o)) {
2185 112 : b->tkey = b->batCount <= 1;
2186 112 : b->tnonil = b->batCount == 0;
2187 112 : b->tnil = b->batCount > 0;
2188 112 : b->trevsorted = true;
2189 112 : b->tnosorted = b->tnorevsorted = 0;
2190 112 : if (!b->tkey) {
2191 0 : b->tnokey[0] = 0;
2192 0 : b->tnokey[1] = 1;
2193 : } else {
2194 112 : b->tnokey[0] = b->tnokey[1] = 0;
2195 : }
2196 : } else {
2197 6090997 : if (!b->tkey) {
2198 19370 : b->tkey = true;
2199 19370 : b->tnokey[0] = b->tnokey[1] = 0;
2200 : }
2201 6090997 : b->tnonil = true;
2202 6090997 : b->tnil = false;
2203 6090997 : b->trevsorted = b->batCount <= 1;
2204 6090997 : if (!b->trevsorted)
2205 19676 : b->tnorevsorted = 1;
2206 : }
2207 : }
2208 : } else {
2209 1102549 : assert(o == oid_nil);
2210 1102549 : b->tseqbase = oid_nil;
2211 : }
2212 : }
2213 :
2214 : /*
2215 : * @- Change the BAT access permissions (read, append, write)
2216 : * Regrettably, BAT access-permissions, persistent status and memory
2217 : * map modes, interact in ways that makes one's brain sizzle. This
2218 : * makes BATsetaccess and TMcommit (where a change in BAT persistence
2219 : * mode is made permanent) points in which the memory map status of
2220 : * bats needs to be carefully re-assessed and ensured.
2221 : *
2222 : * Another complication is the fact that during commit, concurrent
2223 : * users may access the heaps, such that the simple solution
2224 : * unmap;re-map is out of the question.
2225 : * Even worse, it is not possible to even rename an open mmap file in
2226 : * Windows. For this purpose, we dropped the old .priv scheme, which
2227 : * relied on file moves. Now, the file that is opened with mmap is
2228 : * always the X file, in case of newstorage=STORE_PRIV, we save in a
2229 : * new file X.new
2230 : *
2231 : * we must consider the following dimensions:
2232 : *
2233 : * persistence:
2234 : * not simply the current persistence mode but whether the bat *was*
2235 : * present at the last commit point (BBP status & BBPEXISTING).
2236 : * The crucial issue is namely whether we must guarantee recovery
2237 : * to a previous sane state.
2238 : *
2239 : * access:
2240 : * whether the BAT is BAT_READ or BAT_WRITE. Note that BAT_APPEND
2241 : * is usually the same as BAT_READ (as our concern are only data pages
2242 : * that already existed at the last commit).
2243 : *
2244 : * storage:
2245 : * the current way the heap file X is memory-mapped;
2246 : * STORE_MMAP uses direct mapping (so dirty pages may be flushed
2247 : * at any time to disk), STORE_PRIV uses copy-on-write.
2248 : *
2249 : * newstorage:
2250 : * the current save-regime. STORE_MMAP calls msync() on the heap X,
2251 : * whereas STORE_PRIV writes the *entire* heap in a file: X.new
2252 : * If a BAT is loaded from disk, the field newstorage is used
2253 : * to set storage as well (so before change-access and commit-
2254 : * persistence mayhem, we always have newstorage=storage).
2255 : *
2256 : * change-access:
2257 : * what happens if the bat-access mode is changed from
2258 : * BAT_READ into BAT_WRITE (or vice versa).
2259 : *
2260 : * commit-persistence:
2261 : * what happens during commit if the bat-persistence mode was
2262 : * changed (from TRANSIENT into PERSISTENT, or vice versa).
2263 : *
2264 : * this is the scheme:
2265 : *
2266 : * persistence access newstorage storage change-access commit-persistence
2267 : * =========== ========= ========== ========== ============= ==================
2268 : * 0 transient BAT_READ STORE_MMAP STORE_MMAP =>2 =>4
2269 : * 1 transient BAT_READ STORE_PRIV STORE_PRIV =>3 =>5
2270 : * 2 transient BAT_WRITE STORE_MMAP STORE_MMAP =>0 =>6+
2271 : * 3 transient BAT_WRITE STORE_PRIV STORE_PRIV =>1 =>7
2272 : * 4 persistent BAT_READ STORE_MMAP STORE_MMAP =>6+ =>0
2273 : * 5 persistent BAT_READ STORE_PRIV STORE_PRIV =>7 =>1
2274 : * 6 persistent BAT_WRITE STORE_PRIV STORE_MMAP del X.new=>4+ del X.new;=>2+
2275 : * 7 persistent BAT_WRITE STORE_PRIV STORE_PRIV =>5 =>3
2276 : *
2277 : * exception states:
2278 : * a transient BAT_READ STORE_PRIV STORE_MMAP =>b =>c
2279 : * b transient BAT_WRITE STORE_PRIV STORE_MMAP =>a =>6
2280 : * c persistent BAT_READ STORE_PRIV STORE_MMAP =>6 =>a
2281 : *
2282 : * (+) indicates that we must ensure that the heap gets saved in its new mode
2283 : *
2284 : * Note that we now allow a heap with save-regime STORE_PRIV that was
2285 : * actually mapped STORE_MMAP. In effect, the potential corruption of
2286 : * the X file is compensated by writing out full X.new files that take
2287 : * precedence. When transitioning out of this state towards one with
2288 : * both storage regime and OS as STORE_MMAP we need to move the X.new
2289 : * files into the backup directory. Then msync the X file and (on
2290 : * success) remove the X.new; see backup_new().
2291 : *
2292 : * Exception states are only reachable if the commit fails and those
2293 : * new persistent bats have already been processed (but never become
2294 : * part of a committed state). In that case a transition 2=>6 may end
2295 : * up 2=>b. Exception states a and c are reachable from b.
2296 : *
2297 : * Errors in HEAPchangeaccess() can be handled atomically inside the
2298 : * routine. The work on changing mmap modes HEAPcommitpersistence()
2299 : * is done during the BBPsync() for all bats that are newly persistent
2300 : * (BBPNEW). After the TMcommit(), it is done for those bats that are
2301 : * no longer persistent after the commit (BBPDELETED), only if it
2302 : * succeeds. Such transient bats cannot be processed before the
2303 : * commit, because the commit may fail and then the more unsafe
2304 : * transient mmap modes would be present on a persistent bat.
2305 : *
2306 : * See dirty_bat() in BBPsync() -- gdk_bbp.c and epilogue() in
2307 : * gdk_tm.c.
2308 : *
2309 : * Including the exception states, we have 11 of the 16
2310 : * combinations. As for the 5 avoided states, all four
2311 : * (persistence,access) states with (STORE_MMAP,STORE_PRIV) are
2312 : * omitted (this would amount to an msync() save regime on a
2313 : * copy-on-write heap -- which does not work). The remaining avoided
2314 : * state is the patently unsafe
2315 : * (persistent,BAT_WRITE,STORE_MMAP,STORE_MMAP).
2316 : *
2317 : * Note that after a server restart exception states are gone, as on
2318 : * BAT loads the saved descriptor is inspected again (which will
2319 : * reproduce the state at the last succeeded commit).
2320 : *
2321 : * To avoid exception states, a TMsubcommit protocol would need to be
2322 : * used which is too heavy for BATsetaccess().
2323 : *
2324 : * Note that this code is not about making heaps mmap-ed in the first
2325 : * place. It is just about determining which flavor of mmap should be
2326 : * used. The MAL user is oblivious of such details.
2327 : */
2328 :
2329 : /* rather than deleting X.new, we comply with the commit protocol and
2330 : * move it to backup storage */
2331 : static gdk_return
2332 0 : backup_new(Heap *hp, bool lock)
2333 : {
2334 0 : int batret, bakret, ret = -1;
2335 0 : char *batpath, *bakpath;
2336 0 : struct stat st;
2337 :
2338 0 : char *bak_filename = NULL;
2339 0 : if ((bak_filename = strrchr(hp->filename, DIR_SEP)) != NULL)
2340 0 : bak_filename++;
2341 : else
2342 : bak_filename = hp->filename;
2343 : /* check for an existing X.new in BATDIR, BAKDIR and SUBDIR */
2344 0 : batpath = GDKfilepath(hp->farmid, BATDIR, hp->filename, "new");
2345 0 : bakpath = GDKfilepath(hp->farmid, BAKDIR, bak_filename, "new");
2346 0 : if (batpath != NULL && bakpath != NULL) {
2347 : /* file actions here interact with the global commits */
2348 0 : if (lock)
2349 0 : BBPtmlock();
2350 :
2351 0 : batret = MT_stat(batpath, &st);
2352 0 : bakret = MT_stat(bakpath, &st);
2353 :
2354 0 : if (batret == 0 && bakret) {
2355 : /* no backup yet, so move the existing X.new there out
2356 : * of the way */
2357 0 : if ((ret = MT_rename(batpath, bakpath)) < 0)
2358 0 : GDKsyserror("backup_new: rename %s to %s failed\n",
2359 : batpath, bakpath);
2360 0 : TRC_DEBUG(IO_, "rename(%s,%s) = %d\n", batpath, bakpath, ret);
2361 0 : } else if (batret == 0) {
2362 : /* there is a backup already; just remove the X.new */
2363 0 : if ((ret = MT_remove(batpath)) != 0)
2364 0 : GDKsyserror("backup_new: remove %s failed\n", batpath);
2365 0 : TRC_DEBUG(IO_, "remove(%s) = %d\n", batpath, ret);
2366 : } else {
2367 : ret = 0;
2368 : }
2369 0 : if (lock)
2370 0 : BBPtmunlock();
2371 : }
2372 0 : GDKfree(batpath);
2373 0 : GDKfree(bakpath);
2374 0 : return ret ? GDK_FAIL : GDK_SUCCEED;
2375 : }
2376 :
2377 : #define ACCESSMODE(wr,rd) ((wr)?BAT_WRITE:(rd)?BAT_READ:-1)
2378 :
2379 : /* transition heap from readonly to writable */
2380 : static storage_t
2381 7215232 : HEAPchangeaccess(Heap *hp, int dstmode, bool existing)
2382 : {
2383 7215232 : if (hp->base == NULL || hp->newstorage == STORE_MEM || !existing || dstmode == -1)
2384 7215232 : return hp->newstorage; /* 0<=>2,1<=>3,a<=>b */
2385 :
2386 0 : if (dstmode == BAT_WRITE) {
2387 0 : if (hp->storage != STORE_PRIV)
2388 0 : hp->dirty = true; /* exception c does not make it dirty */
2389 0 : return STORE_PRIV; /* 4=>6,5=>7,c=>6 persistent BAT_WRITE needs STORE_PRIV */
2390 : }
2391 0 : if (hp->storage == STORE_MMAP) { /* 6=>4 */
2392 0 : hp->dirty = true;
2393 0 : return backup_new(hp, true) != GDK_SUCCEED ? STORE_INVALID : STORE_MMAP; /* only called for existing bats */
2394 : }
2395 : return hp->storage; /* 7=>5 */
2396 : }
2397 :
2398 : /* heap changes persistence mode (at commit point) */
2399 : static storage_t
2400 154573 : HEAPcommitpersistence(Heap *hp, bool writable, bool existing)
2401 : {
2402 154573 : if (existing) { /* existing, ie will become transient */
2403 37265 : if (hp->storage == STORE_MMAP && hp->newstorage == STORE_PRIV && writable) { /* 6=>2 */
2404 0 : hp->dirty = true;
2405 0 : return backup_new(hp, false) != GDK_SUCCEED ? STORE_INVALID : STORE_MMAP; /* only called for existing bats */
2406 : }
2407 37265 : return hp->newstorage; /* 4=>0,5=>1,7=>3,c=>a no change */
2408 : }
2409 : /* !existing, ie will become persistent */
2410 117308 : if (hp->newstorage == STORE_MEM)
2411 : return hp->newstorage;
2412 772 : if (hp->newstorage == STORE_MMAP && !writable)
2413 : return STORE_MMAP; /* 0=>4 STORE_MMAP */
2414 :
2415 0 : if (hp->newstorage == STORE_MMAP)
2416 0 : hp->dirty = true; /* 2=>6 */
2417 : return STORE_PRIV; /* 1=>5,2=>6,3=>7,a=>c,b=>6 states */
2418 : }
2419 :
2420 :
2421 : #define ATOMappendpriv(t, h) (ATOMstorage(t) != TYPE_str /*|| GDK_ELIMDOUBLES(h) */)
2422 :
2423 : /* change the heap modes at a commit */
2424 : gdk_return
2425 131338 : BATcheckmodes(BAT *b, bool existing)
2426 : {
2427 131338 : storage_t m1 = STORE_MEM, m3 = STORE_MEM;
2428 131338 : bool dirty = false, wr;
2429 :
2430 131338 : BATcheck(b, GDK_FAIL);
2431 :
2432 131338 : wr = (b->batRestricted == BAT_WRITE);
2433 131338 : if (b->ttype) {
2434 131338 : m1 = HEAPcommitpersistence(b->theap, wr, existing);
2435 131338 : dirty |= (b->theap->newstorage != m1);
2436 : }
2437 :
2438 131338 : if (b->tvheap) {
2439 23235 : bool ta = (b->batRestricted == BAT_APPEND) && ATOMappendpriv(b->ttype, b->tvheap);
2440 23235 : m3 = HEAPcommitpersistence(b->tvheap, wr || ta, existing);
2441 23235 : dirty |= (b->tvheap->newstorage != m3);
2442 : }
2443 131338 : if (m1 == STORE_INVALID || m3 == STORE_INVALID)
2444 : return GDK_FAIL;
2445 :
2446 131338 : if (dirty) {
2447 0 : b->theap->newstorage = m1;
2448 0 : if (b->tvheap)
2449 0 : b->tvheap->newstorage = m3;
2450 : }
2451 : return GDK_SUCCEED;
2452 : }
2453 :
2454 : BAT *
2455 9019386 : BATsetaccess(BAT *b, restrict_t newmode)
2456 : {
2457 9019386 : restrict_t bakmode;
2458 :
2459 9019386 : BATcheck(b, NULL);
2460 9019386 : if (newmode != BAT_READ &&
2461 19015 : (isVIEW(b) || (ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1)) {
2462 0 : BAT *bn = COLcopy(b, b->ttype, true, b->batRole);
2463 0 : BBPunfix(b->batCacheid);
2464 0 : if (bn == NULL)
2465 : return NULL;
2466 : b = bn;
2467 : }
2468 9019386 : MT_lock_set(&b->theaplock);
2469 9052593 : bakmode = b->batRestricted;
2470 9052593 : if (bakmode != newmode) {
2471 6335810 : bool existing = (BBP_status(b->batCacheid) & BBPEXISTING) != 0;
2472 6335810 : bool wr = (newmode == BAT_WRITE);
2473 6335810 : bool rd = (bakmode == BAT_WRITE);
2474 6335810 : storage_t m1 = STORE_MEM, m3 = STORE_MEM;
2475 6335810 : storage_t b1 = STORE_MEM, b3 = STORE_MEM;
2476 :
2477 6335810 : if (b->theap->parentid == b->batCacheid) {
2478 6320817 : b1 = b->theap->newstorage;
2479 6331060 : m1 = HEAPchangeaccess(b->theap, ACCESSMODE(wr, rd), existing);
2480 : }
2481 6320778 : if (b->tvheap && b->tvheap->parentid == b->batCacheid) {
2482 916973 : bool ta = (newmode == BAT_APPEND && ATOMappendpriv(b->ttype, b->tvheap));
2483 916973 : b3 = b->tvheap->newstorage;
2484 1835834 : m3 = HEAPchangeaccess(b->tvheap, ACCESSMODE(wr && ta, rd && ta), existing);
2485 : }
2486 6325049 : if (m1 == STORE_INVALID || m3 == STORE_INVALID) {
2487 2403825 : MT_lock_unset(&b->theaplock);
2488 2407872 : BBPunfix(b->batCacheid);
2489 2407872 : return NULL;
2490 : }
2491 :
2492 : /* set new access mode and mmap modes */
2493 3921224 : b->batRestricted = newmode;
2494 3921224 : if (b->theap->parentid == b->batCacheid)
2495 3918568 : b->theap->newstorage = m1;
2496 3921224 : if (b->tvheap && b->tvheap->parentid == b->batCacheid)
2497 903388 : b->tvheap->newstorage = m3;
2498 :
2499 3921224 : MT_lock_unset(&b->theaplock);
2500 3931705 : if (existing && !isVIEW(b) && BBPsave(b) != GDK_SUCCEED) {
2501 : /* roll back all changes */
2502 0 : MT_lock_set(&b->theaplock);
2503 0 : b->batRestricted = bakmode;
2504 0 : b->theap->newstorage = b1;
2505 0 : if (b->tvheap)
2506 0 : b->tvheap->newstorage = b3;
2507 0 : MT_lock_unset(&b->theaplock);
2508 0 : BBPunfix(b->batCacheid);
2509 0 : return NULL;
2510 : }
2511 : } else {
2512 2716783 : MT_lock_unset(&b->theaplock);
2513 : }
2514 : return b;
2515 : }
2516 :
2517 : restrict_t
2518 0 : BATgetaccess(BAT *b)
2519 : {
2520 0 : BATcheck(b, BAT_WRITE);
2521 0 : MT_lock_set(&b->theaplock);
2522 0 : restrict_t restricted = b->batRestricted;
2523 0 : MT_lock_unset(&b->theaplock);
2524 0 : return restricted;
2525 : }
2526 :
2527 : /*
2528 : * @- change BAT persistency (persistent,session,transient)
2529 : * In the past, we prevented BATS with certain types from being saved at all:
2530 : * - BATs of BATs, as having recursive bats creates cascading
2531 : * complexities in commits/aborts.
2532 : * - any atom with refcounts, as the BBP has no overview of such
2533 : * user-defined refcounts.
2534 : * - pointer types, as the values they point to are bound to be transient.
2535 : *
2536 : * However, nowadays we do allow such saves, as the BBP swapping
2537 : * mechanism was altered to be able to save transient bats temporarily
2538 : * to disk in order to make room. Thus, we must be able to save any
2539 : * transient BAT to disk.
2540 : *
2541 : * What we don't allow is to make such bats persistent.
2542 : *
2543 : * Although the persistent state does influence the allowed mmap
2544 : * modes, this only goes for the *real* committed persistent
2545 : * state. Making the bat persistent with BATmode does not matter for
2546 : * the heap modes until the commit point is reached. So we do not need
2547 : * to do anything with heap modes yet at this point.
2548 : */
2549 : gdk_return
2550 428049 : BATmode(BAT *b, bool transient)
2551 : {
2552 428049 : BATcheck(b, GDK_FAIL);
2553 :
2554 : /* can only make a bat PERSISTENT if its role is already
2555 : * PERSISTENT */
2556 428049 : assert(transient || b->batRole == PERSISTENT);
2557 : /* cannot make a view PERSISTENT */
2558 244526 : assert(transient || !isVIEW(b));
2559 :
2560 428049 : if (b->batRole == TRANSIENT && !transient) {
2561 0 : GDKerror("cannot change mode of BAT in TRANSIENT farm.\n");
2562 0 : return GDK_FAIL;
2563 : }
2564 :
2565 428049 : BATiter bi = bat_iterator(b);
2566 428049 : bool mustrelease = false;
2567 428049 : bool mustretain = false;
2568 428049 : bat bid = b->batCacheid;
2569 :
2570 428049 : if (transient != bi.transient) {
2571 428049 : if (!transient) {
2572 244526 : if (ATOMisdescendant(b->ttype, TYPE_ptr)) {
2573 0 : GDKerror("%s type implies that %s[%s] "
2574 : "cannot be made persistent.\n",
2575 : ATOMname(b->ttype), BATgetId(b),
2576 : ATOMname(b->ttype));
2577 0 : bat_iterator_end(&bi);
2578 0 : return GDK_FAIL;
2579 : }
2580 : }
2581 :
2582 : /* we need to delay the calls to BBPretain and
2583 : * BBPrelease until after we have released our reference
2584 : * to the heaps (i.e. until after bat_iterator_end),
2585 : * because in either case, BBPfree can be called (either
2586 : * directly here or in BBPtrim) which waits for the heap
2587 : * reference to come down. BBPretain calls incref which
2588 : * waits until the trim that is waiting for us is done,
2589 : * so that causes deadlock, and BBPrelease can call
2590 : * BBPfree which causes deadlock with a single thread */
2591 183523 : if (!transient) {
2592 : /* persistent BATs get a logical reference */
2593 : mustretain = true;
2594 183523 : } else if (!bi.transient) {
2595 : /* transient BATs loose their logical reference */
2596 183523 : mustrelease = true;
2597 : }
2598 428049 : MT_lock_set(&GDKswapLock(bid));
2599 428049 : if (!transient) {
2600 244526 : if (BBP_status(bid) & BBPDELETED) {
2601 0 : BBP_status_on(bid, BBPEXISTING);
2602 0 : BBP_status_off(bid, BBPDELETED);
2603 : } else
2604 244526 : BBP_status_on(bid, BBPNEW);
2605 183523 : } else if (!bi.transient) {
2606 183523 : if (!(BBP_status(bid) & BBPNEW))
2607 37175 : BBP_status_on(bid, BBPDELETED);
2608 183523 : BBP_status_off(bid, BBPPERSISTENT);
2609 : }
2610 : /* session bats or persistent bats that did not
2611 : * witness a commit yet may have been saved */
2612 428049 : MT_lock_set(&b->theaplock);
2613 428049 : if (b->batCopiedtodisk) {
2614 27643 : if (!transient) {
2615 546 : BBP_status_off(bid, BBPTMP);
2616 : } else {
2617 : /* TMcommit must remove it to
2618 : * guarantee free space */
2619 27097 : BBP_status_on(bid, BBPTMP);
2620 : }
2621 : }
2622 428049 : b->batTransient = transient;
2623 428049 : MT_lock_unset(&b->theaplock);
2624 428049 : MT_lock_unset(&GDKswapLock(bid));
2625 : }
2626 428049 : bat_iterator_end(&bi);
2627 : /* retain/release after bat_iterator_end because of refs to heaps */
2628 428049 : if (mustretain)
2629 244526 : BBPretain(bid);
2630 183523 : else if (mustrelease)
2631 183523 : BBPrelease(bid);
2632 : return GDK_SUCCEED;
2633 : }
2634 :
2635 : /* BATassertProps checks whether properties are set correctly. Under
2636 : * no circumstances will it change any properties. Note that the
2637 : * "nil" property is not actually used anywhere, but it is checked. */
2638 :
2639 : #ifdef NDEBUG
2640 : /* assertions are disabled, turn failing tests into a message */
2641 : #undef assert
2642 : #define assert(test) ((void) ((test) || (TRC_CRITICAL_ENDIF(CHECK_, "Assertion `%s' failed\n", #test), 0)))
2643 : #endif
2644 :
2645 : static void
2646 173116809 : assert_ascii(const char *s)
2647 : {
2648 346233618 : if (!strNil(s)) {
2649 3457321198 : while (*s) {
2650 3330993364 : assert((*s & 0x80) == 0);
2651 3330993364 : s++;
2652 : }
2653 : }
2654 173116809 : }
2655 :
2656 : /* Assert that properties are set correctly.
2657 : *
2658 : * A BAT can have a bunch of properties set. Mostly, the property
2659 : * bits are set if we *know* the property holds, and not set if we
2660 : * don't know whether the property holds (or if we know it doesn't
2661 : * hold). All properties are per column.
2662 : *
2663 : * The properties currently maintained are:
2664 : *
2665 : * seqbase Only valid for TYPE_oid and TYPE_void columns: each
2666 : * value in the column is exactly one more than the
2667 : * previous value, starting at position 0 with the value
2668 : * stored in this property.
2669 : * This implies sorted, key, nonil (which therefore need
2670 : * to be set).
2671 : * nil There is at least one NIL value in the column.
2672 : * nonil There are no NIL values in the column.
2673 : * key All values in the column are distinct.
2674 : * sorted The column is sorted (ascending). If also revsorted,
2675 : * then all values are equal.
2676 : * revsorted The column is reversely sorted (descending). If
2677 : * also sorted, then all values are equal.
2678 : * nosorted BUN position which proofs not sorted (given position
2679 : * and one before are not ordered correctly).
2680 : * norevsorted BUN position which proofs not revsorted (given position
2681 : * and one before are not ordered correctly).
2682 : * nokey Pair of BUN positions that proof not all values are
2683 : * distinct (i.e. values at given locations are equal).
2684 : * ascii Only valid for TYPE_str columns: all strings in the column
2685 : * are ASCII, i.e. the UTF-8 encoding for all characters is a
2686 : * single byte.
2687 : *
2688 : * Note that the functions BATtseqbase and BATkey also set more
2689 : * properties than you might suspect. When setting properties on a
2690 : * newly created and filled BAT, you may want to first make sure the
2691 : * batCount is set correctly (e.g. by calling BATsetcount), then use
2692 : * BATtseqbase and BATkey, and finally set the other properties.
2693 : *
2694 : * For a view, we cannot check all properties, since it is possible with
2695 : * the way the SQL layer works, that a parent BAT gets changed, changing
2696 : * the properties, while there is a view. The view is supposed to look
2697 : * at only the non-changing part of the BAT (through candidate lists),
2698 : * but this means that the properties of the view might not be correct.
2699 : * For this reason, for views, we skip all property checking that looks
2700 : * at the BAT content.
2701 : */
2702 :
2703 : void
2704 20182068 : BATassertProps(BAT *b)
2705 : {
2706 20182068 : unsigned bbpstatus;
2707 20182068 : BUN p, q;
2708 20182068 : int (*cmpf)(const void *, const void *);
2709 20182068 : int cmp;
2710 20182068 : const void *prev = NULL, *valp, *nilp;
2711 20182068 : char filename[sizeof(b->theap->filename)];
2712 20182068 : bool isview1, isview2;
2713 :
2714 : /* do the complete check within a lock */
2715 20182068 : MT_lock_set(&b->theaplock);
2716 :
2717 : /* general BAT sanity */
2718 20216624 : assert(b != NULL);
2719 20216624 : assert(b->batCacheid > 0);
2720 20216624 : assert(b->batCacheid < getBBPsize());
2721 20129503 : assert(b == BBP_desc(b->batCacheid));
2722 20129503 : assert(b->batCount >= b->batInserted);
2723 :
2724 : /* headless */
2725 20129503 : assert(b->hseqbase <= GDK_oid_max); /* non-nil seqbase */
2726 20129503 : assert(b->hseqbase + BATcount(b) <= GDK_oid_max);
2727 :
2728 20129503 : isview1 = b->theap->parentid != b->batCacheid;
2729 20129503 : isview2 = b->tvheap && b->tvheap->parentid != b->batCacheid;
2730 :
2731 20129503 : bbpstatus = BBP_status(b->batCacheid);
2732 : /* only at most one of BBPDELETED, BBPEXISTING, BBPNEW may be set */
2733 20129503 : assert(((bbpstatus & BBPDELETED) != 0) +
2734 : ((bbpstatus & BBPEXISTING) != 0) +
2735 : ((bbpstatus & BBPNEW) != 0) <= 1);
2736 :
2737 20129503 : assert(b->ttype >= TYPE_void);
2738 20129503 : assert(b->ttype < GDKatomcnt);
2739 20129503 : assert(isview1 ||
2740 : b->ttype == TYPE_void ||
2741 : BBPfarms[b->theap->farmid].roles & (1 << b->batRole));
2742 20129503 : assert(isview2 ||
2743 : b->tvheap == NULL ||
2744 : (BBPfarms[b->tvheap->farmid].roles & (1 << b->batRole)));
2745 :
2746 20129503 : cmpf = ATOMcompare(b->ttype);
2747 20129503 : nilp = ATOMnilptr(b->ttype);
2748 :
2749 20129503 : assert(isview1 || b->theap->free >= tailsize(b, BATcount(b)));
2750 20129503 : if (b->ttype != TYPE_void) {
2751 15133197 : assert(b->batCount <= b->batCapacity);
2752 15133197 : assert(isview1 || b->theap->size >= b->theap->free);
2753 15133197 : if (ATOMstorage(b->ttype) == TYPE_msk) {
2754 : /* 32 values per 4-byte word (that's not the
2755 : * same as 8 values per byte...) */
2756 3913 : assert(isview1 || b->theap->size >= 4 * ((b->batCapacity + 31) / 32));
2757 : } else
2758 15129284 : assert(isview1 || b->theap->size >> b->tshift >= b->batCapacity);
2759 : }
2760 20129503 : if (!isview1) {
2761 15333570 : strconcat_len(filename, sizeof(filename),
2762 15333570 : BBP_physical(b->theap->parentid),
2763 2689238 : b->ttype == TYPE_str ? b->twidth == 1 ? ".tail1" : b->twidth == 2 ? ".tail2" :
2764 : #if SIZEOF_VAR_T == 8
2765 : b->twidth == 4 ? ".tail4" :
2766 : #endif
2767 : ".tail" : ".tail",
2768 : NULL);
2769 15396206 : assert(strcmp(b->theap->filename, filename) == 0);
2770 : }
2771 20192139 : if (!isview2 && b->tvheap) {
2772 2532314 : strconcat_len(filename, sizeof(filename),
2773 2532314 : BBP_physical(b->tvheap->parentid),
2774 : ".theap",
2775 : NULL);
2776 2532067 : assert(strcmp(b->tvheap->filename, filename) == 0);
2777 : }
2778 :
2779 : /* void, str and blob imply varsized */
2780 20191892 : if (ATOMstorage(b->ttype) == TYPE_str ||
2781 : ATOMstorage(b->ttype) == TYPE_blob)
2782 3481962 : assert(b->tvheap != NULL);
2783 : /* other "known" types are not varsized */
2784 20191892 : if (ATOMstorage(b->ttype) > TYPE_void &&
2785 : ATOMstorage(b->ttype) < TYPE_str)
2786 11680325 : assert(b->tvheap == NULL);
2787 : /* shift and width have a particular relationship */
2788 20191892 : if (ATOMstorage(b->ttype) == TYPE_str)
2789 3463265 : assert(b->twidth >= 1 && b->twidth <= ATOMsize(b->ttype));
2790 : else
2791 16728627 : assert(b->twidth == ATOMsize(b->ttype));
2792 20191892 : assert(b->tseqbase <= oid_nil);
2793 : /* only oid/void columns can be dense */
2794 20191892 : assert(is_oid_nil(b->tseqbase) || b->ttype == TYPE_oid || b->ttype == TYPE_void);
2795 : /* a column cannot both have and not have NILs */
2796 20191892 : assert(!b->tnil || !b->tnonil);
2797 : /* only string columns can be ASCII */
2798 20191892 : assert(!b->tascii || ATOMstorage(b->ttype) == TYPE_str);
2799 20191892 : if (b->ttype == TYPE_void) {
2800 5132557 : assert(b->tshift == 0);
2801 5132557 : assert(b->twidth == 0);
2802 5132557 : assert(b->tsorted);
2803 5132557 : if (is_oid_nil(b->tseqbase)) {
2804 266 : assert(b->tvheap == NULL);
2805 266 : assert(BATcount(b) == 0 || !b->tnonil);
2806 266 : assert(BATcount(b) <= 1 || !b->tkey);
2807 266 : assert(b->trevsorted);
2808 : } else {
2809 5132291 : if (b->tvheap != NULL) {
2810 : /* candidate list with exceptions */
2811 37383 : assert(b->batRole == TRANSIENT || b->batRole == SYSTRANS);
2812 37383 : assert(b->tvheap->free <= b->tvheap->size);
2813 37383 : assert(b->tvheap->free >= sizeof(ccand_t));
2814 37383 : assert((negoid_cand(b) && ccand_free(b) % SIZEOF_OID == 0) || mask_cand(b));
2815 37383 : if (negoid_cand(b) && ccand_free(b) > 0) {
2816 208 : const oid *oids = (const oid *) ccand_first(b);
2817 208 : q = ccand_free(b) / SIZEOF_OID;
2818 208 : assert(oids != NULL);
2819 208 : assert(b->tseqbase + BATcount(b) + q <= GDK_oid_max);
2820 : /* exceptions within range */
2821 208 : assert(oids[0] >= b->tseqbase);
2822 208 : assert(oids[q - 1] < b->tseqbase + BATcount(b) + q);
2823 : /* exceptions sorted */
2824 188740 : for (p = 1; p < q; p++)
2825 188532 : assert(oids[p - 1] < oids[p]);
2826 : }
2827 : }
2828 5132291 : assert(b->tseqbase + b->batCount <= GDK_oid_max);
2829 5132291 : assert(BATcount(b) == 0 || !b->tnil);
2830 5132291 : assert(BATcount(b) <= 1 || !b->trevsorted);
2831 5132291 : assert(b->tkey);
2832 5132291 : assert(b->tnonil);
2833 : }
2834 5132557 : MT_lock_unset(&b->theaplock);
2835 12925005 : return;
2836 : }
2837 :
2838 15059335 : BATiter bi = bat_iterator_nolock(b);
2839 :
2840 15059335 : if (BATtdense(b)) {
2841 249035 : assert(b->tseqbase + b->batCount <= GDK_oid_max);
2842 249035 : assert(b->ttype == TYPE_oid);
2843 249035 : assert(b->tsorted);
2844 249035 : assert(b->tkey);
2845 249035 : assert(b->tnonil);
2846 249035 : if ((q = b->batCount) != 0) {
2847 181761 : const oid *o = (const oid *) Tloc(b, 0);
2848 181761 : assert(*o == b->tseqbase);
2849 32397121 : for (p = 1; p < q; p++)
2850 32215360 : assert(o[p - 1] + 1 == o[p]);
2851 : }
2852 249035 : MT_lock_unset(&b->theaplock);
2853 249209 : return;
2854 : }
2855 14810300 : assert(1 << b->tshift == b->twidth);
2856 : /* only linear atoms can be sorted */
2857 14810300 : assert(!b->tsorted || ATOMlinear(b->ttype));
2858 14810300 : assert(!b->trevsorted || ATOMlinear(b->ttype));
2859 14810300 : if (ATOMlinear(b->ttype)) {
2860 14784364 : assert(b->tnosorted == 0 ||
2861 : (b->tnosorted > 0 &&
2862 : b->tnosorted < b->batCount));
2863 14784364 : assert(!b->tsorted || b->tnosorted == 0);
2864 14784364 : if (!isview1 &&
2865 14784364 : !isview2 &&
2866 1786746 : !b->tsorted &&
2867 124596 : b->tnosorted > 0 &&
2868 124596 : b->tnosorted < b->batCount)
2869 124868 : assert(cmpf(BUNtail(bi, b->tnosorted - 1),
2870 : BUNtail(bi, b->tnosorted)) > 0);
2871 14783578 : assert(b->tnorevsorted == 0 ||
2872 : (b->tnorevsorted > 0 &&
2873 : b->tnorevsorted < b->batCount));
2874 14783578 : assert(!b->trevsorted || b->tnorevsorted == 0);
2875 14783578 : if (!isview1 &&
2876 9720772 : !isview2 &&
2877 2434231 : !b->trevsorted &&
2878 560472 : b->tnorevsorted > 0 &&
2879 560472 : b->tnorevsorted < b->batCount)
2880 559839 : assert(cmpf(BUNtail(bi, b->tnorevsorted - 1),
2881 : BUNtail(bi, b->tnorevsorted)) < 0);
2882 : }
2883 : /* if tkey property set, both tnokey values must be 0 */
2884 14808030 : assert(!b->tkey || (b->tnokey[0] == 0 && b->tnokey[1] == 0));
2885 14808030 : if (!isview1 &&
2886 14808030 : !isview2 &&
2887 2218439 : !b->tkey &&
2888 2218439 : (b->tnokey[0] != 0 || b->tnokey[1] != 0)) {
2889 : /* if tkey not set and tnokey indicates a proof of
2890 : * non-key-ness, make sure the tnokey values are in
2891 : * range and indeed provide a proof */
2892 555080 : assert(b->tnokey[0] != b->tnokey[1]);
2893 555080 : assert(b->tnokey[0] < b->batCount);
2894 555080 : assert(b->tnokey[1] < b->batCount);
2895 555080 : assert(cmpf(BUNtail(bi, b->tnokey[0]),
2896 : BUNtail(bi, b->tnokey[1])) == 0);
2897 : }
2898 : /* var heaps must have sane sizes */
2899 14806621 : assert(b->tvheap == NULL || b->tvheap->free <= b->tvheap->size);
2900 :
2901 14806621 : if (!b->tkey && !b->tsorted && !b->trevsorted &&
2902 5068487 : !b->tnonil && !b->tnil) {
2903 : /* nothing more to prove */
2904 2387041 : MT_lock_unset(&b->theaplock);
2905 2384997 : return;
2906 : }
2907 :
2908 : /* only do a scan if the bat is not a view */
2909 12419580 : if (!isview1 && !isview2) {
2910 9277469 : const ValRecord *prop;
2911 9277469 : const void *maxval = NULL;
2912 9277469 : const void *minval = NULL;
2913 9277469 : const void *maxbound = NULL;
2914 9277469 : const void *minbound = NULL;
2915 9277469 : const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
2916 9264692 : bool seenmax = false, seenmin = false;
2917 9264692 : bool seennil = false;
2918 :
2919 9264692 : if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL)
2920 12 : maxbound = VALptr(prop);
2921 9308707 : if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL)
2922 2221 : minbound = VALptr(prop);
2923 9313761 : if (b->tmaxpos != BUN_NONE) {
2924 1099423 : assert(b->tmaxpos < BATcount(b));
2925 1099423 : maxval = BUNtail(bi, b->tmaxpos);
2926 1093395 : assert(cmpf(maxval, nilp) != 0);
2927 : }
2928 9306715 : if (b->tminpos != BUN_NONE) {
2929 1040783 : assert(b->tminpos < BATcount(b));
2930 1040783 : minval = BUNtail(bi, b->tminpos);
2931 1043196 : assert(cmpf(minval, nilp) != 0);
2932 : }
2933 9309124 : if (ATOMstorage(b->ttype) == TYPE_msk) {
2934 : /* for now, don't do extra checks for bit mask */
2935 : ;
2936 9305271 : } else if (b->tsorted || b->trevsorted || !b->tkey) {
2937 : /* if sorted (either way), or we don't have to
2938 : * prove uniqueness, we can do a simple
2939 : * scan */
2940 : /* only call compare function if we have to */
2941 9286894 : bool cmpprv = b->tsorted | b->trevsorted | b->tkey;
2942 :
2943 2129065719 : BATloop(b, p, q) {
2944 2119983869 : valp = BUNtail(bi, p);
2945 2119893693 : bool isnil = cmpf(valp, nilp) == 0;
2946 2061382019 : assert(!isnil || !notnull);
2947 2061382019 : assert(!b->tnonil || !isnil);
2948 2061382019 : assert(b->ttype != TYPE_flt || !isinf(*(flt*)valp));
2949 2061382019 : assert(b->ttype != TYPE_dbl || !isinf(*(dbl*)valp));
2950 2061382019 : if (b->tascii)
2951 170690554 : assert_ascii(valp);
2952 2061498804 : if (minbound && !isnil) {
2953 30 : cmp = cmpf(minbound, valp);
2954 30 : assert(cmp <= 0);
2955 : }
2956 2061498804 : if (maxbound && !isnil) {
2957 30 : cmp = cmpf(maxbound, valp);
2958 30 : assert(cmp > 0);
2959 : }
2960 2061498804 : if (maxval && !isnil) {
2961 74814187 : cmp = cmpf(maxval, valp);
2962 74818315 : assert(cmp >= 0);
2963 74818315 : seenmax |= cmp == 0;
2964 : }
2965 2061502932 : if (minval && !isnil) {
2966 48417126 : cmp = cmpf(minval, valp);
2967 48414026 : assert(cmp <= 0);
2968 48414026 : seenmin |= cmp == 0;
2969 : }
2970 2061499832 : if (prev && cmpprv) {
2971 1028570565 : cmp = cmpf(prev, valp);
2972 1087018114 : assert(!b->tsorted || cmp <= 0);
2973 1087018114 : assert(!b->trevsorted || cmp >= 0);
2974 1087018114 : assert(!b->tkey || cmp != 0);
2975 : }
2976 2119947381 : seennil |= isnil;
2977 2119947381 : if (seennil && !cmpprv &&
2978 4623354 : maxval == NULL && minval == NULL &&
2979 168515 : minbound == NULL && maxbound == NULL) {
2980 : /* we've done all the checking
2981 : * we can do */
2982 : break;
2983 : }
2984 2119778825 : prev = valp;
2985 : }
2986 : } else { /* b->tkey && !b->tsorted && !b->trevsorted */
2987 : /* we need to check for uniqueness the hard
2988 : * way (i.e. using a hash table) */
2989 18377 : const char *nme = BBP_physical(b->batCacheid);
2990 18377 : Hash *hs = NULL;
2991 18377 : BUN mask;
2992 :
2993 18377 : if ((hs = GDKzalloc(sizeof(Hash))) == NULL) {
2994 0 : TRC_WARNING(BAT_, "Cannot allocate hash table\n");
2995 0 : goto abort_check;
2996 : }
2997 18383 : if (snprintf(hs->heaplink.filename, sizeof(hs->heaplink.filename), "%s.thshprpl%x", nme, (unsigned) MT_getpid()) >= (int) sizeof(hs->heaplink.filename) ||
2998 18376 : snprintf(hs->heapbckt.filename, sizeof(hs->heapbckt.filename), "%s.thshprpb%x", nme, (unsigned) MT_getpid()) >= (int) sizeof(hs->heapbckt.filename)) {
2999 : /* cannot happen, see comment in gdk.h
3000 : * about sizes near definition of
3001 : * BBPINIT */
3002 0 : GDKfree(hs);
3003 0 : TRC_CRITICAL(BAT_, "Heap filename is too large\n");
3004 0 : goto abort_check;
3005 : }
3006 18380 : if (ATOMsize(b->ttype) == 1)
3007 : mask = (BUN) 1 << 8;
3008 18374 : else if (ATOMsize(b->ttype) == 2)
3009 : mask = (BUN) 1 << 16;
3010 : else
3011 18374 : mask = HASHmask(b->batCount);
3012 18380 : hs->heapbckt.parentid = b->batCacheid;
3013 18380 : hs->heaplink.parentid = b->batCacheid;
3014 18380 : if ((hs->heaplink.farmid = BBPselectfarm(
3015 18379 : TRANSIENT, b->ttype, hashheap)) < 0 ||
3016 18379 : (hs->heapbckt.farmid = BBPselectfarm(
3017 36752 : TRANSIENT, b->ttype, hashheap)) < 0 ||
3018 18379 : HASHnew(hs, b->ttype, BATcount(b),
3019 : mask, BUN_NONE, false) != GDK_SUCCEED) {
3020 0 : GDKfree(hs);
3021 0 : TRC_WARNING(BAT_, "Cannot allocate hash table\n");
3022 0 : goto abort_check;
3023 : }
3024 35146814 : BATloop(b, p, q) {
3025 35128434 : BUN hb;
3026 35128434 : BUN prb;
3027 35128434 : valp = BUNtail(bi, p);
3028 35128434 : bool isnil = cmpf(valp, nilp) == 0;
3029 35120409 : assert(!isnil || !notnull);
3030 35120409 : assert(b->ttype != TYPE_flt || !isinf(*(flt*)valp));
3031 35120409 : assert(b->ttype != TYPE_dbl || !isinf(*(dbl*)valp));
3032 35120409 : if (b->tascii)
3033 10156 : assert_ascii(valp);
3034 35120409 : if (minbound && !isnil) {
3035 0 : cmp = cmpf(minbound, valp);
3036 0 : assert(cmp <= 0);
3037 : }
3038 35120409 : if (maxbound && !isnil) {
3039 0 : cmp = cmpf(maxbound, valp);
3040 0 : assert(cmp > 0);
3041 : }
3042 35120409 : if (maxval && !isnil) {
3043 5699 : cmp = cmpf(maxval, valp);
3044 5699 : assert(cmp >= 0);
3045 5699 : seenmax |= cmp == 0;
3046 : }
3047 35120409 : if (minval && !isnil) {
3048 5699 : cmp = cmpf(minval, valp);
3049 5699 : assert(cmp <= 0);
3050 5699 : seenmin |= cmp == 0;
3051 : }
3052 35120409 : prb = HASHprobe(hs, valp);
3053 35122878 : for (hb = HASHget(hs, prb);
3054 35727987 : hb != BUN_NONE;
3055 605109 : hb = HASHgetlink(hs, hb))
3056 607235 : if (cmpf(valp, BUNtail(bi, hb)) == 0)
3057 0 : assert(!b->tkey);
3058 35120752 : HASHputlink(hs, p, HASHget(hs, prb));
3059 35123455 : HASHput(hs, prb, p);
3060 35128436 : assert(!b->tnonil || !isnil);
3061 35128436 : seennil |= isnil;
3062 : }
3063 18380 : HEAPfree(&hs->heaplink, true);
3064 18381 : HEAPfree(&hs->heapbckt, true);
3065 18384 : GDKfree(hs);
3066 : }
3067 9272644 : abort_check:
3068 9272644 : GDKclrerr();
3069 9239518 : assert(maxval == NULL || seenmax);
3070 9239518 : assert(minval == NULL || seenmin);
3071 9239518 : assert(!b->tnil || seennil);
3072 : }
3073 12381629 : MT_lock_unset(&b->theaplock);
3074 : }
|