Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * @a M. L. Kersten, P. Boncz, N. Nes
15 : * @* BAT Module
16 : * In this Chapter we describe the BAT implementation in more detail.
17 : * The routines mentioned are primarily meant to simplify the library
18 : * implementation.
19 : *
20 : * @+ BAT Construction
21 : * BATs are implemented in several blocks of memory, prepared for disk
22 : * storage and easy shipment over a network.
23 : *
24 : * The BAT starts with a descriptor, which indicates the required BAT
25 : * library version and the BAT administration details. In particular,
26 : * it describes the binary relationship maintained and the location of
27 : * fields required for storage.
28 : *
29 : * The general layout of the BAT in this implementation is as follows.
30 : * Each BAT comes with a heap for the loc-size buns and, optionally,
31 : * with heaps to manage the variable-sized data items of both
32 : * dimensions. The buns are assumed to be stored as loc-size objects.
33 : * This is essentially an array of structs to store the associations.
34 : * The size is determined at BAT creation time using an upper bound on
35 : * the number of elements to be accommodated. In case of overflow,
36 : * its storage space is extended automatically.
37 : *
38 : * The capacity of a BAT places an upper limit on the number of BUNs
39 : * to be stored initially. The actual space set aside may be quite
40 : * large. Moreover, the size is aligned to int boundaries to speedup
41 : * access and avoid some machine limitations.
42 : *
43 : * Initialization of the variable parts rely on type specific routines
44 : * called atomHeap.
45 : */
46 : #include "monetdb_config.h"
47 : #include "gdk.h"
48 : #include "gdk_private.h"
49 : #include "mutils.h"
50 :
51 : #ifdef ALIGN
52 : #undef ALIGN
53 : #endif
54 : #define ALIGN(n,b) ((b)?(b)*(1+(((n)-1)/(b))):n)
55 :
56 : #define ATOMneedheap(tpe) (BATatoms[tpe].atomHeap != NULL)
57 :
58 : BAT *
59 21848709 : BATcreatedesc(oid hseq, int tt, bool heapnames, role_t role, uint16_t width)
60 : {
61 21848709 : bat bid;
62 21848709 : BAT *bn;
63 21848709 : Heap *h = NULL, *vh = NULL;
64 :
65 : /*
66 : * Alloc space for the BAT and its dependent records.
67 : */
68 21848709 : assert(tt >= 0);
69 :
70 21848709 : if (heapnames) {
71 10555449 : if ((h = GDKmalloc(sizeof(Heap))) == NULL) {
72 : return NULL;
73 : }
74 21200605 : *h = (Heap) {
75 10602879 : .farmid = BBPselectfarm(role, tt, offheap),
76 : .dirty = true,
77 : .refs = ATOMIC_VAR_INIT(1),
78 : };
79 :
80 10597726 : if (ATOMneedheap(tt)) {
81 786424 : if ((vh = GDKmalloc(sizeof(Heap))) == NULL) {
82 0 : GDKfree(h);
83 0 : return NULL;
84 : }
85 793537 : *vh = (Heap) {
86 793170 : .farmid = BBPselectfarm(role, tt, varheap),
87 : .dirty = true,
88 : .refs = ATOMIC_VAR_INIT(1),
89 : };
90 : }
91 : }
92 :
93 21898099 : bid = BBPallocbat(tt);
94 21992548 : if (bid == 0) {
95 0 : GDKfree(h);
96 0 : GDKfree(vh);
97 0 : return NULL;
98 : }
99 21992548 : bn = BBP_desc(bid);
100 :
101 : /*
102 : * Fill in basic column info
103 : */
104 43932083 : *bn = (BAT) {
105 : .batCacheid = bid,
106 : .hseqbase = hseq,
107 :
108 : .ttype = tt,
109 : .tkey = true,
110 : .tnonil = true,
111 : .tnil = false,
112 21939535 : .tsorted = ATOMlinear(tt),
113 : .trevsorted = ATOMlinear(tt),
114 : .tseqbase = oid_nil,
115 : .tminpos = BUN_NONE,
116 : .tmaxpos = BUN_NONE,
117 : .tunique_est = 0.0,
118 :
119 : .batRole = role,
120 : .batTransient = true,
121 : .batRestricted = BAT_WRITE,
122 : .theap = h,
123 : .tvheap = vh,
124 21992548 : .creator_tid = MT_getpid(),
125 : };
126 :
127 21939535 : if (bn->theap) {
128 10598653 : bn->theap->parentid = bn->batCacheid;
129 10598653 : const char *nme = BBP_physical(bn->batCacheid);
130 10598653 : settailname(bn->theap, nme, tt, width);
131 :
132 10573761 : if (bn->tvheap) {
133 783237 : bn->tvheap->parentid = bn->batCacheid;
134 783237 : strconcat_len(bn->tvheap->filename,
135 : sizeof(bn->tvheap->filename),
136 : nme, ".theap", NULL);
137 : }
138 : }
139 21924299 : char name[MT_NAME_LEN];
140 21924299 : snprintf(name, sizeof(name), "heaplock%d", bn->batCacheid); /* fits */
141 21924299 : MT_lock_init(&bn->theaplock, name);
142 21912198 : snprintf(name, sizeof(name), "BATlock%d", bn->batCacheid); /* fits */
143 21912198 : MT_lock_init(&bn->batIdxLock, name);
144 21939307 : snprintf(name, sizeof(name), "hashlock%d", bn->batCacheid); /* fits */
145 21939307 : MT_rwlock_init(&bn->thashlock, name);
146 21844186 : return bn;
147 : }
148 :
149 : uint8_t
150 21098351 : ATOMelmshift(int sz)
151 : {
152 21098351 : uint8_t sh;
153 21098351 : int i = sz >> 1;
154 :
155 40384155 : for (sh = 0; i != 0; sh++) {
156 19285804 : i >>= 1;
157 : }
158 21098351 : return sh;
159 : }
160 :
161 :
162 : void
163 10564528 : BATsetdims(BAT *b, uint16_t width)
164 : {
165 10564528 : b->twidth = b->ttype == TYPE_str ? width > 0 ? width : 1 : ATOMsize(b->ttype);
166 10564528 : b->tshift = ATOMelmshift(b->twidth);
167 10564528 : assert_shift_width(b->tshift, b->twidth);
168 10564528 : }
169 :
170 : const char *
171 1242 : BATtailname(const BAT *b)
172 : {
173 1242 : if (b->ttype == TYPE_str) {
174 325 : switch (b->twidth) {
175 235 : case 1:
176 235 : return "tail1";
177 75 : case 2:
178 75 : return "tail2";
179 15 : case 4:
180 : #if SIZEOF_VAR_T == 8
181 15 : return "tail4";
182 : case 8:
183 : #endif
184 : break;
185 : default:
186 0 : MT_UNREACHABLE();
187 : }
188 : }
189 : return "tail";
190 : }
191 :
192 : void
193 10654225 : settailname(Heap *restrict tail, const char *restrict physnme, int tt, int width)
194 : {
195 10654225 : if (tt == TYPE_str) {
196 820105 : switch (width) {
197 765694 : case 0:
198 : case 1:
199 765694 : strconcat_len(tail->filename,
200 : sizeof(tail->filename), physnme,
201 : ".tail1", NULL);
202 765694 : return;
203 50055 : case 2:
204 50055 : strconcat_len(tail->filename,
205 : sizeof(tail->filename), physnme,
206 : ".tail2", NULL);
207 50055 : return;
208 4356 : case 4:
209 : #if SIZEOF_VAR_T == 8
210 4356 : strconcat_len(tail->filename,
211 : sizeof(tail->filename), physnme,
212 : ".tail4", NULL);
213 4356 : return;
214 : case 8:
215 : #endif
216 : break;
217 : default:
218 0 : MT_UNREACHABLE();
219 : }
220 : }
221 9834120 : strconcat_len(tail->filename, sizeof(tail->filename), physnme,
222 : ".tail", NULL);
223 : }
224 :
225 : /*
226 : * @- BAT allocation
227 : * Allocate BUN heap and variable-size atomheaps (see e.g. strHeap).
228 : * We now initialize new BATs with their heapname such that the
229 : * modified HEAPalloc/HEAPextend primitives can possibly use memory
230 : * mapped files as temporary heap storage.
231 : *
232 : * In case of huge bats, we want HEAPalloc to write a file to disk,
233 : * and memory map it. To make this possible, we must provide it with
234 : * filenames.
235 : */
236 : BAT *
237 10570749 : COLnew2(oid hseq, int tt, BUN cap, role_t role, uint16_t width)
238 : {
239 10570749 : BAT *bn;
240 :
241 10570749 : assert(cap <= BUN_MAX);
242 10570749 : assert(hseq <= oid_nil);
243 10570749 : ERRORcheck((tt < 0) || (tt > GDKatomcnt), "tt error\n", NULL);
244 :
245 : /* round up to multiple of BATTINY */
246 10570749 : if (cap < BUN_MAX - BATTINY)
247 10559186 : cap = (cap + BATTINY - 1) & ~(BATTINY - 1);
248 10570749 : if (ATOMstorage(tt) == TYPE_msk) {
249 155780 : if (cap < 8*BATTINY)
250 : cap = 8*BATTINY;
251 : else
252 55407 : cap = (cap + 31) & ~(BUN)31;
253 10414969 : } else if (cap < BATTINY)
254 : cap = BATTINY;
255 : /* limit the size */
256 3389467 : if (cap > BUN_MAX)
257 : cap = BUN_MAX;
258 :
259 10570749 : bn = BATcreatedesc(hseq, tt, true, role, width);
260 10554663 : if (bn == NULL)
261 : return NULL;
262 :
263 10554663 : BATsetdims(bn, width);
264 10546087 : bn->batCapacity = cap;
265 :
266 10546087 : if (ATOMstorage(tt) == TYPE_msk)
267 155732 : cap /= 8; /* 8 values per byte */
268 :
269 : /* alloc the main heaps */
270 10546087 : if (tt && HEAPalloc(bn->theap, cap, bn->twidth) != GDK_SUCCEED) {
271 0 : goto bailout;
272 : }
273 :
274 10592554 : if (bn->tvheap && width == 0 && ATOMheap(tt, bn->tvheap, cap) != GDK_SUCCEED) {
275 0 : HEAPfree(bn->theap, true);
276 0 : goto bailout;
277 : }
278 10586495 : DELTAinit(bn);
279 10582429 : if (BBPcacheit(bn, true) != GDK_SUCCEED) {
280 : /* cannot happen, function always returns success */
281 0 : goto bailout;
282 : }
283 10623664 : TRC_DEBUG(ALGO, "-> " ALGOBATFMT "\n", ALGOBATPAR(bn));
284 : return bn;
285 0 : bailout:
286 0 : BBPclear(bn->batCacheid);
287 0 : return NULL;
288 : }
289 :
290 : BAT *
291 10313567 : COLnew(oid hseq, int tt, BUN cap, role_t role)
292 : {
293 10313567 : return COLnew2(hseq, tt, cap, role, 0);
294 : }
295 :
296 : BAT *
297 4955477 : BATdense(oid hseq, oid tseq, BUN cnt)
298 : {
299 4955477 : BAT *bn;
300 :
301 4955477 : bn = COLnew(hseq, TYPE_void, 0, TRANSIENT);
302 4965390 : if (bn != NULL) {
303 4965390 : BATtseqbase(bn, tseq);
304 4961917 : BATsetcount(bn, cnt);
305 4962254 : TRC_DEBUG(ALGO, OIDFMT "," OIDFMT "," BUNFMT
306 : "-> " ALGOBATFMT "\n", hseq, tseq, cnt,
307 : ALGOBATPAR(bn));
308 : }
309 4962254 : return bn;
310 : }
311 :
312 : BAT *
313 0 : BATattach(int tt, const char *heapfile, role_t role)
314 : {
315 0 : BAT *bn;
316 0 : char *p;
317 0 : size_t m;
318 0 : FILE *f;
319 :
320 0 : ERRORcheck(tt <= 0 , "bad tail type (<=0)\n", NULL);
321 0 : ERRORcheck(ATOMvarsized(tt) && ATOMstorage(tt) != TYPE_str, "bad tail type (varsized and not str)\n", NULL);
322 0 : ERRORcheck(heapfile == NULL, "bad heapfile name\n", NULL);
323 :
324 0 : if ((f = MT_fopen(heapfile, "rb")) == NULL) {
325 0 : GDKsyserror("BATattach: cannot open %s\n", heapfile);
326 0 : return NULL;
327 : }
328 0 : if (ATOMstorage(tt) == TYPE_str) {
329 0 : size_t n;
330 0 : char *s;
331 0 : int c, u;
332 :
333 0 : if ((bn = COLnew(0, tt, 0, role)) == NULL) {
334 0 : fclose(f);
335 0 : return NULL;
336 : }
337 0 : m = 4096;
338 0 : n = 0;
339 0 : u = 0;
340 0 : s = p = GDKmalloc(m);
341 0 : if (p == NULL) {
342 0 : fclose(f);
343 0 : BBPreclaim(bn);
344 0 : return NULL;
345 : }
346 0 : while ((c = getc(f)) != EOF) {
347 0 : if (n == m) {
348 0 : m += 4096;
349 0 : s = GDKrealloc(p, m);
350 0 : if (s == NULL) {
351 0 : GDKfree(p);
352 0 : BBPreclaim(bn);
353 0 : fclose(f);
354 0 : return NULL;
355 : }
356 0 : p = s;
357 0 : s = p + n;
358 : }
359 0 : if (c == '\n' && n > 0 && s[-1] == '\r') {
360 : /* deal with CR-LF sequence */
361 0 : s[-1] = c;
362 : } else {
363 0 : *s++ = c;
364 0 : n++;
365 : }
366 0 : if (u) {
367 0 : if ((c & 0xC0) == 0x80)
368 0 : u--;
369 : else
370 0 : goto notutf8;
371 0 : } else if ((c & 0xF8) == 0xF0)
372 : u = 3;
373 0 : else if ((c & 0xF0) == 0xE0)
374 : u = 2;
375 0 : else if ((c & 0xE0) == 0xC0)
376 : u = 1;
377 0 : else if ((c & 0x80) == 0x80)
378 0 : goto notutf8;
379 0 : else if (c == 0) {
380 0 : if (BUNappend(bn, p, false) != GDK_SUCCEED) {
381 0 : BBPreclaim(bn);
382 0 : fclose(f);
383 0 : GDKfree(p);
384 0 : return NULL;
385 : }
386 : s = p;
387 : n = 0;
388 : }
389 : }
390 0 : fclose(f);
391 0 : GDKfree(p);
392 0 : if (n > 0) {
393 0 : BBPreclaim(bn);
394 0 : GDKerror("last string is not null-terminated\n");
395 0 : return NULL;
396 : }
397 : } else {
398 0 : struct stat st;
399 0 : int atomsize;
400 0 : BUN cap;
401 0 : lng n;
402 :
403 0 : if (fstat(fileno(f), &st) < 0) {
404 0 : GDKsyserror("BATattach: cannot stat %s\n", heapfile);
405 0 : fclose(f);
406 0 : return NULL;
407 : }
408 0 : atomsize = ATOMsize(tt);
409 0 : if (st.st_size % atomsize != 0) {
410 0 : fclose(f);
411 0 : GDKerror("heapfile size not integral number of atoms\n");
412 0 : return NULL;
413 : }
414 0 : if (ATOMstorage(tt) == TYPE_msk ?
415 : (st.st_size > (off_t) (BUN_MAX / 8)) :
416 0 : ((size_t) (st.st_size / atomsize) > (size_t) BUN_MAX)) {
417 0 : fclose(f);
418 0 : GDKerror("heapfile too large\n");
419 0 : return NULL;
420 : }
421 0 : cap = (BUN) (ATOMstorage(tt) == TYPE_msk ?
422 0 : st.st_size * 8 :
423 0 : st.st_size / atomsize);
424 0 : bn = COLnew(0, tt, cap, role);
425 0 : if (bn == NULL) {
426 0 : fclose(f);
427 0 : return NULL;
428 : }
429 0 : p = Tloc(bn, 0);
430 0 : n = (lng) st.st_size;
431 0 : while (n > 0 && (m = fread(p, 1, (size_t) MIN(1024*1024, n), f)) > 0) {
432 0 : p += m;
433 0 : n -= m;
434 : }
435 0 : fclose(f);
436 0 : if (n > 0) {
437 0 : GDKerror("couldn't read the complete file\n");
438 0 : BBPreclaim(bn);
439 0 : return NULL;
440 : }
441 0 : BATsetcount(bn, cap);
442 0 : bn->tnonil = cap == 0;
443 0 : bn->tnil = false;
444 0 : bn->tseqbase = oid_nil;
445 0 : if (cap > 1) {
446 0 : bn->tsorted = false;
447 0 : bn->trevsorted = false;
448 0 : bn->tkey = false;
449 : } else {
450 0 : bn->tsorted = ATOMlinear(tt);
451 0 : bn->trevsorted = ATOMlinear(tt);
452 0 : bn->tkey = true;
453 : }
454 : }
455 : return bn;
456 :
457 0 : notutf8:
458 0 : fclose(f);
459 0 : BBPreclaim(bn);
460 0 : GDKfree(p);
461 0 : GDKerror("input is not UTF-8\n");
462 0 : return NULL;
463 : }
464 :
465 : /*
466 : * If the BAT runs out of storage for BUNS it will reallocate space.
467 : * For memory mapped BATs we simple extend the administration after
468 : * having an assurance that the BAT still can be safely stored away.
469 : */
470 : BUN
471 21276 : BATgrows(BAT *b)
472 : {
473 21276 : BUN oldcap, newcap;
474 :
475 21276 : BATcheck(b, 0);
476 :
477 21276 : newcap = oldcap = BATcapacity(b);
478 21276 : if (newcap < BATTINY)
479 : newcap = 2 * BATTINY;
480 21346 : else if (newcap < 10 * BATTINY)
481 19895 : newcap = 4 * newcap;
482 1451 : else if (newcap < 50 * BATTINY)
483 844 : newcap = 2 * newcap;
484 607 : else if ((double) newcap * BATMARGIN <= (double) BUN_MAX)
485 601 : newcap = (BUN) ((double) newcap * BATMARGIN);
486 : else
487 : newcap = BUN_MAX;
488 21346 : if (newcap == oldcap) {
489 0 : if (newcap <= BUN_MAX - 10)
490 0 : newcap += 10;
491 : else
492 : newcap = BUN_MAX;
493 : }
494 21276 : if (ATOMstorage(b->ttype) == TYPE_msk) /* round up to multiple of 32 */
495 0 : newcap = (newcap + 31) & ~(BUN)31;
496 : return newcap;
497 : }
498 :
499 : /*
500 : * The routine should ensure that the BAT keeps its location in the
501 : * BAT buffer.
502 : *
503 : * Overflow in the other heaps are dealt with in the atom routines.
504 : * Here we merely copy their references into the new administration
505 : * space.
506 : */
507 : gdk_return
508 179816 : BATextend(BAT *b, BUN newcap)
509 : {
510 179816 : size_t theap_size;
511 179816 : gdk_return rc = GDK_SUCCEED;
512 :
513 179816 : assert(newcap <= BUN_MAX);
514 179816 : BATcheck(b, GDK_FAIL);
515 : /*
516 : * The main issue is to properly predict the new BAT size.
517 : * storage overflow. The assumption taken is that capacity
518 : * overflow is rare. It is changed only when the position of
519 : * the next available BUN surpasses the free area marker. Be
520 : * aware that the newcap should be greater than the old value,
521 : * otherwise you may easily corrupt the administration of
522 : * malloc.
523 : */
524 179816 : if (newcap <= BATcapacity(b)) {
525 : return GDK_SUCCEED;
526 : }
527 :
528 30307 : if (ATOMstorage(b->ttype) == TYPE_msk) {
529 1160 : newcap = (newcap + 31) & ~(BUN)31; /* round up to multiple of 32 */
530 1160 : theap_size = (size_t) (newcap / 8); /* in bytes */
531 : } else {
532 29147 : theap_size = (size_t) newcap << b->tshift;
533 : }
534 :
535 30307 : MT_lock_set(&b->theaplock);
536 30476 : if (b->theap->base) {
537 30476 : TRC_DEBUG(HEAP, "HEAPgrow in BATextend %s %zu %zu\n",
538 : b->theap->filename, b->theap->size, theap_size);
539 30476 : rc = HEAPgrow(&b->theap, theap_size, b->batRestricted == BAT_READ);
540 30518 : if (rc == GDK_SUCCEED)
541 30520 : b->batCapacity = newcap;
542 : } else {
543 0 : b->batCapacity = newcap;
544 : }
545 30518 : MT_lock_unset(&b->theaplock);
546 :
547 30516 : return rc;
548 : }
549 :
550 :
551 :
552 : /*
553 : * @+ BAT destruction
554 : * BATclear quickly removes all elements from a BAT. It must respect
555 : * the transaction rules; so stable elements must be moved to the
556 : * "deleted" section of the BAT (they cannot be fully deleted
557 : * yet). For the elements that really disappear, we must free
558 : * heapspace. As an optimization, in the case of no stable elements, we quickly empty
559 : * the heaps by copying a standard small empty image over them.
560 : */
561 : gdk_return
562 391 : BATclear(BAT *b, bool force)
563 : {
564 391 : BUN p, q;
565 :
566 391 : BATcheck(b, GDK_FAIL);
567 :
568 391 : if (!force && b->batInserted > 0) {
569 0 : GDKerror("cannot clear committed BAT\n");
570 0 : return GDK_FAIL;
571 : }
572 :
573 391 : TRC_DEBUG(ALGO, ALGOBATFMT "\n", ALGOBATPAR(b));
574 :
575 : /* kill all search accelerators */
576 391 : HASHdestroy(b);
577 391 : IMPSdestroy(b);
578 391 : OIDXdestroy(b);
579 391 : STRMPdestroy(b);
580 391 : RTREEdestroy(b);
581 391 : PROPdestroy(b);
582 :
583 391 : bat tvp = 0;
584 :
585 : /* we must dispose of all inserted atoms */
586 391 : MT_lock_set(&b->theaplock);
587 391 : if (force && BATatoms[b->ttype].atomDel == NULL) {
588 384 : assert(b->tvheap == NULL || b->tvheap->parentid == b->batCacheid);
589 : /* no stable elements: we do a quick heap clean */
590 : /* need to clean heap which keeps data even though the
591 : BUNs got removed. This means reinitialize when
592 : free > 0
593 : */
594 384 : if (b->tvheap && b->tvheap->free > 0) {
595 21 : Heap *th = GDKmalloc(sizeof(Heap));
596 :
597 21 : if (th == NULL) {
598 0 : MT_lock_unset(&b->theaplock);
599 0 : return GDK_FAIL;
600 : }
601 21 : *th = (Heap) {
602 21 : .farmid = b->tvheap->farmid,
603 21 : .parentid = b->tvheap->parentid,
604 : .dirty = true,
605 21 : .hasfile = b->tvheap->hasfile,
606 : .refs = ATOMIC_VAR_INIT(1),
607 : };
608 21 : strcpy_len(th->filename, b->tvheap->filename, sizeof(th->filename));
609 21 : if (ATOMheap(b->ttype, th, 0) != GDK_SUCCEED) {
610 0 : MT_lock_unset(&b->theaplock);
611 0 : return GDK_FAIL;
612 : }
613 21 : tvp = b->tvheap->parentid;
614 21 : HEAPdecref(b->tvheap, false);
615 21 : b->tvheap = th;
616 : }
617 : } else {
618 : /* do heap-delete of all inserted atoms */
619 7 : void (*tatmdel)(Heap*,var_t*) = BATatoms[b->ttype].atomDel;
620 :
621 : /* TYPE_str has no del method, so we shouldn't get here */
622 7 : assert(tatmdel == NULL || b->twidth == sizeof(var_t));
623 0 : if (tatmdel) {
624 0 : BATiter bi = bat_iterator_nolock(b);
625 :
626 0 : for (p = b->batInserted, q = BATcount(b); p < q; p++)
627 0 : (*tatmdel)(b->tvheap, (var_t*) BUNtloc(bi,p));
628 0 : b->tvheap->dirty = true;
629 : }
630 : }
631 :
632 391 : b->batInserted = 0;
633 391 : b->batCount = 0;
634 391 : if (b->ttype == TYPE_void)
635 0 : b->batCapacity = 0;
636 391 : b->theap->free = 0;
637 391 : BAThseqbase(b, 0);
638 391 : BATtseqbase(b, ATOMtype(b->ttype) == TYPE_oid ? 0 : oid_nil);
639 391 : b->theap->dirty = true;
640 391 : b->tnonil = true;
641 391 : b->tnil = false;
642 391 : b->tsorted = b->trevsorted = ATOMlinear(b->ttype);
643 391 : b->tnosorted = b->tnorevsorted = 0;
644 391 : b->tkey = true;
645 391 : b->tnokey[0] = b->tnokey[1] = 0;
646 391 : b->tminpos = b->tmaxpos = BUN_NONE;
647 391 : b->tunique_est = 0;
648 391 : MT_lock_unset(&b->theaplock);
649 391 : if (tvp != 0 && tvp != b->batCacheid)
650 0 : BBPrelease(tvp);
651 : return GDK_SUCCEED;
652 : }
653 :
654 : /* free a cached BAT; leave the bat descriptor cached */
655 : void
656 521397 : BATfree(BAT *b)
657 : {
658 521397 : if (b == NULL)
659 : return;
660 :
661 : /* deallocate all memory for a bat */
662 521397 : MT_rwlock_rdlock(&b->thashlock);
663 521399 : BUN nunique = BUN_NONE;
664 521399 : if (b->thash && b->thash != (Hash *) 1) {
665 1760 : nunique = b->thash->nunique;
666 : }
667 521399 : MT_rwlock_rdunlock(&b->thashlock);
668 521398 : HASHfree(b);
669 521398 : IMPSfree(b);
670 521397 : OIDXfree(b);
671 521397 : STRMPfree(b);
672 521397 : RTREEfree(b);
673 521398 : MT_lock_set(&b->theaplock);
674 521398 : if (nunique != BUN_NONE) {
675 1760 : b->tunique_est = (double) nunique;
676 : }
677 : /* wait until there are no other references to the heap; a
678 : * reference is possible in e.g. BBPsync that uses a
679 : * bat_iterator directly on the BBP_desc, i.e. without fix */
680 521397 : while (b->theap && (ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1) {
681 0 : MT_lock_unset(&b->theaplock);
682 0 : MT_sleep_ms(1);
683 521398 : MT_lock_set(&b->theaplock);
684 : }
685 521397 : if (b->theap) {
686 521397 : assert((ATOMIC_GET(&b->theap->refs) & HEAPREFS) == 1);
687 521397 : assert(b->theap->parentid == b->batCacheid);
688 521397 : HEAPfree(b->theap, false);
689 : }
690 : /* wait until there are no other references to the heap; a
691 : * reference is possible in e.g. BBPsync that uses a
692 : * bat_iterator directly on the BBP_desc, i.e. without fix */
693 27058 : while (b->tvheap && (ATOMIC_GET(&b->tvheap->refs) & HEAPREFS) > 1) {
694 0 : MT_lock_unset(&b->theaplock);
695 0 : MT_sleep_ms(1);
696 521399 : MT_lock_set(&b->theaplock);
697 : }
698 521399 : if (b->tvheap) {
699 27058 : assert((ATOMIC_GET(&b->tvheap->refs) & HEAPREFS) == 1);
700 27058 : assert(b->tvheap->parentid == b->batCacheid);
701 27058 : HEAPfree(b->tvheap, false);
702 : }
703 521399 : MT_lock_unset(&b->theaplock);
704 : }
705 :
706 : /* free a cached BAT descriptor */
707 : void
708 21949171 : BATdestroy(BAT *b)
709 : {
710 21949171 : if (b->tvheap) {
711 26169 : GDKfree(b->tvheap);
712 : }
713 21949171 : PROPdestroy_nolock(b);
714 21942485 : MT_lock_destroy(&b->theaplock);
715 21979907 : MT_lock_destroy(&b->batIdxLock);
716 21997224 : MT_rwlock_destroy(&b->thashlock);
717 22001393 : if (b->theap) {
718 518473 : GDKfree(b->theap);
719 : }
720 22007855 : if (b->oldtail) {
721 0 : ATOMIC_AND(&b->oldtail->refs, ~DELAYEDREMOVE);
722 : /* the bat has not been committed, so we cannot remove
723 : * the old tail file */
724 0 : HEAPdecref(b->oldtail, false);
725 0 : b->oldtail = NULL;
726 : }
727 22007855 : *b = (BAT) {
728 : .batCacheid = 0,
729 : };
730 22007855 : }
731 :
732 : /*
733 : * @+ BAT copying
734 : *
735 : * BAT copying is an often used operation. So it deserves attention.
736 : * When making a copy of a BAT, the following aspects are of
737 : * importance:
738 : *
739 : * - the requested head and tail types. The purpose of the copy may be
740 : * to slightly change these types (e.g. void <-> oid). We may also
741 : * remap between types as long as they share the same
742 : * ATOMstorage(type), i.e. the types have the same physical
743 : * implementation. We may even want to allow 'dirty' trick such as
744 : * viewing a flt-column suddenly as int.
745 : *
746 : * To allow such changes, the desired column-types is a
747 : * parameter of COLcopy.
748 : *
749 : * - access mode. If we want a read-only copy of a read-only BAT, a
750 : * VIEW may do (in this case, the user may be after just an
751 : * independent BAT header and id). This is indicated by the
752 : * parameter (writable = FALSE).
753 : *
754 : * In other cases, we really want an independent physical copy
755 : * (writable = TRUE). Changing the mode to BAT_WRITE will be a
756 : * zero-cost operation if the BAT was copied with (writable = TRUE).
757 : *
758 : * In GDK, the result is a BAT that is BAT_WRITE iff (writable ==
759 : * TRUE).
760 : *
761 : * In these cases the copy becomes a logical view on the original,
762 : * which ensures that the original cannot be modified or destroyed
763 : * (which could affect the shared heaps).
764 : */
765 : static bool
766 414 : wrongtype(int t1, int t2)
767 : {
768 : /* check if types are compatible. be extremely forgiving */
769 414 : if (t1 != TYPE_void) {
770 414 : t1 = ATOMtype(ATOMstorage(t1));
771 414 : t2 = ATOMtype(ATOMstorage(t2));
772 414 : if (t1 != t2) {
773 352 : if (ATOMvarsized(t1) ||
774 352 : ATOMvarsized(t2) ||
775 352 : t1 == TYPE_msk || t2 == TYPE_msk ||
776 352 : ATOMsize(t1) != ATOMsize(t2))
777 0 : return true;
778 : }
779 : }
780 : return false;
781 : }
782 :
783 : /*
784 : * There are four main implementation cases:
785 : * (1) we are allowed to return a view (zero effort),
786 : * (2) the result is void,void (zero effort),
787 : * (3) we can copy the heaps (memcopy, or even VM page sharing)
788 : * (4) we must insert BUN-by-BUN into the result (fallback)
789 : * The latter case is still optimized for the case that the result
790 : * is bat[void,T] for a simple fixed-size type T. In that case we
791 : * do inline array[T] inserts.
792 : */
793 : BAT *
794 46379 : COLcopy(BAT *b, int tt, bool writable, role_t role)
795 : {
796 46379 : bool slowcopy = false;
797 46379 : BAT *bn = NULL;
798 46379 : BATiter bi;
799 46379 : char strhash[GDK_STRHASHSIZE];
800 :
801 46379 : BATcheck(b, NULL);
802 :
803 : /* maybe a bit ugly to change the requested bat type?? */
804 46379 : if (b->ttype == TYPE_void && !writable)
805 46379 : tt = TYPE_void;
806 :
807 46379 : if (tt != b->ttype && wrongtype(tt, b->ttype)) {
808 0 : GDKerror("wrong tail-type requested\n");
809 0 : return NULL;
810 : }
811 :
812 : /* in case of a string bat, we save the string heap hash table
813 : * while we have the lock so that we can restore it in the copy;
814 : * this is because during our operation, a parallel thread could
815 : * be adding strings to the vheap which would modify the hash
816 : * table and that would result in buckets containing values
817 : * beyond the original vheap that we're copying */
818 46379 : MT_lock_set(&b->theaplock);
819 46431 : bi = bat_iterator_nolock(b);
820 46431 : if (ATOMstorage(b->ttype) == TYPE_str && b->tvheap->free >= GDK_STRHASHSIZE)
821 12277 : memcpy(strhash, b->tvheap->base, GDK_STRHASHSIZE);
822 :
823 46431 : bat_iterator_incref(&bi);
824 46449 : MT_lock_unset(&b->theaplock);
825 :
826 : /* first try case (1); create a view, possibly with different
827 : * atom-types */
828 46434 : if (!writable &&
829 46434 : role == TRANSIENT &&
830 18590 : bi.restricted == BAT_READ &&
831 14798 : ATOMstorage(b->ttype) != TYPE_msk && /* no view on TYPE_msk */
832 14798 : (bi.h == NULL ||
833 14798 : bi.h->parentid == b->batCacheid ||
834 3553 : BBP_desc(bi.h->parentid)->batRestricted == BAT_READ)) {
835 14798 : bn = VIEWcreate(b->hseqbase, b, 0, BUN_MAX);
836 14798 : if (bn == NULL) {
837 0 : goto bunins_failed;
838 : }
839 14798 : if (tt != bn->ttype) {
840 59 : bn->ttype = tt;
841 59 : if (bn->tvheap && !ATOMvarsized(tt)) {
842 0 : if (bn->tvheap->parentid != bn->batCacheid)
843 0 : BBPrelease(bn->tvheap->parentid);
844 0 : HEAPdecref(bn->tvheap, false);
845 0 : bn->tvheap = NULL;
846 : }
847 59 : bn->tseqbase = ATOMtype(tt) == TYPE_oid ? bi.tseq : oid_nil;
848 : }
849 14798 : bat_iterator_end(&bi);
850 14798 : return bn;
851 : } else {
852 : /* check whether we need case (4); BUN-by-BUN copy (by
853 : * setting slowcopy to true) */
854 31636 : if (ATOMsize(tt) != ATOMsize(bi.type)) {
855 : /* oops, void materialization */
856 : slowcopy = true;
857 31285 : } else if (bi.h && bi.h->parentid != b->batCacheid &&
858 6795 : BATcapacity(BBP_desc(bi.h->parentid)) > bi.count + bi.count) {
859 : /* reduced slice view: do not copy too much
860 : * garbage */
861 : slowcopy = true;
862 24700 : } else if (bi.vh && bi.vh->parentid != b->batCacheid &&
863 10589 : BATcount(BBP_desc(bi.vh->parentid)) > bi.count + bi.count) {
864 : /* reduced vheap view: do not copy too much
865 : * garbage; this really is a heuristic since the
866 : * vheap could be used completely, even if the
867 : * offset heap is only (less than) half the size
868 : * of the parent's offset heap */
869 9439 : slowcopy = true;
870 : }
871 :
872 31636 : bn = COLnew2(b->hseqbase, tt, bi.count, role, bi.width);
873 31577 : if (bn == NULL) {
874 0 : goto bunins_failed;
875 : }
876 31577 : if (bn->tvheap != NULL && bn->tvheap->base == NULL) {
877 : /* this combination can happen since the last
878 : * argument of COLnew2 not being zero triggers a
879 : * skip in the allocation of the tvheap */
880 12341 : if (ATOMheap(bn->ttype, bn->tvheap, bn->batCapacity) != GDK_SUCCEED) {
881 0 : goto bunins_failed;
882 : }
883 : }
884 :
885 31607 : if (tt == TYPE_void) {
886 : /* case (2): a void,void result => nothing to
887 : * copy! */
888 1090 : bn->theap->free = 0;
889 30517 : } else if (!slowcopy) {
890 : /* case (3): just copy the heaps */
891 21093 : if (bn->tvheap && HEAPextend(bn->tvheap, bi.vhfree, true) != GDK_SUCCEED) {
892 0 : goto bunins_failed;
893 : }
894 21089 : memcpy(bn->theap->base, bi.base, bi.hfree);
895 21089 : bn->theap->free = bi.hfree;
896 21089 : bn->theap->dirty = true;
897 21089 : if (bn->tvheap) {
898 9241 : memcpy(bn->tvheap->base, bi.vh->base, bi.vhfree);
899 9241 : bn->tvheap->free = bi.vhfree;
900 9241 : bn->tvheap->dirty = true;
901 9241 : if (ATOMstorage(b->ttype) == TYPE_str && bi.vhfree >= GDK_STRHASHSIZE)
902 8633 : memcpy(bn->tvheap->base, strhash, GDK_STRHASHSIZE);
903 : }
904 :
905 : /* make sure we use the correct capacity */
906 21089 : if (ATOMstorage(bn->ttype) == TYPE_msk)
907 0 : bn->batCapacity = (BUN) (bn->theap->size * 8);
908 21089 : else if (bn->ttype)
909 21089 : bn->batCapacity = (BUN) (bn->theap->size >> bn->tshift);
910 : else
911 0 : bn->batCapacity = 0;
912 18849 : } else if (tt != TYPE_void || ATOMextern(tt)) {
913 : /* case (4): one-by-one BUN insert (really slow) */
914 9424 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
915 :
916 7785872 : TIMEOUT_LOOP_IDX_DECL(p, bi.count, qry_ctx) {
917 7766564 : const void *t = BUNtail(bi, p);
918 :
919 7756818 : if (bunfastapp_nocheck(bn, t) != GDK_SUCCEED) {
920 0 : goto bunins_failed;
921 : }
922 : }
923 9432 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed, qry_ctx));
924 9425 : bn->theap->dirty |= bi.count > 0;
925 : } else if (tt != TYPE_void && bi.type == TYPE_void) {
926 : /* case (4): optimized for unary void
927 : * materialization */
928 : oid cur = bi.tseq, *dst = (oid *) Tloc(bn, 0);
929 : const oid inc = !is_oid_nil(cur);
930 :
931 : bn->theap->free = bi.count * sizeof(oid);
932 : bn->theap->dirty |= bi.count > 0;
933 : for (BUN p = 0; p < bi.count; p++) {
934 : dst[p] = cur;
935 : cur += inc;
936 : }
937 : } else if (ATOMstorage(bi.type) == TYPE_msk) {
938 : /* convert number of bits to number of bytes,
939 : * and round the latter up to a multiple of
940 : * 4 (copy in units of 4 bytes) */
941 : bn->theap->free = ((bi.count + 31) / 32) * 4;
942 : bn->theap->dirty |= bi.count > 0;
943 : memcpy(Tloc(bn, 0), bi.base, bn->theap->free);
944 : } else {
945 : /* case (4): optimized for simple array copy */
946 : bn->theap->free = bi.count << bn->tshift;
947 : bn->theap->dirty |= bi.count > 0;
948 : memcpy(Tloc(bn, 0), bi.base, bn->theap->free);
949 : }
950 : /* copy all properties (size+other) from the source bat */
951 31604 : BATsetcount(bn, bi.count);
952 : }
953 : /* set properties (note that types may have changed in the copy) */
954 62844 : if (ATOMtype(tt) == ATOMtype(bi.type)) {
955 31603 : if (ATOMtype(tt) == TYPE_oid) {
956 7996 : BATtseqbase(bn, bi.tseq);
957 : } else {
958 23607 : BATtseqbase(bn, oid_nil);
959 : }
960 31562 : BATkey(bn, bi.key);
961 31548 : bn->tsorted = bi.sorted;
962 31548 : bn->trevsorted = bi.revsorted;
963 31548 : bn->tnorevsorted = bi.norevsorted;
964 31548 : if (bi.nokey[0] != bi.nokey[1]) {
965 2216 : bn->tnokey[0] = bi.nokey[0];
966 2216 : bn->tnokey[1] = bi.nokey[1];
967 : } else {
968 29332 : bn->tnokey[0] = bn->tnokey[1] = 0;
969 : }
970 31548 : bn->tnosorted = bi.nosorted;
971 31548 : bn->tnonil = bi.nonil;
972 31548 : bn->tnil = bi.nil;
973 31548 : bn->tminpos = bi.minpos;
974 31548 : bn->tmaxpos = bi.maxpos;
975 31548 : bn->tunique_est = bi.unique_est;
976 3 : } else if (ATOMstorage(tt) == ATOMstorage(b->ttype) &&
977 3 : ATOMcompare(tt) == ATOMcompare(b->ttype)) {
978 3 : BUN h = bi.count;
979 3 : bn->tsorted = bi.sorted;
980 3 : bn->trevsorted = bi.revsorted;
981 3 : BATkey(bn, bi.key);
982 3 : bn->tnonil = bi.nonil;
983 3 : bn->tnil = bi.nil;
984 3 : if (bi.nosorted > 0 && bi.nosorted < h)
985 1 : bn->tnosorted = bi.nosorted;
986 : else
987 2 : bn->tnosorted = 0;
988 3 : if (bi.norevsorted > 0 && bi.norevsorted < h)
989 3 : bn->tnorevsorted = bi.norevsorted;
990 : else
991 0 : bn->tnorevsorted = 0;
992 3 : if (bi.nokey[0] < h &&
993 3 : bi.nokey[1] < h &&
994 : bi.nokey[0] != bi.nokey[1]) {
995 0 : bn->tnokey[0] = bi.nokey[0];
996 0 : bn->tnokey[1] = bi.nokey[1];
997 : } else {
998 3 : bn->tnokey[0] = bn->tnokey[1] = 0;
999 : }
1000 3 : bn->tminpos = bi.minpos;
1001 3 : bn->tmaxpos = bi.maxpos;
1002 3 : bn->tunique_est = bi.unique_est;
1003 : } else {
1004 0 : bn->tsorted = bn->trevsorted = false; /* set based on count later */
1005 0 : bn->tnonil = bn->tnil = false;
1006 0 : bn->tkey = false;
1007 0 : bn->tnosorted = bn->tnorevsorted = 0;
1008 0 : bn->tnokey[0] = bn->tnokey[1] = 0;
1009 : }
1010 31551 : if (BATcount(bn) <= 1) {
1011 6003 : bn->tsorted = ATOMlinear(b->ttype);
1012 6003 : bn->trevsorted = ATOMlinear(b->ttype);
1013 6003 : bn->tkey = true;
1014 : }
1015 31551 : bat_iterator_end(&bi);
1016 31623 : if (!writable)
1017 3781 : bn->batRestricted = BAT_READ;
1018 31623 : TRC_DEBUG(ALGO, ALGOBATFMT " -> " ALGOBATFMT "\n",
1019 : ALGOBATPAR(b), ALGOBATPAR(bn));
1020 : return bn;
1021 0 : bunins_failed:
1022 0 : bat_iterator_end(&bi);
1023 0 : BBPreclaim(bn);
1024 : return NULL;
1025 : }
1026 :
1027 : /* Append an array of values of length count to the bat. For
1028 : * fixed-sized values, `values' is an array of values, for
1029 : * variable-sized values, `values' is an array of pointers to values.
1030 : * If values equals NULL, count times nil will be appended. */
1031 : gdk_return
1032 61853244 : BUNappendmulti(BAT *b, const void *values, BUN count, bool force)
1033 : {
1034 61853244 : BUN p;
1035 61853244 : BUN nunique = 0;
1036 :
1037 61853244 : BATcheck(b, GDK_FAIL);
1038 :
1039 61853244 : assert(!VIEWtparent(b));
1040 :
1041 61853244 : if (count == 0)
1042 : return GDK_SUCCEED;
1043 :
1044 61846977 : TRC_DEBUG(ALGO, ALGOBATFMT " appending " BUNFMT " values%s\n", ALGOBATPAR(b), count, values ? "" : " (all nil)");
1045 :
1046 61839761 : p = BATcount(b); /* insert at end */
1047 61839761 : if (p == BUN_MAX || BATcount(b) + count >= BUN_MAX) {
1048 0 : GDKerror("bat too large\n");
1049 0 : return GDK_FAIL;
1050 : }
1051 :
1052 61839761 : ALIGNapp(b, force, GDK_FAIL);
1053 : /* load hash so that we can maintain it */
1054 61902246 : (void) BATcheckhash(b);
1055 :
1056 61885558 : if (b->ttype == TYPE_void && BATtdense(b)) {
1057 0 : const oid *ovals = values;
1058 0 : bool dense = b->batCount == 0 || (ovals != NULL && b->tseqbase + 1 == ovals[0]);
1059 0 : if (ovals) {
1060 0 : for (BUN i = 1; dense && i < count; i++) {
1061 0 : dense = ovals[i - 1] + 1 == ovals[i];
1062 : }
1063 : }
1064 0 : if (dense) {
1065 0 : MT_lock_set(&b->theaplock);
1066 0 : if (b->batCount == 0)
1067 0 : b->tseqbase = ovals ? ovals[0] : oid_nil;
1068 0 : BATsetcount(b, BATcount(b) + count);
1069 0 : MT_lock_unset(&b->theaplock);
1070 0 : return GDK_SUCCEED;
1071 : } else {
1072 : /* we need to materialize b; allocate enough capacity */
1073 0 : if (BATmaterialize(b, BATcount(b) + count) != GDK_SUCCEED)
1074 : return GDK_FAIL;
1075 : }
1076 : }
1077 :
1078 61885558 : if (unshare_varsized_heap(b) != GDK_SUCCEED) {
1079 : return GDK_FAIL;
1080 : }
1081 :
1082 61872533 : if (BATcount(b) + count > BATcapacity(b)) {
1083 : /* if needed space exceeds a normal growth extend just
1084 : * with what's needed */
1085 10615 : BUN ncap = BATcount(b) + count;
1086 10615 : BUN grows = BATgrows(b);
1087 :
1088 10612 : if (ncap > grows)
1089 : grows = ncap;
1090 10612 : gdk_return rc = BATextend(b, grows);
1091 10624 : if (rc != GDK_SUCCEED)
1092 : return rc;
1093 : }
1094 :
1095 61872542 : const void *t = b->ttype == TYPE_msk ? &(msk){false} : ATOMnilptr(b->ttype);
1096 61872542 : MT_lock_set(&b->theaplock);
1097 61899373 : BATiter bi = bat_iterator_nolock(b);
1098 61899373 : const ValRecord *prop;
1099 61899373 : ValRecord minprop, maxprop;
1100 61899373 : const void *minbound = NULL, *maxbound = NULL;
1101 61899384 : if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL &&
1102 11 : VALcopy(&minprop, prop) != NULL)
1103 11 : minbound = VALptr(&minprop);
1104 61815400 : if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL &&
1105 10 : VALcopy(&maxprop, prop) != NULL)
1106 10 : maxbound = VALptr(&maxprop);
1107 61864756 : const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
1108 61875061 : MT_lock_unset(&b->theaplock);
1109 61918389 : MT_rwlock_wrlock(&b->thashlock);
1110 61914402 : if (values && b->ttype) {
1111 61907392 : int (*atomcmp) (const void *, const void *) = ATOMcompare(b->ttype);
1112 61907392 : const void *atomnil = ATOMnilptr(b->ttype);
1113 61907392 : const void *minvalp = NULL, *maxvalp = NULL;
1114 61907392 : if (b->tvheap) {
1115 29084773 : if (bi.minpos != BUN_NONE)
1116 26258895 : minvalp = BUNtvar(bi, bi.minpos);
1117 29076620 : if (bi.maxpos != BUN_NONE)
1118 26251992 : maxvalp = BUNtvar(bi, bi.maxpos);
1119 29081517 : const void *vbase = b->tvheap->base;
1120 58195415 : for (BUN i = 0; i < count; i++) {
1121 29114991 : t = ((void **) values)[i];
1122 29114991 : bool isnil = atomcmp(t, atomnil) == 0;
1123 29106725 : gdk_return rc;
1124 29106725 : if (notnull && isnil) {
1125 0 : assert(0);
1126 : GDKerror("NULL value not within bounds\n");
1127 : rc = GDK_FAIL;
1128 29106725 : } else if (minbound &&
1129 29106725 : !isnil &&
1130 0 : atomcmp(t, minbound) < 0) {
1131 0 : assert(0);
1132 : GDKerror("value not within bounds\n");
1133 : rc = GDK_FAIL;
1134 29106725 : } else if (maxbound &&
1135 0 : !isnil &&
1136 0 : atomcmp(t, maxbound) >= 0) {
1137 0 : assert(0);
1138 : GDKerror("value not within bounds\n");
1139 : rc = GDK_FAIL;
1140 : } else {
1141 29106725 : rc = tfastins_nocheckVAR(b, p, t);
1142 : }
1143 29113749 : if (rc != GDK_SUCCEED) {
1144 0 : MT_rwlock_wrunlock(&b->thashlock);
1145 0 : if (minbound)
1146 0 : VALclear(&minprop);
1147 0 : if (maxbound)
1148 0 : VALclear(&maxprop);
1149 0 : return rc;
1150 : }
1151 29113749 : if (vbase != b->tvheap->base) {
1152 : /* tvheap changed location, so
1153 : * pointers may need to be
1154 : * updated (not if they were
1155 : * initialized from t below, but
1156 : * we don't know) */
1157 3387 : BUN minpos = bi.minpos;
1158 3387 : BUN maxpos = bi.maxpos;
1159 3387 : MT_lock_set(&b->theaplock);
1160 3387 : bi = bat_iterator_nolock(b);
1161 3387 : MT_lock_unset(&b->theaplock);
1162 3387 : bi.minpos = minpos;
1163 3387 : bi.maxpos = maxpos;
1164 3387 : vbase = b->tvheap->base;
1165 3387 : if (bi.minpos != BUN_NONE)
1166 2572 : minvalp = BUNtvar(bi, bi.minpos);
1167 3387 : if (bi.maxpos != BUN_NONE)
1168 2570 : maxvalp = BUNtvar(bi, bi.maxpos);
1169 : }
1170 29113749 : if (!isnil) {
1171 25478785 : if (p == 0) {
1172 193021 : bi.minpos = bi.maxpos = 0;
1173 193021 : minvalp = maxvalp = t;
1174 : } else {
1175 49505617 : if (bi.minpos != BUN_NONE &&
1176 24220164 : atomcmp(minvalp, t) > 0) {
1177 71014 : bi.minpos = p;
1178 71014 : minvalp = t;
1179 : }
1180 49507060 : if (bi.maxpos != BUN_NONE &&
1181 24221147 : atomcmp(maxvalp, t) < 0) {
1182 2243211 : bi.maxpos = p;
1183 2243211 : maxvalp = t;
1184 : }
1185 : }
1186 : }
1187 29113898 : p++;
1188 : }
1189 29080424 : if (minbound)
1190 0 : VALclear(&minprop);
1191 29081707 : if (maxbound)
1192 0 : VALclear(&maxprop);
1193 29077783 : if (b->thash) {
1194 6283 : p -= count;
1195 12566 : for (BUN i = 0; i < count; i++) {
1196 6283 : t = ((void **) values)[i];
1197 6283 : HASHappend_locked(b, p, t);
1198 6283 : p++;
1199 : }
1200 6283 : nunique = b->thash ? b->thash->nunique : 0;
1201 : }
1202 32822619 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
1203 16213 : bi.minpos = bi.maxpos = BUN_NONE;
1204 16213 : minvalp = maxvalp = NULL;
1205 32426 : for (BUN i = 0; i < count; i++) {
1206 16213 : t = (void *) ((char *) values + (i << b->tshift));
1207 16213 : mskSetVal(b, p, *(msk *) t);
1208 16213 : p++;
1209 : }
1210 : } else {
1211 32806406 : if (bi.minpos != BUN_NONE)
1212 31552019 : minvalp = BUNtloc(bi, bi.minpos);
1213 32806406 : if (bi.maxpos != BUN_NONE)
1214 31601063 : maxvalp = BUNtloc(bi, bi.maxpos);
1215 65678213 : for (BUN i = 0; i < count; i++) {
1216 32873011 : t = (void *) ((char *) values + (i << b->tshift));
1217 32873011 : gdk_return rc = tfastins_nocheckFIX(b, p, t);
1218 32875906 : if (rc != GDK_SUCCEED) {
1219 0 : MT_rwlock_wrunlock(&b->thashlock);
1220 0 : return rc;
1221 : }
1222 32875906 : if (b->thash) {
1223 477476 : HASHappend_locked(b, p, t);
1224 : }
1225 32875906 : if (atomcmp(t, atomnil) != 0) {
1226 32101420 : if (p == 0) {
1227 360116 : bi.minpos = bi.maxpos = 0;
1228 360116 : minvalp = maxvalp = t;
1229 : } else {
1230 63345854 : if (bi.minpos != BUN_NONE &&
1231 31602617 : atomcmp(minvalp, t) > 0) {
1232 34919 : bi.minpos = p;
1233 34919 : minvalp = t;
1234 : }
1235 63395879 : if (bi.maxpos != BUN_NONE &&
1236 31652784 : atomcmp(maxvalp, t) < 0) {
1237 7642524 : bi.maxpos = p;
1238 7642524 : maxvalp = t;
1239 : }
1240 : }
1241 : }
1242 32871807 : p++;
1243 : }
1244 32805202 : nunique = b->thash ? b->thash->nunique : 0;
1245 : }
1246 : } else {
1247 15087 : for (BUN i = 0; i < count; i++) {
1248 8077 : gdk_return rc = tfastins_nocheck(b, p, t);
1249 8077 : if (rc != GDK_SUCCEED) {
1250 0 : MT_rwlock_wrunlock(&b->thashlock);
1251 0 : return rc;
1252 : }
1253 8077 : if (b->thash) {
1254 0 : HASHappend_locked(b, p, t);
1255 : }
1256 8077 : p++;
1257 : }
1258 7010 : nunique = b->thash ? b->thash->nunique : 0;
1259 : }
1260 61906208 : MT_lock_set(&b->theaplock);
1261 61884955 : b->tminpos = bi.minpos;
1262 61884955 : b->tmaxpos = bi.maxpos;
1263 61884955 : if (count > BATcount(b) / gdk_unique_estimate_keep_fraction)
1264 15668809 : b->tunique_est = 0;
1265 :
1266 61884955 : if (b->ttype == TYPE_oid) {
1267 : /* spend extra effort on oid (possible candidate list) */
1268 405367 : if (values == NULL || is_oid_nil(((oid *) values)[0])) {
1269 149 : b->tnil = true;
1270 149 : b->tnonil = false;
1271 149 : b->tsorted = false;
1272 149 : b->trevsorted = false;
1273 149 : b->tkey = false;
1274 149 : b->tseqbase = oid_nil;
1275 : } else {
1276 405218 : if (b->batCount == 0) {
1277 5221 : b->tsorted = true;
1278 5221 : b->trevsorted = true;
1279 5221 : b->tkey = true;
1280 5221 : b->tseqbase = count == 1 ? ((oid *) values)[0] : oid_nil;
1281 5221 : b->tnil = false;
1282 5221 : b->tnonil = true;
1283 : } else {
1284 399997 : if (!is_oid_nil(b->tseqbase) &&
1285 225369 : (count > 1 ||
1286 225369 : b->tseqbase + b->batCount != ((oid *) values)[0]))
1287 1685 : b->tseqbase = oid_nil;
1288 399997 : if (b->tsorted && !is_oid_nil(((oid *) b->theap->base)[b->batCount - 1]) && ((oid *) b->theap->base)[b->batCount - 1] > ((oid *) values)[0]) {
1289 56 : b->tsorted = false;
1290 56 : if (b->tnosorted == 0)
1291 56 : b->tnosorted = b->batCount;
1292 : }
1293 399997 : if (b->trevsorted && !is_oid_nil(((oid *) values)[0]) && ((oid *) b->theap->base)[b->batCount - 1] < ((oid *) values)[0]) {
1294 4514 : b->trevsorted = false;
1295 4514 : if (b->tnorevsorted == 0)
1296 4514 : b->tnorevsorted = b->batCount;
1297 : }
1298 399997 : if (b->tkey) {
1299 396770 : if (((oid *) b->theap->base)[b->batCount - 1] == ((oid *) values)[0]) {
1300 17 : b->tkey = false;
1301 17 : if (b->tnokey[1] == 0) {
1302 17 : b->tnokey[0] = b->batCount - 1;
1303 17 : b->tnokey[1] = b->batCount;
1304 : }
1305 396753 : } else if (!b->tsorted && !b->trevsorted)
1306 42 : b->tkey = false;
1307 : }
1308 : }
1309 405218 : for (BUN i = 1; i < count; i++) {
1310 0 : if (is_oid_nil(((oid *) values)[i])) {
1311 0 : b->tnil = true;
1312 0 : b->tnonil = false;
1313 0 : b->tsorted = false;
1314 0 : b->trevsorted = false;
1315 0 : b->tkey = false;
1316 0 : b->tseqbase = oid_nil;
1317 0 : break;
1318 : }
1319 0 : if (((oid *) values)[i - 1] == ((oid *) values)[i]) {
1320 0 : b->tkey = false;
1321 0 : if (b->tnokey[1] == 0) {
1322 0 : b->tnokey[0] = b->batCount + i - 1;
1323 0 : b->tnokey[1] = b->batCount + i;
1324 : }
1325 0 : } else if (((oid *) values)[i - 1] > ((oid *) values)[i]) {
1326 0 : b->tsorted = false;
1327 0 : if (b->tnosorted == 0)
1328 0 : b->tnosorted = b->batCount + i;
1329 0 : if (!b->trevsorted)
1330 0 : b->tkey = false;
1331 : } else {
1332 0 : if (((oid *) values)[i - 1] + 1 != ((oid *) values)[i])
1333 0 : b->tseqbase = oid_nil;
1334 0 : b->trevsorted = false;
1335 0 : if (b->tnorevsorted == 0)
1336 0 : b->tnorevsorted = b->batCount + i;
1337 0 : if (!b->tsorted)
1338 0 : b->tkey = false;
1339 : }
1340 : }
1341 : }
1342 61479588 : } else if (!ATOMlinear(b->ttype)) {
1343 16213 : b->tnil = b->tnonil = false;
1344 16213 : b->tsorted = b->trevsorted = b->tkey = false;
1345 61463375 : } else if (b->batCount == 0) {
1346 549617 : if (values == NULL) {
1347 0 : b->tsorted = b->trevsorted = true;
1348 0 : b->tkey = count == 1;
1349 0 : b->tnil = true;
1350 0 : b->tnonil = false;
1351 0 : b->tunique_est = 1;
1352 : } else {
1353 549617 : int c;
1354 549617 : b->tnil = b->tnonil = false;
1355 549617 : switch (count) {
1356 549040 : case 1:
1357 549040 : b->tsorted = b->trevsorted = b->tkey = true;
1358 549040 : b->tunique_est = 1;
1359 549040 : break;
1360 138 : case 2:
1361 138 : if (b->tvheap)
1362 67 : c = ATOMcmp(b->ttype,
1363 : ((void **) values)[0],
1364 : ((void **) values)[1]);
1365 : else
1366 71 : c = ATOMcmp(b->ttype,
1367 : values,
1368 : (char *) values + b->twidth);
1369 138 : b->tsorted = c <= 0;
1370 138 : b->tnosorted = !b->tsorted;
1371 138 : b->trevsorted = c >= 0;
1372 138 : b->tnorevsorted = !b->trevsorted;
1373 138 : b->tkey = c != 0;
1374 138 : b->tnokey[0] = 0;
1375 138 : b->tnokey[1] = !b->tkey;
1376 138 : b->tunique_est = (double) (1 + b->tkey);
1377 138 : break;
1378 439 : default:
1379 439 : b->tsorted = b->trevsorted = b->tkey = false;
1380 439 : break;
1381 : }
1382 : }
1383 60913758 : } else if (b->batCount == 1 && count == 1) {
1384 407015 : bi = bat_iterator_nolock(b);
1385 407015 : t = b->ttype == TYPE_msk ? &(msk){false} : ATOMnilptr(b->ttype);
1386 407015 : if (values != NULL) {
1387 403805 : if (b->tvheap)
1388 147694 : t = ((void **) values)[0];
1389 : else
1390 : t = values;
1391 : }
1392 407015 : int c = ATOMcmp(b->ttype, BUNtail(bi, 0), t);
1393 405398 : b->tsorted = c <= 0;
1394 405398 : b->tnosorted = !b->tsorted;
1395 405398 : b->trevsorted = c >= 0;
1396 405398 : b->tnorevsorted = !b->trevsorted;
1397 405398 : b->tkey = c != 0;
1398 405398 : b->tnokey[0] = 0;
1399 405398 : b->tnokey[1] = !b->tkey;
1400 405398 : b->tunique_est = (double) (1 + b->tkey);
1401 405398 : b->tnil |= values == NULL;
1402 405398 : b->tnonil = false;
1403 : } else {
1404 60506743 : b->tnil |= values == NULL;
1405 60506743 : b->tnonil = false;
1406 60506743 : b->tsorted = b->trevsorted = b->tkey = false;
1407 : }
1408 61883338 : BATsetcount(b, p);
1409 61884553 : if (nunique != 0)
1410 483759 : b->tunique_est = (double) nunique;
1411 61884553 : MT_lock_unset(&b->theaplock);
1412 61906014 : MT_rwlock_wrunlock(&b->thashlock);
1413 :
1414 61903675 : IMPSdestroy(b); /* no support for inserts in imprints yet */
1415 61858727 : OIDXdestroy(b);
1416 61847720 : STRMPdestroy(b); /* TODO: use STRMPappendBitstring */
1417 61840435 : RTREEdestroy(b);
1418 61840435 : return GDK_SUCCEED;
1419 : }
1420 :
1421 : /* Append a single value to the bat. */
1422 : gdk_return
1423 47335775 : BUNappend(BAT *b, const void *t, bool force)
1424 : {
1425 47335775 : return BUNappendmulti(b, b->ttype && b->tvheap ? (const void *) &t : (const void *) t, 1, force);
1426 : }
1427 :
1428 : gdk_return
1429 4 : BUNdelete(BAT *b, oid o)
1430 : {
1431 4 : BUN p;
1432 4 : BATiter bi = bat_iterator_nolock(b);
1433 4 : const void *val;
1434 4 : bool locked = false;
1435 4 : BUN nunique;
1436 :
1437 4 : assert(!is_oid_nil(b->hseqbase) || BATcount(b) == 0);
1438 4 : if (o < b->hseqbase || o >= b->hseqbase + BATcount(b)) {
1439 : /* value already not there */
1440 : return GDK_SUCCEED;
1441 : }
1442 4 : assert(BATcount(b) > 0); /* follows from "if" above */
1443 4 : p = o - b->hseqbase;
1444 4 : if (p < b->batInserted) {
1445 0 : GDKerror("cannot delete committed value\n");
1446 0 : return GDK_FAIL;
1447 : }
1448 4 : TRC_DEBUG(ALGO, ALGOBATFMT " deleting oid " OIDFMT "\n", ALGOBATPAR(b), o);
1449 : /* load hash so that we can maintain it */
1450 4 : (void) BATcheckhash(b);
1451 :
1452 4 : val = BUNtail(bi, p);
1453 : /* writing the values should be locked, reading could be done
1454 : * unlocked (since we're the only thread that should be changing
1455 : * anything) */
1456 4 : MT_lock_set(&b->theaplock);
1457 4 : if (b->tmaxpos == p)
1458 1 : b->tmaxpos = BUN_NONE;
1459 4 : if (b->tminpos == p)
1460 0 : b->tminpos = BUN_NONE;
1461 4 : MT_lock_unset(&b->theaplock);
1462 4 : nunique = HASHdelete(&bi, p, val);
1463 4 : ATOMdel(b->ttype, b->tvheap, (var_t *) BUNtloc(bi, p));
1464 4 : if (p != BATcount(b) - 1 &&
1465 2 : (b->ttype != TYPE_void || BATtdense(b))) {
1466 : /* replace to-be-delete BUN with last BUN; materialize
1467 : * void column before doing so */
1468 2 : if (b->ttype == TYPE_void &&
1469 0 : BATmaterialize(b, BUN_NONE) != GDK_SUCCEED)
1470 : return GDK_FAIL;
1471 2 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1472 0 : msk mval = mskGetVal(b, BATcount(b) - 1);
1473 0 : assert(b->thash == NULL);
1474 0 : mskSetVal(b, p, mval);
1475 : /* don't leave garbage */
1476 0 : mskClr(b, BATcount(b) - 1);
1477 : } else {
1478 2 : val = Tloc(b, BATcount(b) - 1);
1479 2 : nunique = HASHdelete(&bi, BATcount(b) - 1, val);
1480 2 : memcpy(Tloc(b, p), val, b->twidth);
1481 2 : nunique = HASHinsert(&bi, p, val);
1482 2 : MT_lock_set(&b->theaplock);
1483 2 : locked = true;
1484 2 : if (b->tminpos == BATcount(b) - 1)
1485 0 : b->tminpos = p;
1486 2 : if (b->tmaxpos == BATcount(b) - 1)
1487 1 : b->tmaxpos = p;
1488 : }
1489 : /* no longer sorted */
1490 2 : if (!locked) {
1491 0 : MT_lock_set(&b->theaplock);
1492 0 : locked = true;
1493 : }
1494 2 : b->tsorted = b->trevsorted = false;
1495 2 : b->theap->dirty = true;
1496 : }
1497 4 : if (!locked)
1498 2 : MT_lock_set(&b->theaplock);
1499 4 : if (b->tnosorted >= p)
1500 0 : b->tnosorted = 0;
1501 4 : if (b->tnorevsorted >= p)
1502 1 : b->tnorevsorted = 0;
1503 4 : b->batCount--;
1504 4 : if (nunique != 0)
1505 0 : b->tunique_est = (double) nunique;
1506 4 : else if (BATcount(b) < gdk_unique_estimate_keep_fraction)
1507 4 : b->tunique_est = 0;
1508 4 : if (b->batCount <= 1) {
1509 : /* some trivial properties */
1510 0 : b->tkey = true;
1511 0 : b->tsorted = b->trevsorted = true;
1512 0 : b->tnosorted = b->tnorevsorted = 0;
1513 0 : if (b->batCount == 0) {
1514 0 : b->tnil = false;
1515 0 : b->tnonil = true;
1516 : }
1517 : }
1518 4 : MT_lock_unset(&b->theaplock);
1519 4 : IMPSdestroy(b);
1520 4 : OIDXdestroy(b);
1521 4 : return GDK_SUCCEED;
1522 : }
1523 :
1524 : /* @- BUN replace
1525 : * The last operation in this context is BUN replace. It assumes that
1526 : * the header denotes a key. The old value association is destroyed
1527 : * (if it exists in the first place) and the new value takes its
1528 : * place.
1529 : *
1530 : * In order to make updates on void columns workable; replaces on them
1531 : * are always done in-place. Performing them without bun-movements
1532 : * greatly simplifies the problem. The 'downside' is that when
1533 : * transaction management has to be performed, replaced values should
1534 : * be saved explicitly.
1535 : */
1536 : static gdk_return
1537 1084852 : BUNinplacemulti(BAT *b, const oid *positions, const void *values, BUN count, bool force, bool autoincr)
1538 : {
1539 1084852 : BUN prv, nxt;
1540 1084852 : const void *val;
1541 1084852 : int (*atomcmp) (const void *, const void *) = ATOMcompare(b->ttype);
1542 1084852 : const void *atomnil = ATOMnilptr(b->ttype);
1543 :
1544 1084852 : MT_lock_set(&b->theaplock);
1545 1092497 : BUN last = BATcount(b) - 1;
1546 1092497 : BATiter bi = bat_iterator_nolock(b);
1547 : /* zap alignment info */
1548 1092497 : if (!force && (b->batRestricted != BAT_WRITE ||
1549 557568 : ((ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1))) {
1550 0 : MT_lock_unset(&b->theaplock);
1551 0 : GDKerror("access denied to %s, aborting.\n",
1552 : BATgetId(b));
1553 0 : assert(0);
1554 : return GDK_FAIL;
1555 : }
1556 1092497 : TRC_DEBUG(ALGO, ALGOBATFMT " replacing " BUNFMT " values\n", ALGOBATPAR(b), count);
1557 1083321 : if (b->ttype == TYPE_void) {
1558 0 : PROPdestroy(b);
1559 0 : b->tminpos = BUN_NONE;
1560 0 : b->tmaxpos = BUN_NONE;
1561 0 : b->tunique_est = 0.0;
1562 1083321 : } else if (count > BATcount(b) / gdk_unique_estimate_keep_fraction) {
1563 396751 : b->tunique_est = 0;
1564 : }
1565 1083321 : const ValRecord *prop;
1566 1083321 : ValRecord minprop, maxprop;
1567 1083321 : const void *minbound = NULL, *maxbound = NULL;
1568 1083321 : if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL &&
1569 0 : VALcopy(&minprop, prop) != NULL)
1570 0 : minbound = VALptr(&minprop);
1571 1082617 : if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL &&
1572 0 : VALcopy(&maxprop, prop) != NULL)
1573 0 : maxbound = VALptr(&maxprop);
1574 1083779 : const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
1575 1092644 : MT_lock_unset(&b->theaplock);
1576 : /* load hash so that we can maintain it */
1577 1099672 : (void) BATcheckhash(b);
1578 1100013 : MT_rwlock_wrlock(&b->thashlock);
1579 2201287 : for (BUN i = 0; i < count; i++) {
1580 1100964 : BUN p = autoincr ? positions[0] - b->hseqbase + i : positions[i] - b->hseqbase;
1581 1101011 : const void *t = b->ttype && b->tvheap ?
1582 1249657 : ((const void **) values)[i] :
1583 952271 : (const void *) ((const char *) values + (i << b->tshift));
1584 1100964 : bool isnil = atomnil && atomcmp(t, atomnil) == 0;
1585 1099375 : if (notnull && isnil) {
1586 0 : assert(0);
1587 : GDKerror("NULL value not within bounds\n");
1588 : MT_rwlock_wrunlock(&b->thashlock);
1589 : goto bailout;
1590 1099375 : } else if (!isnil &&
1591 0 : ((minbound &&
1592 1053365 : atomcmp(t, minbound) < 0) ||
1593 0 : (maxbound &&
1594 0 : atomcmp(t, maxbound) >= 0))) {
1595 0 : assert(0);
1596 : GDKerror("value not within bounds\n");
1597 : MT_rwlock_wrunlock(&b->thashlock);
1598 : goto bailout;
1599 : }
1600 :
1601 : /* retrieve old value, but if this comes from the
1602 : * logger, we need to deal with offsets that point
1603 : * outside of the valid vheap */
1604 1099050 : if (b->ttype == TYPE_void) {
1605 0 : val = BUNtpos(bi, p);
1606 1099050 : } else if (bi.type == TYPE_msk) {
1607 4223 : val = BUNtmsk(bi, p);
1608 1094827 : } else if (b->tvheap) {
1609 148177 : size_t off = BUNtvaroff(bi, p);
1610 148490 : if (off < bi.vhfree)
1611 148635 : val = bi.vh->base + off;
1612 : else
1613 : val = NULL; /* bad offset */
1614 : } else {
1615 946650 : val = BUNtloc(bi, p);
1616 : }
1617 :
1618 1099508 : if (val) {
1619 1099508 : if (atomcmp(val, t) == 0)
1620 253558 : continue; /* nothing to do */
1621 846640 : if (!isnil &&
1622 6745 : b->tnil &&
1623 6744 : atomcmp(val, atomnil) == 0) {
1624 : /* if old value is nil and new value
1625 : * isn't, we're not sure anymore about
1626 : * the nil property, so we must clear
1627 : * it */
1628 6741 : MT_lock_set(&b->theaplock);
1629 6738 : b->tnil = false;
1630 6738 : MT_lock_unset(&b->theaplock);
1631 : }
1632 846647 : if (b->ttype != TYPE_void) {
1633 846471 : if (bi.maxpos != BUN_NONE) {
1634 514279 : if (!isnil && atomcmp(BUNtail(bi, bi.maxpos), t) < 0) {
1635 : /* new value is larger
1636 : * than previous
1637 : * largest */
1638 52378 : bi.maxpos = p;
1639 461900 : } else if (bi.maxpos == p && atomcmp(BUNtail(bi, bi.maxpos), t) != 0) {
1640 : /* old value is equal to
1641 : * largest and new value
1642 : * is smaller or nil (see
1643 : * above), so we don't
1644 : * know anymore which is
1645 : * the largest */
1646 436 : bi.maxpos = BUN_NONE;
1647 : }
1648 : }
1649 846470 : if (bi.minpos != BUN_NONE) {
1650 352365 : if (!isnil && atomcmp(BUNtail(bi, bi.minpos), t) > 0) {
1651 : /* new value is smaller
1652 : * than previous
1653 : * smallest */
1654 403 : bi.minpos = p;
1655 351965 : } else if (bi.minpos == p && atomcmp(BUNtail(bi, bi.minpos), t) != 0) {
1656 : /* old value is equal to
1657 : * smallest and new value
1658 : * is larger or nil (see
1659 : * above), so we don't
1660 : * know anymore which is
1661 : * the largest */
1662 597 : bi.minpos = BUN_NONE;
1663 : }
1664 : }
1665 : }
1666 846649 : HASHdelete_locked(&bi, p, val); /* first delete old value from hash */
1667 : } else {
1668 : /* out of range old value, so the properties and
1669 : * hash cannot be trusted */
1670 0 : PROPdestroy(b);
1671 0 : Hash *hs = b->thash;
1672 0 : if (hs) {
1673 0 : b->thash = NULL;
1674 0 : doHASHdestroy(b, hs);
1675 : }
1676 0 : MT_lock_set(&b->theaplock);
1677 0 : bi.minpos = BUN_NONE;
1678 0 : bi.maxpos = BUN_NONE;
1679 0 : b->tunique_est = 0.0;
1680 0 : MT_lock_unset(&b->theaplock);
1681 : }
1682 844870 : OIDXdestroy(b);
1683 844498 : IMPSdestroy(b);
1684 844331 : STRMPdestroy(b);
1685 844280 : RTREEdestroy(b);
1686 :
1687 846302 : if (b->tvheap && b->ttype) {
1688 73704 : var_t _d;
1689 73704 : ptr _ptr;
1690 73704 : _ptr = BUNtloc(bi, p);
1691 73704 : switch (b->twidth) {
1692 8034 : case 1:
1693 8034 : _d = (var_t) * (uint8_t *) _ptr + GDK_VAROFFSET;
1694 8034 : break;
1695 59735 : case 2:
1696 59735 : _d = (var_t) * (uint16_t *) _ptr + GDK_VAROFFSET;
1697 59735 : break;
1698 5935 : case 4:
1699 5935 : _d = (var_t) * (uint32_t *) _ptr;
1700 5935 : break;
1701 : #if SIZEOF_VAR_T == 8
1702 0 : case 8:
1703 0 : _d = (var_t) * (uint64_t *) _ptr;
1704 0 : break;
1705 : #endif
1706 : default:
1707 0 : MT_UNREACHABLE();
1708 : }
1709 73704 : MT_lock_set(&b->theaplock);
1710 74235 : if (ATOMreplaceVAR(b, &_d, t) != GDK_SUCCEED) {
1711 0 : MT_lock_unset(&b->theaplock);
1712 0 : MT_rwlock_wrunlock(&b->thashlock);
1713 0 : goto bailout;
1714 : }
1715 74346 : MT_lock_unset(&b->theaplock);
1716 74275 : if (b->twidth < SIZEOF_VAR_T &&
1717 74469 : (b->twidth <= 2 ? _d - GDK_VAROFFSET : _d) >= ((size_t) 1 << (8 << b->tshift))) {
1718 : /* doesn't fit in current heap, upgrade it */
1719 69 : if (GDKupgradevarheap(b, _d, 0, bi.count) != GDK_SUCCEED) {
1720 0 : MT_rwlock_wrunlock(&b->thashlock);
1721 0 : goto bailout;
1722 : }
1723 : }
1724 : /* reinitialize iterator after possible heap upgrade */
1725 : {
1726 : /* save and restore minpos/maxpos */
1727 74275 : BUN minpos = bi.minpos;
1728 74275 : BUN maxpos = bi.maxpos;
1729 74275 : bi = bat_iterator_nolock(b);
1730 74275 : bi.minpos = minpos;
1731 74275 : bi.maxpos = maxpos;
1732 : }
1733 74275 : _ptr = BUNtloc(bi, p);
1734 74275 : switch (b->twidth) {
1735 7973 : case 1:
1736 7973 : * (uint8_t *) _ptr = (uint8_t) (_d - GDK_VAROFFSET);
1737 7973 : break;
1738 60360 : case 2:
1739 60360 : * (uint16_t *) _ptr = (uint16_t) (_d - GDK_VAROFFSET);
1740 60360 : break;
1741 5942 : case 4:
1742 5942 : * (uint32_t *) _ptr = (uint32_t) _d;
1743 5942 : break;
1744 : #if SIZEOF_VAR_T == 8
1745 0 : case 8:
1746 0 : * (uint64_t *) _ptr = (uint64_t) _d;
1747 0 : break;
1748 : #endif
1749 : default:
1750 74275 : MT_UNREACHABLE();
1751 : }
1752 772598 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
1753 4196 : mskSetVal(b, p, * (msk *) t);
1754 : } else {
1755 768402 : assert(BATatoms[b->ttype].atomPut == NULL);
1756 768402 : switch (ATOMsize(b->ttype)) {
1757 : case 0: /* void */
1758 : break;
1759 15819 : case 1:
1760 15819 : ((bte *) b->theap->base)[p] = * (bte *) t;
1761 15819 : break;
1762 7860 : case 2:
1763 7860 : ((sht *) b->theap->base)[p] = * (sht *) t;
1764 7860 : break;
1765 186505 : case 4:
1766 186505 : ((int *) b->theap->base)[p] = * (int *) t;
1767 186505 : break;
1768 558218 : case 8:
1769 558218 : ((lng *) b->theap->base)[p] = * (lng *) t;
1770 558218 : break;
1771 0 : case 16:
1772 : #ifdef HAVE_HGE
1773 0 : ((hge *) b->theap->base)[p] = * (hge *) t;
1774 : #else
1775 : ((uuid *) b->theap->base)[p] = * (uuid *) t;
1776 : #endif
1777 0 : break;
1778 0 : default:
1779 0 : memcpy(BUNtloc(bi, p), t, ATOMsize(b->ttype));
1780 0 : break;
1781 : }
1782 : }
1783 :
1784 846873 : HASHinsert_locked(&bi, p, t); /* insert new value into hash */
1785 :
1786 844870 : prv = p > 0 ? p - 1 : BUN_NONE;
1787 844870 : nxt = p < last ? p + 1 : BUN_NONE;
1788 :
1789 844870 : MT_lock_set(&b->theaplock);
1790 846234 : if (b->tsorted) {
1791 2938 : if (prv != BUN_NONE &&
1792 1170 : atomcmp(t, BUNtail(bi, prv)) < 0) {
1793 37 : b->tsorted = false;
1794 37 : b->tnosorted = p;
1795 2424 : } else if (nxt != BUN_NONE &&
1796 693 : atomcmp(t, BUNtail(bi, nxt)) > 0) {
1797 623 : b->tsorted = false;
1798 623 : b->tnosorted = nxt;
1799 1108 : } else if (b->ttype != TYPE_void && BATtdense(b)) {
1800 0 : if (prv != BUN_NONE &&
1801 0 : 1 + * (oid *) BUNtloc(bi, prv) != * (oid *) t) {
1802 0 : b->tseqbase = oid_nil;
1803 0 : } else if (nxt != BUN_NONE &&
1804 0 : * (oid *) BUNtloc(bi, nxt) != 1 + * (oid *) t) {
1805 0 : b->tseqbase = oid_nil;
1806 0 : } else if (prv == BUN_NONE &&
1807 0 : nxt == BUN_NONE) {
1808 0 : b->tseqbase = * (oid *) t;
1809 : }
1810 : }
1811 844466 : } else if (b->tnosorted >= p)
1812 3065 : b->tnosorted = 0;
1813 846234 : if (b->trevsorted) {
1814 1551 : if (prv != BUN_NONE &&
1815 503 : atomcmp(t, BUNtail(bi, prv)) > 0) {
1816 52 : b->trevsorted = false;
1817 52 : b->tnorevsorted = p;
1818 1225 : } else if (nxt != BUN_NONE &&
1819 229 : atomcmp(t, BUNtail(bi, nxt)) < 0) {
1820 40 : b->trevsorted = false;
1821 40 : b->tnorevsorted = nxt;
1822 : }
1823 845186 : } else if (b->tnorevsorted >= p)
1824 1610 : b->tnorevsorted = 0;
1825 846234 : if (((b->ttype != TYPE_void) & b->tkey) && b->batCount > 1) {
1826 903 : BATkey(b, false);
1827 845331 : } else if (!b->tkey && (b->tnokey[0] == p || b->tnokey[1] == p))
1828 900 : b->tnokey[0] = b->tnokey[1] = 0;
1829 846234 : if (b->tnonil && ATOMstorage(b->ttype) != TYPE_msk)
1830 78290 : b->tnonil = t && atomcmp(t, atomnil) != 0;
1831 1099830 : MT_lock_unset(&b->theaplock);
1832 : }
1833 1099060 : BUN nunique = b->thash ? b->thash->nunique : 0;
1834 1099060 : MT_rwlock_wrunlock(&b->thashlock);
1835 1100298 : MT_lock_set(&b->theaplock);
1836 1096915 : if (nunique != 0)
1837 38482 : b->tunique_est = (double) nunique;
1838 1096915 : b->tminpos = bi.minpos;
1839 1096915 : b->tmaxpos = bi.maxpos;
1840 1096915 : b->theap->dirty = true;
1841 1096915 : if (b->tvheap)
1842 147340 : b->tvheap->dirty = true;
1843 1096915 : MT_lock_unset(&b->theaplock);
1844 :
1845 1097458 : return GDK_SUCCEED;
1846 :
1847 0 : bailout:
1848 0 : if (minbound)
1849 0 : VALclear(&minprop);
1850 0 : if (maxbound)
1851 0 : VALclear(&maxprop);
1852 : return GDK_FAIL;
1853 : }
1854 :
1855 : /* Replace multiple values given by their positions with the given values. */
1856 : gdk_return
1857 565210 : BUNreplacemulti(BAT *b, const oid *positions, const void *values, BUN count, bool force)
1858 : {
1859 565210 : BATcheck(b, GDK_FAIL);
1860 :
1861 565210 : if (b->ttype == TYPE_void && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED)
1862 : return GDK_FAIL;
1863 :
1864 565210 : return BUNinplacemulti(b, positions, values, count, force, false);
1865 : }
1866 :
1867 : /* Replace multiple values starting from a given position with the given
1868 : * values. */
1869 : gdk_return
1870 527010 : BUNreplacemultiincr(BAT *b, oid position, const void *values, BUN count, bool force)
1871 : {
1872 527010 : BATcheck(b, GDK_FAIL);
1873 :
1874 527010 : if (b->ttype == TYPE_void && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED)
1875 : return GDK_FAIL;
1876 :
1877 527010 : return BUNinplacemulti(b, &position, values, count, force, true);
1878 : }
1879 :
1880 : gdk_return
1881 565210 : BUNreplace(BAT *b, oid id, const void *t, bool force)
1882 : {
1883 565210 : return BUNreplacemulti(b, &id, b->ttype && b->tvheap ? (const void *) &t : t, 1, force);
1884 : }
1885 :
1886 : /* very much like BUNreplace, but this doesn't make any changes if the
1887 : * tail column is void */
1888 : gdk_return
1889 3677 : void_inplace(BAT *b, oid id, const void *val, bool force)
1890 : {
1891 3677 : assert(id >= b->hseqbase && id < b->hseqbase + BATcount(b));
1892 3677 : if (id < b->hseqbase || id >= b->hseqbase + BATcount(b)) {
1893 : GDKerror("id out of range\n");
1894 : return GDK_FAIL;
1895 : }
1896 3677 : if (b->ttype == TYPE_void)
1897 : return GDK_SUCCEED;
1898 3677 : return BUNinplacemulti(b, &id, b->ttype && b->tvheap ? (const void *) &val : (const void *) val, 1, force, false);
1899 : }
1900 :
1901 : /*
1902 : * @- BUN Lookup
1903 : * Location of a BUN using a value should use the available indexes to
1904 : * speed up access. If indexes are lacking then a hash index is
1905 : * constructed under the assumption that 1) multiple access to the BAT
1906 : * can be expected and 2) building the hash is only slightly more
1907 : * expensive than the full linear scan. BUN_NONE is returned if no
1908 : * such element could be found. In those cases where the type is
1909 : * known and a hash index is available, one should use the inline
1910 : * functions to speed-up processing.
1911 : */
1912 : static BUN
1913 0 : slowfnd(BAT *b, const void *v)
1914 : {
1915 0 : BATiter bi = bat_iterator(b);
1916 0 : BUN p, q;
1917 0 : int (*cmp)(const void *, const void *) = ATOMcompare(bi.type);
1918 :
1919 0 : BATloop(b, p, q) {
1920 0 : if ((*cmp)(v, BUNtail(bi, p)) == 0) {
1921 0 : bat_iterator_end(&bi);
1922 0 : return p;
1923 : }
1924 : }
1925 0 : bat_iterator_end(&bi);
1926 0 : return BUN_NONE;
1927 : }
1928 :
1929 : static BUN
1930 0 : mskfnd(BAT *b, msk v)
1931 : {
1932 0 : BUN p, q;
1933 :
1934 0 : if (v) {
1935 : /* find a 1 value */
1936 0 : for (p = 0, q = (BATcount(b) + 31) / 32; p < q; p++) {
1937 0 : if (((uint32_t *) b->theap->base)[p] != 0) {
1938 : /* there's at least one 1 bit */
1939 0 : return p * 32 + candmask_lobit(((uint32_t *) b->theap->base)[p]);
1940 : }
1941 : }
1942 : } else {
1943 : /* find a 0 value */
1944 0 : for (p = 0, q = (BATcount(b) + 31) / 32; p < q; p++) {
1945 0 : if (((uint32_t *) b->theap->base)[p] != ~0U) {
1946 : /* there's at least one 0 bit */
1947 0 : return p * 32 + candmask_lobit(~((uint32_t *) b->theap->base)[p]);
1948 : }
1949 : }
1950 : }
1951 : return BUN_NONE;
1952 : }
1953 :
1954 : BUN
1955 1366155 : BUNfnd(BAT *b, const void *v)
1956 : {
1957 1366155 : BUN r = BUN_NONE;
1958 1366155 : BATiter bi;
1959 :
1960 1366155 : BATcheck(b, BUN_NONE);
1961 1366155 : if (!v || BATcount(b) == 0)
1962 : return r;
1963 951216 : if (complex_cand(b)) {
1964 0 : struct canditer ci;
1965 0 : canditer_init(&ci, NULL, b);
1966 0 : return canditer_search(&ci, * (const oid *) v, false);
1967 : }
1968 951216 : if (BATtvoid(b))
1969 429208 : return BUNfndVOID(b, v);
1970 522008 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1971 0 : return mskfnd(b, *(msk*)v);
1972 : }
1973 522008 : if (!BATcheckhash(b)) {
1974 53852 : if (BATordered(b) || BATordered_rev(b))
1975 53506 : return SORTfnd(b, v);
1976 : }
1977 468489 : if (BAThash(b) == GDK_SUCCEED) {
1978 468485 : bi = bat_iterator(b); /* outside of hashlock */
1979 468503 : MT_rwlock_rdlock(&b->thashlock);
1980 468503 : if (b->thash == NULL) {
1981 0 : MT_rwlock_rdunlock(&b->thashlock);
1982 0 : bat_iterator_end(&bi);
1983 0 : goto hashfnd_failed;
1984 : }
1985 936816 : switch (ATOMbasetype(bi.type)) {
1986 7 : case TYPE_bte:
1987 7 : HASHloop_bte(bi, b->thash, r, v)
1988 : break;
1989 : break;
1990 0 : case TYPE_sht:
1991 0 : HASHloop_sht(bi, b->thash, r, v)
1992 : break;
1993 : break;
1994 59 : case TYPE_int:
1995 59 : HASHloop_int(bi, b->thash, r, v)
1996 : break;
1997 : break;
1998 0 : case TYPE_flt:
1999 0 : HASHloop_flt(bi, b->thash, r, v)
2000 : break;
2001 : break;
2002 0 : case TYPE_dbl:
2003 0 : HASHloop_dbl(bi, b->thash, r, v)
2004 : break;
2005 : break;
2006 171 : case TYPE_lng:
2007 171 : HASHloop_lng(bi, b->thash, r, v)
2008 : break;
2009 : break;
2010 : #ifdef HAVE_HGE
2011 0 : case TYPE_hge:
2012 0 : HASHloop_hge(bi, b->thash, r, v)
2013 : break;
2014 : break;
2015 : #endif
2016 0 : case TYPE_uuid:
2017 0 : HASHloop_uuid(bi, b->thash, r, v)
2018 : break;
2019 : break;
2020 468265 : case TYPE_str:
2021 649515 : HASHloop_str(bi, b->thash, r, v)
2022 : break;
2023 : break;
2024 1 : default:
2025 3 : HASHloop(bi, b->thash, r, v)
2026 : break;
2027 : break;
2028 : }
2029 468500 : MT_rwlock_rdunlock(&b->thashlock);
2030 468500 : bat_iterator_end(&bi);
2031 468500 : return r;
2032 : }
2033 0 : hashfnd_failed:
2034 : /* can't build hash table, search the slow way */
2035 0 : GDKclrerr();
2036 0 : return slowfnd(b, v);
2037 : }
2038 :
2039 : /*
2040 : * @+ BAT Property Management
2041 : *
2042 : * The function BATcount returns the number of active elements in a
2043 : * BAT. Counting is type independent. It can be implemented quickly,
2044 : * because the system ensures a dense BUN list.
2045 : */
2046 : void
2047 2868564 : BATsetcapacity(BAT *b, BUN cnt)
2048 : {
2049 2868564 : b->batCapacity = cnt;
2050 2868564 : assert(b->batCount <= cnt);
2051 2868564 : }
2052 :
2053 : /* Set the batCount value for the bat and also set some dependent
2054 : * properties. This function should be called only when it is save from
2055 : * concurrent use (e.g. when theaplock is being held). */
2056 : void
2057 84846500 : BATsetcount(BAT *b, BUN cnt)
2058 : {
2059 : /* head column is always VOID, and some head properties never change */
2060 84846500 : assert(!is_oid_nil(b->hseqbase));
2061 84846500 : assert(cnt <= BUN_MAX);
2062 :
2063 84846500 : b->batCount = cnt;
2064 84846500 : if (b->theap->parentid == b->batCacheid) {
2065 81902902 : b->theap->dirty |= b->ttype != TYPE_void && cnt > 0;
2066 81902902 : b->theap->free = tailsize(b, cnt);
2067 : }
2068 84846500 : if (b->ttype == TYPE_void)
2069 10575377 : b->batCapacity = cnt;
2070 84846500 : if (cnt <= 1) {
2071 15527521 : b->tsorted = b->trevsorted = ATOMlinear(b->ttype);
2072 15527521 : b->tnosorted = b->tnorevsorted = 0;
2073 : }
2074 : /* if the BAT was made smaller, we need to zap some values */
2075 84846500 : if (b->tnosorted >= BATcount(b))
2076 13846888 : b->tnosorted = 0;
2077 84846500 : if (b->tnorevsorted >= BATcount(b))
2078 13636173 : b->tnorevsorted = 0;
2079 84846500 : if (b->tnokey[0] >= BATcount(b) || b->tnokey[1] >= BATcount(b)) {
2080 13652337 : b->tnokey[0] = 0;
2081 13652337 : b->tnokey[1] = 0;
2082 : }
2083 84846500 : if (b->ttype == TYPE_void) {
2084 10595257 : b->tsorted = true;
2085 10595257 : if (is_oid_nil(b->tseqbase)) {
2086 5462001 : b->tkey = cnt <= 1;
2087 5462001 : b->trevsorted = true;
2088 5462001 : b->tnil = true;
2089 5462001 : b->tnonil = false;
2090 : } else {
2091 5133256 : b->tkey = true;
2092 5133256 : b->trevsorted = cnt <= 1;
2093 5133256 : b->tnil = false;
2094 5133256 : b->tnonil = true;
2095 : }
2096 : }
2097 84846500 : assert(b->batCapacity >= cnt);
2098 84846500 : }
2099 :
2100 : /*
2101 : * The key and name properties can be changed at any time. Keyed
2102 : * dimensions are automatically supported by an auxiliary hash-based
2103 : * access structure to speed up searching. Turning off the key
2104 : * integrity property does not cause the index to disappear. It can
2105 : * still be used to speed-up retrieval. The routine BATkey sets the
2106 : * key property of the association head.
2107 : */
2108 : gdk_return
2109 51191 : BATkey(BAT *b, bool flag)
2110 : {
2111 51191 : BATcheck(b, GDK_FAIL);
2112 51191 : if (b->ttype == TYPE_void) {
2113 1096 : if (BATtdense(b) && !flag) {
2114 0 : GDKerror("dense column must be unique.\n");
2115 0 : return GDK_FAIL;
2116 : }
2117 1096 : if (is_oid_nil(b->tseqbase) && flag && b->batCount > 1) {
2118 0 : GDKerror("void column cannot be unique.\n");
2119 0 : return GDK_FAIL;
2120 : }
2121 : }
2122 51191 : b->tkey = flag;
2123 51191 : if (!flag) {
2124 36155 : b->tseqbase = oid_nil;
2125 : } else
2126 15036 : b->tnokey[0] = b->tnokey[1] = 0;
2127 51191 : gdk_return rc = GDK_SUCCEED;
2128 51191 : if (flag && VIEWtparent(b)) {
2129 : /* if a view is key, then so is the parent if the two
2130 : * are aligned */
2131 2 : BAT *bp = BATdescriptor(VIEWtparent(b));
2132 2 : if (bp != NULL) {
2133 2 : MT_lock_set(&bp->theaplock);
2134 4 : if (BATcount(b) == BATcount(bp) &&
2135 2 : ATOMtype(BATttype(b)) == ATOMtype(BATttype(bp)) &&
2136 2 : !BATtkey(bp) &&
2137 0 : ((BATtvoid(b) && BATtvoid(bp) && b->tseqbase == bp->tseqbase) ||
2138 : BATcount(b) == 0))
2139 0 : rc = BATkey(bp, true);
2140 2 : MT_lock_unset(&bp->theaplock);
2141 2 : BBPunfix(bp->batCacheid);
2142 : }
2143 : }
2144 : return rc;
2145 : }
2146 :
2147 : void
2148 2756570 : BAThseqbase(BAT *b, oid o)
2149 : {
2150 2756570 : if (b != NULL) {
2151 2756570 : assert(o <= GDK_oid_max); /* i.e., not oid_nil */
2152 2756570 : assert(o + BATcount(b) <= GDK_oid_max);
2153 2756570 : b->hseqbase = o;
2154 : }
2155 2756570 : }
2156 :
2157 : void
2158 6351749 : BATtseqbase(BAT *b, oid o)
2159 : {
2160 6351749 : assert(o <= oid_nil);
2161 6351749 : if (b == NULL)
2162 : return;
2163 6351749 : assert(is_oid_nil(o) || o + BATcount(b) <= GDK_oid_max);
2164 6351749 : if (ATOMtype(b->ttype) == TYPE_oid) {
2165 5389667 : b->tseqbase = o;
2166 :
2167 : /* adapt keyness */
2168 5389667 : if (BATtvoid(b)) {
2169 5293990 : b->tsorted = true;
2170 5293990 : if (is_oid_nil(o)) {
2171 95 : b->tkey = b->batCount <= 1;
2172 95 : b->tnonil = b->batCount == 0;
2173 95 : b->tnil = b->batCount > 0;
2174 95 : b->trevsorted = true;
2175 95 : b->tnosorted = b->tnorevsorted = 0;
2176 95 : if (!b->tkey) {
2177 0 : b->tnokey[0] = 0;
2178 0 : b->tnokey[1] = 1;
2179 : } else {
2180 95 : b->tnokey[0] = b->tnokey[1] = 0;
2181 : }
2182 : } else {
2183 5293895 : if (!b->tkey) {
2184 16107 : b->tkey = true;
2185 16107 : b->tnokey[0] = b->tnokey[1] = 0;
2186 : }
2187 5293895 : b->tnonil = true;
2188 5293895 : b->tnil = false;
2189 5293895 : b->trevsorted = b->batCount <= 1;
2190 5293895 : if (!b->trevsorted)
2191 16401 : b->tnorevsorted = 1;
2192 : }
2193 : }
2194 : } else {
2195 962082 : assert(o == oid_nil);
2196 962082 : b->tseqbase = oid_nil;
2197 : }
2198 : }
2199 :
2200 : /*
2201 : * @- Change the BAT access permissions (read, append, write)
2202 : * Regrettably, BAT access-permissions, persistent status and memory
2203 : * map modes, interact in ways that makes one's brain sizzle. This
2204 : * makes BATsetaccess and TMcommit (where a change in BAT persistence
2205 : * mode is made permanent) points in which the memory map status of
2206 : * bats needs to be carefully re-assessed and ensured.
2207 : *
2208 : * Another complication is the fact that during commit, concurrent
2209 : * users may access the heaps, such that the simple solution
2210 : * unmap;re-map is out of the question.
2211 : * Even worse, it is not possible to even rename an open mmap file in
2212 : * Windows. For this purpose, we dropped the old .priv scheme, which
2213 : * relied on file moves. Now, the file that is opened with mmap is
2214 : * always the X file, in case of newstorage=STORE_PRIV, we save in a
2215 : * new file X.new
2216 : *
2217 : * we must consider the following dimensions:
2218 : *
2219 : * persistence:
2220 : * not simply the current persistence mode but whether the bat *was*
2221 : * present at the last commit point (BBP status & BBPEXISTING).
2222 : * The crucial issue is namely whether we must guarantee recovery
2223 : * to a previous sane state.
2224 : *
2225 : * access:
2226 : * whether the BAT is BAT_READ or BAT_WRITE. Note that BAT_APPEND
2227 : * is usually the same as BAT_READ (as our concern are only data pages
2228 : * that already existed at the last commit).
2229 : *
2230 : * storage:
2231 : * the current way the heap file X is memory-mapped;
2232 : * STORE_MMAP uses direct mapping (so dirty pages may be flushed
2233 : * at any time to disk), STORE_PRIV uses copy-on-write.
2234 : *
2235 : * newstorage:
2236 : * the current save-regime. STORE_MMAP calls msync() on the heap X,
2237 : * whereas STORE_PRIV writes the *entire* heap in a file: X.new
2238 : * If a BAT is loaded from disk, the field newstorage is used
2239 : * to set storage as well (so before change-access and commit-
2240 : * persistence mayhem, we always have newstorage=storage).
2241 : *
2242 : * change-access:
2243 : * what happens if the bat-access mode is changed from
2244 : * BAT_READ into BAT_WRITE (or vice versa).
2245 : *
2246 : * commit-persistence:
2247 : * what happens during commit if the bat-persistence mode was
2248 : * changed (from TRANSIENT into PERSISTENT, or vice versa).
2249 : *
2250 : * this is the scheme:
2251 : *
2252 : * persistence access newstorage storage change-access commit-persistence
2253 : * =========== ========= ========== ========== ============= ==================
2254 : * 0 transient BAT_READ STORE_MMAP STORE_MMAP =>2 =>4
2255 : * 1 transient BAT_READ STORE_PRIV STORE_PRIV =>3 =>5
2256 : * 2 transient BAT_WRITE STORE_MMAP STORE_MMAP =>0 =>6+
2257 : * 3 transient BAT_WRITE STORE_PRIV STORE_PRIV =>1 =>7
2258 : * 4 persistent BAT_READ STORE_MMAP STORE_MMAP =>6+ =>0
2259 : * 5 persistent BAT_READ STORE_PRIV STORE_PRIV =>7 =>1
2260 : * 6 persistent BAT_WRITE STORE_PRIV STORE_MMAP del X.new=>4+ del X.new;=>2+
2261 : * 7 persistent BAT_WRITE STORE_PRIV STORE_PRIV =>5 =>3
2262 : *
2263 : * exception states:
2264 : * a transient BAT_READ STORE_PRIV STORE_MMAP =>b =>c
2265 : * b transient BAT_WRITE STORE_PRIV STORE_MMAP =>a =>6
2266 : * c persistent BAT_READ STORE_PRIV STORE_MMAP =>6 =>a
2267 : *
2268 : * (+) indicates that we must ensure that the heap gets saved in its new mode
2269 : *
2270 : * Note that we now allow a heap with save-regime STORE_PRIV that was
2271 : * actually mapped STORE_MMAP. In effect, the potential corruption of
2272 : * the X file is compensated by writing out full X.new files that take
2273 : * precedence. When transitioning out of this state towards one with
2274 : * both storage regime and OS as STORE_MMAP we need to move the X.new
2275 : * files into the backup directory. Then msync the X file and (on
2276 : * success) remove the X.new; see backup_new().
2277 : *
2278 : * Exception states are only reachable if the commit fails and those
2279 : * new persistent bats have already been processed (but never become
2280 : * part of a committed state). In that case a transition 2=>6 may end
2281 : * up 2=>b. Exception states a and c are reachable from b.
2282 : *
2283 : * Errors in HEAPchangeaccess() can be handled atomically inside the
2284 : * routine. The work on changing mmap modes HEAPcommitpersistence()
2285 : * is done during the BBPsync() for all bats that are newly persistent
2286 : * (BBPNEW). After the TMcommit(), it is done for those bats that are
2287 : * no longer persistent after the commit (BBPDELETED), only if it
2288 : * succeeds. Such transient bats cannot be processed before the
2289 : * commit, because the commit may fail and then the more unsafe
2290 : * transient mmap modes would be present on a persistent bat.
2291 : *
2292 : * See dirty_bat() in BBPsync() -- gdk_bbp.c and epilogue() in
2293 : * gdk_tm.c.
2294 : *
2295 : * Including the exception states, we have 11 of the 16
2296 : * combinations. As for the 5 avoided states, all four
2297 : * (persistence,access) states with (STORE_MMAP,STORE_PRIV) are
2298 : * omitted (this would amount to an msync() save regime on a
2299 : * copy-on-write heap -- which does not work). The remaining avoided
2300 : * state is the patently unsafe
2301 : * (persistent,BAT_WRITE,STORE_MMAP,STORE_MMAP).
2302 : *
2303 : * Note that after a server restart exception states are gone, as on
2304 : * BAT loads the saved descriptor is inspected again (which will
2305 : * reproduce the state at the last succeeded commit).
2306 : *
2307 : * To avoid exception states, a TMsubcommit protocol would need to be
2308 : * used which is too heavy for BATsetaccess().
2309 : *
2310 : * Note that this code is not about making heaps mmap-ed in the first
2311 : * place. It is just about determining which flavor of mmap should be
2312 : * used. The MAL user is oblivious of such details.
2313 : */
2314 :
2315 : /* rather than deleting X.new, we comply with the commit protocol and
2316 : * move it to backup storage */
2317 : static gdk_return
2318 0 : backup_new(Heap *hp, bool lock)
2319 : {
2320 0 : int batret, bakret, ret = -1;
2321 0 : char *batpath, *bakpath;
2322 0 : struct stat st;
2323 :
2324 0 : char *bak_filename = NULL;
2325 0 : if ((bak_filename = strrchr(hp->filename, DIR_SEP)) != NULL)
2326 0 : bak_filename++;
2327 : else
2328 : bak_filename = hp->filename;
2329 : /* check for an existing X.new in BATDIR, BAKDIR and SUBDIR */
2330 0 : batpath = GDKfilepath(hp->farmid, BATDIR, hp->filename, "new");
2331 0 : bakpath = GDKfilepath(hp->farmid, BAKDIR, bak_filename, "new");
2332 0 : if (batpath != NULL && bakpath != NULL) {
2333 : /* file actions here interact with the global commits */
2334 0 : if (lock)
2335 0 : BBPtmlock();
2336 :
2337 0 : batret = MT_stat(batpath, &st);
2338 0 : bakret = MT_stat(bakpath, &st);
2339 :
2340 0 : if (batret == 0 && bakret) {
2341 : /* no backup yet, so move the existing X.new there out
2342 : * of the way */
2343 0 : if ((ret = MT_rename(batpath, bakpath)) < 0)
2344 0 : GDKsyserror("backup_new: rename %s to %s failed\n",
2345 : batpath, bakpath);
2346 0 : TRC_DEBUG(IO_, "rename(%s,%s) = %d\n", batpath, bakpath, ret);
2347 0 : } else if (batret == 0) {
2348 : /* there is a backup already; just remove the X.new */
2349 0 : if ((ret = MT_remove(batpath)) != 0)
2350 0 : GDKsyserror("backup_new: remove %s failed\n", batpath);
2351 0 : TRC_DEBUG(IO_, "remove(%s) = %d\n", batpath, ret);
2352 : } else {
2353 : ret = 0;
2354 : }
2355 0 : if (lock)
2356 0 : BBPtmunlock();
2357 : }
2358 0 : GDKfree(batpath);
2359 0 : GDKfree(bakpath);
2360 0 : return ret ? GDK_FAIL : GDK_SUCCEED;
2361 : }
2362 :
2363 : #define ACCESSMODE(wr,rd) ((wr)?BAT_WRITE:(rd)?BAT_READ:-1)
2364 :
2365 : /* transition heap from readonly to writable */
2366 : static storage_t
2367 5579132 : HEAPchangeaccess(Heap *hp, int dstmode, bool existing)
2368 : {
2369 5579132 : if (hp->base == NULL || hp->newstorage == STORE_MEM || !existing || dstmode == -1)
2370 5579132 : return hp->newstorage; /* 0<=>2,1<=>3,a<=>b */
2371 :
2372 0 : if (dstmode == BAT_WRITE) {
2373 0 : if (hp->storage != STORE_PRIV)
2374 0 : hp->dirty = true; /* exception c does not make it dirty */
2375 0 : return STORE_PRIV; /* 4=>6,5=>7,c=>6 persistent BAT_WRITE needs STORE_PRIV */
2376 : }
2377 0 : if (hp->storage == STORE_MMAP) { /* 6=>4 */
2378 0 : hp->dirty = true;
2379 0 : return backup_new(hp, true) != GDK_SUCCEED ? STORE_INVALID : STORE_MMAP; /* only called for existing bats */
2380 : }
2381 : return hp->storage; /* 7=>5 */
2382 : }
2383 :
2384 : /* heap changes persistence mode (at commit point) */
2385 : static storage_t
2386 152567 : HEAPcommitpersistence(Heap *hp, bool writable, bool existing)
2387 : {
2388 152567 : if (existing) { /* existing, ie will become transient */
2389 37575 : if (hp->storage == STORE_MMAP && hp->newstorage == STORE_PRIV && writable) { /* 6=>2 */
2390 0 : hp->dirty = true;
2391 0 : return backup_new(hp, false) != GDK_SUCCEED ? STORE_INVALID : STORE_MMAP; /* only called for existing bats */
2392 : }
2393 37575 : return hp->newstorage; /* 4=>0,5=>1,7=>3,c=>a no change */
2394 : }
2395 : /* !existing, ie will become persistent */
2396 114992 : if (hp->newstorage == STORE_MEM)
2397 : return hp->newstorage;
2398 746 : if (hp->newstorage == STORE_MMAP && !writable)
2399 : return STORE_MMAP; /* 0=>4 STORE_MMAP */
2400 :
2401 0 : if (hp->newstorage == STORE_MMAP)
2402 0 : hp->dirty = true; /* 2=>6 */
2403 : return STORE_PRIV; /* 1=>5,2=>6,3=>7,a=>c,b=>6 states */
2404 : }
2405 :
2406 :
2407 : #define ATOMappendpriv(t, h) (ATOMstorage(t) != TYPE_str /*|| GDK_ELIMDOUBLES(h) */)
2408 :
2409 : /* change the heap modes at a commit */
2410 : gdk_return
2411 131127 : BATcheckmodes(BAT *b, bool existing)
2412 : {
2413 131127 : storage_t m1 = STORE_MEM, m3 = STORE_MEM;
2414 131127 : bool dirty = false, wr;
2415 :
2416 131127 : BATcheck(b, GDK_FAIL);
2417 :
2418 131127 : wr = (b->batRestricted == BAT_WRITE);
2419 131127 : if (b->ttype) {
2420 131127 : m1 = HEAPcommitpersistence(b->theap, wr, existing);
2421 131127 : dirty |= (b->theap->newstorage != m1);
2422 : }
2423 :
2424 131127 : if (b->tvheap) {
2425 21440 : bool ta = (b->batRestricted == BAT_APPEND) && ATOMappendpriv(b->ttype, b->tvheap);
2426 21440 : m3 = HEAPcommitpersistence(b->tvheap, wr || ta, existing);
2427 21440 : dirty |= (b->tvheap->newstorage != m3);
2428 : }
2429 131127 : if (m1 == STORE_INVALID || m3 == STORE_INVALID)
2430 : return GDK_FAIL;
2431 :
2432 131127 : if (dirty) {
2433 0 : b->theap->newstorage = m1;
2434 0 : if (b->tvheap)
2435 0 : b->tvheap->newstorage = m3;
2436 : }
2437 : return GDK_SUCCEED;
2438 : }
2439 :
2440 : BAT *
2441 7445276 : BATsetaccess(BAT *b, restrict_t newmode)
2442 : {
2443 7445276 : restrict_t bakmode;
2444 :
2445 7445276 : BATcheck(b, NULL);
2446 7445276 : if (newmode != BAT_READ &&
2447 19177 : (isVIEW(b) || (ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1)) {
2448 0 : BAT *bn = COLcopy(b, b->ttype, true, b->batRole);
2449 0 : BBPunfix(b->batCacheid);
2450 0 : if (bn == NULL)
2451 : return NULL;
2452 : b = bn;
2453 : }
2454 7445279 : MT_lock_set(&b->theaplock);
2455 7502992 : bakmode = b->batRestricted;
2456 7502992 : if (bakmode != newmode) {
2457 4958104 : bool existing = (BBP_status(b->batCacheid) & BBPEXISTING) != 0;
2458 4958104 : bool wr = (newmode == BAT_WRITE);
2459 4958104 : bool rd = (bakmode == BAT_WRITE);
2460 4958104 : storage_t m1 = STORE_MEM, m3 = STORE_MEM;
2461 4958104 : storage_t b1 = STORE_MEM, b3 = STORE_MEM;
2462 :
2463 4958104 : if (b->theap->parentid == b->batCacheid) {
2464 4946570 : b1 = b->theap->newstorage;
2465 4956523 : m1 = HEAPchangeaccess(b->theap, ACCESSMODE(wr, rd), existing);
2466 : }
2467 4940566 : if (b->tvheap && b->tvheap->parentid == b->batCacheid) {
2468 661463 : bool ta = (newmode == BAT_APPEND && ATOMappendpriv(b->ttype, b->tvheap));
2469 661463 : b3 = b->tvheap->newstorage;
2470 1323668 : m3 = HEAPchangeaccess(b->tvheap, ACCESSMODE(wr && ta, rd && ta), existing);
2471 : }
2472 4941743 : if (m1 == STORE_INVALID || m3 == STORE_INVALID) {
2473 1708201 : MT_lock_unset(&b->theaplock);
2474 1712064 : BBPunfix(b->batCacheid);
2475 1712064 : return NULL;
2476 : }
2477 :
2478 : /* set new access mode and mmap modes */
2479 3233542 : b->batRestricted = newmode;
2480 3233542 : if (b->theap->parentid == b->batCacheid)
2481 3229543 : b->theap->newstorage = m1;
2482 3233542 : if (b->tvheap && b->tvheap->parentid == b->batCacheid)
2483 649256 : b->tvheap->newstorage = m3;
2484 :
2485 3233542 : MT_lock_unset(&b->theaplock);
2486 3244858 : if (existing && !isVIEW(b) && BBPsave(b) != GDK_SUCCEED) {
2487 : /* roll back all changes */
2488 0 : MT_lock_set(&b->theaplock);
2489 0 : b->batRestricted = bakmode;
2490 0 : b->theap->newstorage = b1;
2491 0 : if (b->tvheap)
2492 0 : b->tvheap->newstorage = b3;
2493 0 : MT_lock_unset(&b->theaplock);
2494 0 : BBPunfix(b->batCacheid);
2495 0 : return NULL;
2496 : }
2497 : } else {
2498 2544888 : MT_lock_unset(&b->theaplock);
2499 : }
2500 : return b;
2501 : }
2502 :
2503 : restrict_t
2504 0 : BATgetaccess(BAT *b)
2505 : {
2506 0 : BATcheck(b, BAT_WRITE);
2507 0 : MT_lock_set(&b->theaplock);
2508 0 : restrict_t restricted = b->batRestricted;
2509 0 : MT_lock_unset(&b->theaplock);
2510 0 : return restricted;
2511 : }
2512 :
2513 : /*
2514 : * @- change BAT persistency (persistent,session,transient)
2515 : * In the past, we prevented BATS with certain types from being saved at all:
2516 : * - BATs of BATs, as having recursive bats creates cascading
2517 : * complexities in commits/aborts.
2518 : * - any atom with refcounts, as the BBP has no overview of such
2519 : * user-defined refcounts.
2520 : * - pointer types, as the values they point to are bound to be transient.
2521 : *
2522 : * However, nowadays we do allow such saves, as the BBP swapping
2523 : * mechanism was altered to be able to save transient bats temporarily
2524 : * to disk in order to make room. Thus, we must be able to save any
2525 : * transient BAT to disk.
2526 : *
2527 : * What we don't allow is to make such bats persistent.
2528 : *
2529 : * Although the persistent state does influence the allowed mmap
2530 : * modes, this only goes for the *real* committed persistent
2531 : * state. Making the bat persistent with BATmode does not matter for
2532 : * the heap modes until the commit point is reached. So we do not need
2533 : * to do anything with heap modes yet at this point.
2534 : */
2535 : gdk_return
2536 422392 : BATmode(BAT *b, bool transient)
2537 : {
2538 422392 : BATcheck(b, GDK_FAIL);
2539 :
2540 : /* can only make a bat PERSISTENT if its role is already
2541 : * PERSISTENT */
2542 422392 : assert(transient || b->batRole == PERSISTENT);
2543 : /* cannot make a view PERSISTENT */
2544 240831 : assert(transient || !isVIEW(b));
2545 :
2546 422392 : if (b->batRole == TRANSIENT && !transient) {
2547 0 : GDKerror("cannot change mode of BAT in TRANSIENT farm.\n");
2548 0 : return GDK_FAIL;
2549 : }
2550 :
2551 422392 : BATiter bi = bat_iterator(b);
2552 422392 : bool mustrelease = false;
2553 422392 : bat bid = b->batCacheid;
2554 :
2555 422392 : if (transient != bi.transient) {
2556 422392 : if (!transient) {
2557 240831 : if (ATOMisdescendant(b->ttype, TYPE_ptr)) {
2558 0 : GDKerror("%s type implies that %s[%s] "
2559 : "cannot be made persistent.\n",
2560 : ATOMname(b->ttype), BATgetId(b),
2561 : ATOMname(b->ttype));
2562 0 : bat_iterator_end(&bi);
2563 0 : return GDK_FAIL;
2564 : }
2565 : }
2566 :
2567 : /* persistent BATs get a logical reference */
2568 422392 : if (!transient) {
2569 240831 : BBPretain(bid);
2570 181561 : } else if (!bi.transient) {
2571 : /* we need to delay the release because if there
2572 : * is no fix and the bat is loaded, BBPrelease
2573 : * can call BBPfree which calls BATfree which
2574 : * may hang while waiting for the heap reference
2575 : * that we have because of the BAT iterator to
2576 : * come down, in other words, deadlock */
2577 422392 : mustrelease = true;
2578 : }
2579 422392 : MT_lock_set(&GDKswapLock(bid));
2580 422392 : if (!transient) {
2581 240831 : if (BBP_status(bid) & BBPDELETED) {
2582 0 : BBP_status_on(bid, BBPEXISTING);
2583 0 : BBP_status_off(bid, BBPDELETED);
2584 : } else
2585 240831 : BBP_status_on(bid, BBPNEW);
2586 181561 : } else if (!bi.transient) {
2587 181561 : if (!(BBP_status(bid) & BBPNEW))
2588 37923 : BBP_status_on(bid, BBPDELETED);
2589 181561 : BBP_status_off(bid, BBPPERSISTENT);
2590 : }
2591 : /* session bats or persistent bats that did not
2592 : * witness a commit yet may have been saved */
2593 422392 : MT_lock_set(&b->theaplock);
2594 422392 : if (b->batCopiedtodisk) {
2595 28624 : if (!transient) {
2596 529 : BBP_status_off(bid, BBPTMP);
2597 : } else {
2598 : /* TMcommit must remove it to
2599 : * guarantee free space */
2600 28095 : BBP_status_on(bid, BBPTMP);
2601 : }
2602 : }
2603 422392 : b->batTransient = transient;
2604 422392 : MT_lock_unset(&b->theaplock);
2605 422392 : MT_lock_unset(&GDKswapLock(bid));
2606 : }
2607 422392 : bat_iterator_end(&bi);
2608 : /* release after bat_iterator_end because of refs to heaps */
2609 422392 : if (mustrelease)
2610 181561 : BBPrelease(bid);
2611 : return GDK_SUCCEED;
2612 : }
2613 :
2614 : /* BATassertProps checks whether properties are set correctly. Under
2615 : * no circumstances will it change any properties. Note that the
2616 : * "nil" property is not actually used anywhere, but it is checked. */
2617 :
2618 : #ifdef NDEBUG
2619 : /* assertions are disabled, turn failing tests into a message */
2620 : #undef assert
2621 : #define assert(test) ((void) ((test) || (TRC_CRITICAL_ENDIF(CHECK_, "Assertion `%s' failed\n", #test), 0)))
2622 : #endif
2623 :
2624 : /* Assert that properties are set correctly.
2625 : *
2626 : * A BAT can have a bunch of properties set. Mostly, the property
2627 : * bits are set if we *know* the property holds, and not set if we
2628 : * don't know whether the property holds (or if we know it doesn't
2629 : * hold). All properties are per column.
2630 : *
2631 : * The properties currently maintained are:
2632 : *
2633 : * seqbase Only valid for TYPE_oid and TYPE_void columns: each
2634 : * value in the column is exactly one more than the
2635 : * previous value, starting at position 0 with the value
2636 : * stored in this property.
2637 : * This implies sorted, key, nonil (which therefore need
2638 : * to be set).
2639 : * nil There is at least one NIL value in the column.
2640 : * nonil There are no NIL values in the column.
2641 : * key All values in the column are distinct.
2642 : * sorted The column is sorted (ascending). If also revsorted,
2643 : * then all values are equal.
2644 : * revsorted The column is reversely sorted (descending). If
2645 : * also sorted, then all values are equal.
2646 : * nosorted BUN position which proofs not sorted (given position
2647 : * and one before are not ordered correctly).
2648 : * norevsorted BUN position which proofs not revsorted (given position
2649 : * and one before are not ordered correctly).
2650 : * nokey Pair of BUN positions that proof not all values are
2651 : * distinct (i.e. values at given locations are equal).
2652 : *
2653 : * Note that the functions BATtseqbase and BATkey also set more
2654 : * properties than you might suspect. When setting properties on a
2655 : * newly created and filled BAT, you may want to first make sure the
2656 : * batCount is set correctly (e.g. by calling BATsetcount), then use
2657 : * BATtseqbase and BATkey, and finally set the other properties.
2658 : *
2659 : * For a view, we cannot check all properties, since it is possible with
2660 : * the way the SQL layer works, that a parent BAT gets changed, changing
2661 : * the properties, while there is a view. The view is supposed to look
2662 : * at only the non-changing part of the BAT (through candidate lists),
2663 : * but this means that the properties of the view might not be correct.
2664 : * For this reason, for views, we skip all property checking that looks
2665 : * at the BAT content.
2666 : */
2667 :
2668 : void
2669 32135784 : BATassertProps(BAT *b)
2670 : {
2671 32135784 : unsigned bbpstatus;
2672 32135784 : BUN p, q;
2673 32135784 : int (*cmpf)(const void *, const void *);
2674 32135784 : int cmp;
2675 32135784 : const void *prev = NULL, *valp, *nilp;
2676 32135784 : char filename[sizeof(b->theap->filename)];
2677 32135784 : bool isview1, isview2;
2678 :
2679 : /* do the complete check within a lock */
2680 32135784 : MT_lock_set(&b->theaplock);
2681 :
2682 : /* general BAT sanity */
2683 32213610 : assert(b != NULL);
2684 32213610 : assert(b->batCacheid > 0);
2685 32213610 : assert(b->batCacheid < getBBPsize());
2686 32095773 : assert(b == BBP_desc(b->batCacheid));
2687 32095773 : assert(b->batCount >= b->batInserted);
2688 :
2689 : /* headless */
2690 32095773 : assert(b->hseqbase <= GDK_oid_max); /* non-nil seqbase */
2691 32095773 : assert(b->hseqbase + BATcount(b) <= GDK_oid_max);
2692 :
2693 32095773 : isview1 = b->theap->parentid != b->batCacheid;
2694 32095773 : isview2 = b->tvheap && b->tvheap->parentid != b->batCacheid;
2695 :
2696 32095773 : bbpstatus = BBP_status(b->batCacheid);
2697 : /* only at most one of BBPDELETED, BBPEXISTING, BBPNEW may be set */
2698 32095773 : assert(((bbpstatus & BBPDELETED) != 0) +
2699 : ((bbpstatus & BBPEXISTING) != 0) +
2700 : ((bbpstatus & BBPNEW) != 0) <= 1);
2701 :
2702 32095773 : assert(b->ttype >= TYPE_void);
2703 32095773 : assert(b->ttype < GDKatomcnt);
2704 32095773 : assert(isview1 ||
2705 : b->ttype == TYPE_void ||
2706 : BBPfarms[b->theap->farmid].roles & (1 << b->batRole));
2707 32095773 : assert(isview2 ||
2708 : b->tvheap == NULL ||
2709 : (BBPfarms[b->tvheap->farmid].roles & (1 << b->batRole)));
2710 :
2711 32095773 : cmpf = ATOMcompare(b->ttype);
2712 32095773 : nilp = ATOMnilptr(b->ttype);
2713 :
2714 32095773 : assert(isview1 || b->theap->free >= tailsize(b, BATcount(b)));
2715 32095773 : if (b->ttype != TYPE_void) {
2716 28488529 : assert(b->batCount <= b->batCapacity);
2717 28488529 : assert(isview1 || b->theap->size >= b->theap->free);
2718 28488529 : if (ATOMstorage(b->ttype) == TYPE_msk) {
2719 : /* 32 values per 4-byte word (that's not the
2720 : * same as 8 values per byte...) */
2721 4376 : assert(isview1 || b->theap->size >= 4 * ((b->batCapacity + 31) / 32));
2722 : } else
2723 28484153 : assert(isview1 || b->theap->size >> b->tshift >= b->batCapacity);
2724 : }
2725 32095773 : if (!isview1) {
2726 27611528 : strconcat_len(filename, sizeof(filename),
2727 27611528 : BBP_physical(b->theap->parentid),
2728 2134691 : b->ttype == TYPE_str ? b->twidth == 1 ? ".tail1" : b->twidth == 2 ? ".tail2" :
2729 : #if SIZEOF_VAR_T == 8
2730 : b->twidth == 4 ? ".tail4" :
2731 : #endif
2732 : ".tail" : ".tail",
2733 : NULL);
2734 27638123 : assert(strcmp(b->theap->filename, filename) == 0);
2735 : }
2736 32122368 : if (!isview2 && b->tvheap) {
2737 1956580 : strconcat_len(filename, sizeof(filename),
2738 1956580 : BBP_physical(b->tvheap->parentid),
2739 : ".theap",
2740 : NULL);
2741 1957731 : assert(strcmp(b->tvheap->filename, filename) == 0);
2742 : }
2743 :
2744 : /* void, str and blob imply varsized */
2745 32123519 : if (ATOMstorage(b->ttype) == TYPE_str ||
2746 : ATOMstorage(b->ttype) == TYPE_blob)
2747 2848753 : assert(b->tvheap != NULL);
2748 : /* other "known" types are not varsized */
2749 32123519 : if (ATOMstorage(b->ttype) > TYPE_void &&
2750 : ATOMstorage(b->ttype) < TYPE_str)
2751 25636882 : assert(b->tvheap == NULL);
2752 : /* shift and width have a particular relationship */
2753 32123519 : if (ATOMstorage(b->ttype) == TYPE_str)
2754 2844576 : assert(b->twidth >= 1 && b->twidth <= ATOMsize(b->ttype));
2755 : else
2756 29278943 : assert(b->twidth == ATOMsize(b->ttype));
2757 32123519 : assert(b->tseqbase <= oid_nil);
2758 : /* only oid/void columns can be dense */
2759 32123519 : assert(is_oid_nil(b->tseqbase) || b->ttype == TYPE_oid || b->ttype == TYPE_void);
2760 : /* a column cannot both have and not have NILs */
2761 32123519 : assert(!b->tnil || !b->tnonil);
2762 32123519 : if (b->ttype == TYPE_void) {
2763 3752369 : assert(b->tshift == 0);
2764 3752369 : assert(b->twidth == 0);
2765 3752369 : assert(b->tsorted);
2766 3752369 : if (is_oid_nil(b->tseqbase)) {
2767 298 : assert(b->tvheap == NULL);
2768 298 : assert(BATcount(b) == 0 || !b->tnonil);
2769 298 : assert(BATcount(b) <= 1 || !b->tkey);
2770 298 : assert(b->trevsorted);
2771 : } else {
2772 3752071 : if (b->tvheap != NULL) {
2773 : /* candidate list with exceptions */
2774 31309 : assert(b->batRole == TRANSIENT || b->batRole == SYSTRANS);
2775 31309 : assert(b->tvheap->free <= b->tvheap->size);
2776 31309 : assert(b->tvheap->free >= sizeof(ccand_t));
2777 31309 : assert((negoid_cand(b) && ccand_free(b) % SIZEOF_OID == 0) || mask_cand(b));
2778 31309 : if (negoid_cand(b) && ccand_free(b) > 0) {
2779 4 : const oid *oids = (const oid *) ccand_first(b);
2780 4 : q = ccand_free(b) / SIZEOF_OID;
2781 4 : assert(oids != NULL);
2782 4 : assert(b->tseqbase + BATcount(b) + q <= GDK_oid_max);
2783 : /* exceptions within range */
2784 4 : assert(oids[0] >= b->tseqbase);
2785 4 : assert(oids[q - 1] < b->tseqbase + BATcount(b) + q);
2786 : /* exceptions sorted */
2787 6 : for (p = 1; p < q; p++)
2788 2 : assert(oids[p - 1] < oids[p]);
2789 : }
2790 : }
2791 3752071 : assert(b->tseqbase + b->batCount <= GDK_oid_max);
2792 3752071 : assert(BATcount(b) == 0 || !b->tnil);
2793 3752071 : assert(BATcount(b) <= 1 || !b->trevsorted);
2794 3752071 : assert(b->tkey);
2795 3752071 : assert(b->tnonil);
2796 : }
2797 3752369 : MT_lock_unset(&b->theaplock);
2798 27254442 : return;
2799 : }
2800 :
2801 28371150 : BATiter bi = bat_iterator_nolock(b);
2802 :
2803 28371150 : if (BATtdense(b)) {
2804 251113 : assert(b->tseqbase + b->batCount <= GDK_oid_max);
2805 251113 : assert(b->ttype == TYPE_oid);
2806 251113 : assert(b->tsorted);
2807 251113 : assert(b->tkey);
2808 251113 : assert(b->tnonil);
2809 251113 : if ((q = b->batCount) != 0) {
2810 170633 : const oid *o = (const oid *) Tloc(b, 0);
2811 170633 : assert(*o == b->tseqbase);
2812 27640828 : for (p = 1; p < q; p++)
2813 27470195 : assert(o[p - 1] + 1 == o[p]);
2814 : }
2815 251113 : MT_lock_unset(&b->theaplock);
2816 251459 : return;
2817 : }
2818 28120037 : assert(1 << b->tshift == b->twidth);
2819 : /* only linear atoms can be sorted */
2820 28120037 : assert(!b->tsorted || ATOMlinear(b->ttype));
2821 28120037 : assert(!b->trevsorted || ATOMlinear(b->ttype));
2822 28120037 : if (ATOMlinear(b->ttype)) {
2823 28121820 : assert(b->tnosorted == 0 ||
2824 : (b->tnosorted > 0 &&
2825 : b->tnosorted < b->batCount));
2826 28121820 : assert(!b->tsorted || b->tnosorted == 0);
2827 28121820 : if (!isview1 &&
2828 28121820 : !isview2 &&
2829 16996692 : !b->tsorted &&
2830 10131424 : b->tnosorted > 0 &&
2831 10131424 : b->tnosorted < b->batCount)
2832 10131389 : assert(cmpf(BUNtail(bi, b->tnosorted - 1),
2833 : BUNtail(bi, b->tnosorted)) > 0);
2834 28121122 : assert(b->tnorevsorted == 0 ||
2835 : (b->tnorevsorted > 0 &&
2836 : b->tnorevsorted < b->batCount));
2837 28121122 : assert(!b->trevsorted || b->tnorevsorted == 0);
2838 28121122 : if (!isview1 &&
2839 23324859 : !isview2 &&
2840 17590352 : !b->trevsorted &&
2841 5705605 : b->tnorevsorted > 0 &&
2842 5705605 : b->tnorevsorted < b->batCount)
2843 5705080 : assert(cmpf(BUNtail(bi, b->tnorevsorted - 1),
2844 : BUNtail(bi, b->tnorevsorted)) < 0);
2845 : }
2846 : /* if tkey property set, both tnokey values must be 0 */
2847 28116694 : assert(!b->tkey || (b->tnokey[0] == 0 && b->tnokey[1] == 0));
2848 28116694 : if (!isview1 &&
2849 28116694 : !isview2 &&
2850 17396485 : !b->tkey &&
2851 17396485 : (b->tnokey[0] != 0 || b->tnokey[1] != 0)) {
2852 : /* if tkey not set and tnokey indicates a proof of
2853 : * non-key-ness, make sure the tnokey values are in
2854 : * range and indeed provide a proof */
2855 730044 : assert(b->tnokey[0] != b->tnokey[1]);
2856 730044 : assert(b->tnokey[0] < b->batCount);
2857 730044 : assert(b->tnokey[1] < b->batCount);
2858 730044 : assert(cmpf(BUNtail(bi, b->tnokey[0]),
2859 : BUNtail(bi, b->tnokey[1])) == 0);
2860 : }
2861 : /* var heaps must have sane sizes */
2862 28115344 : assert(b->tvheap == NULL || b->tvheap->free <= b->tvheap->size);
2863 :
2864 28115344 : if (!b->tkey && !b->tsorted && !b->trevsorted &&
2865 20078631 : !b->tnonil && !b->tnil) {
2866 : /* nothing more to prove */
2867 19466036 : MT_lock_unset(&b->theaplock);
2868 19462398 : return;
2869 : }
2870 :
2871 : /* only do a scan if the bat is not a view */
2872 8649308 : if (!isview1 && !isview2) {
2873 6980965 : const ValRecord *prop;
2874 6980965 : const void *maxval = NULL;
2875 6980965 : const void *minval = NULL;
2876 6980965 : const void *maxbound = NULL;
2877 6980965 : const void *minbound = NULL;
2878 6980965 : const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
2879 6949142 : bool seenmax = false, seenmin = false;
2880 6949142 : bool seennil = false;
2881 :
2882 6949142 : if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL)
2883 12 : maxbound = VALptr(prop);
2884 6986206 : if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL)
2885 0 : minbound = VALptr(prop);
2886 7011815 : if (b->tmaxpos != BUN_NONE) {
2887 480752 : assert(b->tmaxpos < BATcount(b));
2888 480752 : maxval = BUNtail(bi, b->tmaxpos);
2889 475506 : assert(cmpf(maxval, nilp) != 0);
2890 : }
2891 7005513 : if (b->tminpos != BUN_NONE) {
2892 425695 : assert(b->tminpos < BATcount(b));
2893 425695 : minval = BUNtail(bi, b->tminpos);
2894 427560 : assert(cmpf(minval, nilp) != 0);
2895 : }
2896 7008846 : if (ATOMstorage(b->ttype) == TYPE_msk) {
2897 : /* for now, don't do extra checks for bit mask */
2898 : ;
2899 7004528 : } else if (b->tsorted || b->trevsorted || !b->tkey) {
2900 : /* if sorted (either way), or we don't have to
2901 : * prove uniqueness, we can do a simple
2902 : * scan */
2903 : /* only call compare function if we have to */
2904 6987294 : bool cmpprv = b->tsorted | b->trevsorted | b->tkey;
2905 :
2906 2089606483 : BATloop(b, p, q) {
2907 2082834817 : valp = BUNtail(bi, p);
2908 2082846543 : bool isnil = cmpf(valp, nilp) == 0;
2909 2026834190 : assert(!isnil || !notnull);
2910 2026834190 : assert(!b->tnonil || !isnil);
2911 2026834190 : assert(b->ttype != TYPE_flt || !isinf(*(flt*)valp));
2912 2026834190 : assert(b->ttype != TYPE_dbl || !isinf(*(dbl*)valp));
2913 2026834190 : if (minbound && !isnil) {
2914 30 : cmp = cmpf(minbound, valp);
2915 30 : assert(cmp <= 0);
2916 : }
2917 2026834190 : if (maxbound && !isnil) {
2918 30 : cmp = cmpf(maxbound, valp);
2919 30 : assert(cmp > 0);
2920 : }
2921 2026834190 : if (maxval && !isnil) {
2922 27749708 : cmp = cmpf(maxval, valp);
2923 27757165 : assert(cmp >= 0);
2924 27757165 : seenmax |= cmp == 0;
2925 : }
2926 2026841647 : if (minval && !isnil) {
2927 3322730 : cmp = cmpf(minval, valp);
2928 3326385 : assert(cmp <= 0);
2929 3326385 : seenmin |= cmp == 0;
2930 : }
2931 2026845302 : if (prev && cmpprv) {
2932 1309634226 : cmp = cmpf(prev, valp);
2933 1365575341 : assert(!b->tsorted || cmp <= 0);
2934 1365575341 : assert(!b->trevsorted || cmp >= 0);
2935 1365575341 : assert(!b->tkey || cmp != 0);
2936 : }
2937 2082786417 : seennil |= isnil;
2938 2082786417 : if (seennil && !cmpprv &&
2939 167208 : maxval == NULL && minval == NULL &&
2940 167249 : minbound == NULL && maxbound == NULL) {
2941 : /* we've done all the checking
2942 : * we can do */
2943 : break;
2944 : }
2945 2082619189 : prev = valp;
2946 : }
2947 : } else { /* b->tkey && !b->tsorted && !b->trevsorted */
2948 : /* we need to check for uniqueness the hard
2949 : * way (i.e. using a hash table) */
2950 17234 : const char *nme = BBP_physical(b->batCacheid);
2951 17234 : Hash *hs = NULL;
2952 17234 : BUN mask;
2953 :
2954 17234 : if ((hs = GDKzalloc(sizeof(Hash))) == NULL) {
2955 0 : TRC_WARNING(BAT_, "Cannot allocate hash table\n");
2956 0 : goto abort_check;
2957 : }
2958 17241 : if (snprintf(hs->heaplink.filename, sizeof(hs->heaplink.filename), "%s.thshprpl%x", nme, (unsigned) MT_getpid()) >= (int) sizeof(hs->heaplink.filename) ||
2959 17228 : snprintf(hs->heapbckt.filename, sizeof(hs->heapbckt.filename), "%s.thshprpb%x", nme, (unsigned) MT_getpid()) >= (int) sizeof(hs->heapbckt.filename)) {
2960 : /* cannot happen, see comment in gdk.h
2961 : * about sizes near definition of
2962 : * BBPINIT */
2963 0 : GDKfree(hs);
2964 0 : TRC_CRITICAL(BAT_, "Heap filename is too large\n");
2965 0 : goto abort_check;
2966 : }
2967 17238 : if (ATOMsize(b->ttype) == 1)
2968 : mask = (BUN) 1 << 8;
2969 17231 : else if (ATOMsize(b->ttype) == 2)
2970 : mask = (BUN) 1 << 16;
2971 : else
2972 17231 : mask = HASHmask(b->batCount);
2973 17238 : hs->heapbckt.parentid = b->batCacheid;
2974 17238 : hs->heaplink.parentid = b->batCacheid;
2975 17238 : if ((hs->heaplink.farmid = BBPselectfarm(
2976 17239 : TRANSIENT, b->ttype, hashheap)) < 0 ||
2977 17239 : (hs->heapbckt.farmid = BBPselectfarm(
2978 34464 : TRANSIENT, b->ttype, hashheap)) < 0 ||
2979 17239 : HASHnew(hs, b->ttype, BATcount(b),
2980 : mask, BUN_NONE, false) != GDK_SUCCEED) {
2981 0 : GDKfree(hs);
2982 0 : TRC_WARNING(BAT_, "Cannot allocate hash table\n");
2983 0 : goto abort_check;
2984 : }
2985 34461888 : BATloop(b, p, q) {
2986 34444648 : BUN hb;
2987 34444648 : BUN prb;
2988 34444648 : valp = BUNtail(bi, p);
2989 34444648 : bool isnil = cmpf(valp, nilp) == 0;
2990 34439513 : assert(!isnil || !notnull);
2991 34439513 : assert(b->ttype != TYPE_flt || !isinf(*(flt*)valp));
2992 34439513 : assert(b->ttype != TYPE_dbl || !isinf(*(dbl*)valp));
2993 34439513 : if (minbound && !isnil) {
2994 0 : cmp = cmpf(minbound, valp);
2995 0 : assert(cmp <= 0);
2996 : }
2997 34439513 : if (maxbound && !isnil) {
2998 0 : cmp = cmpf(maxbound, valp);
2999 0 : assert(cmp > 0);
3000 : }
3001 34439513 : if (maxval && !isnil) {
3002 1666 : cmp = cmpf(maxval, valp);
3003 1666 : assert(cmp >= 0);
3004 1666 : seenmax |= cmp == 0;
3005 : }
3006 34439513 : if (minval && !isnil) {
3007 1666 : cmp = cmpf(minval, valp);
3008 1666 : assert(cmp <= 0);
3009 1666 : seenmin |= cmp == 0;
3010 : }
3011 34439513 : prb = HASHprobe(hs, valp);
3012 34442781 : for (hb = HASHget(hs, prb);
3013 35051022 : hb != BUN_NONE;
3014 608241 : hb = HASHgetlink(hs, hb))
3015 609351 : if (cmpf(valp, BUNtail(bi, hb)) == 0)
3016 0 : assert(!b->tkey);
3017 34441671 : HASHputlink(hs, p, HASHget(hs, prb));
3018 34442544 : HASHput(hs, prb, p);
3019 34444654 : assert(!b->tnonil || !isnil);
3020 34444654 : seennil |= isnil;
3021 : }
3022 17240 : HEAPfree(&hs->heaplink, true);
3023 17235 : HEAPfree(&hs->heapbckt, true);
3024 17239 : GDKfree(hs);
3025 : }
3026 6960445 : abort_check:
3027 6960445 : GDKclrerr();
3028 6952743 : assert(maxval == NULL || seenmax);
3029 6952743 : assert(minval == NULL || seenmin);
3030 6952743 : assert(!b->tnil || seennil);
3031 : }
3032 8621086 : MT_lock_unset(&b->theaplock);
3033 : }
|