Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * @a M. L. Kersten, P. Boncz, N. Nes
15 : * @* BAT Module
16 : * In this Chapter we describe the BAT implementation in more detail.
17 : * The routines mentioned are primarily meant to simplify the library
18 : * implementation.
19 : *
20 : * @+ BAT Construction
21 : * BATs are implemented in several blocks of memory, prepared for disk
22 : * storage and easy shipment over a network.
23 : *
24 : * The BAT starts with a descriptor, which indicates the required BAT
25 : * library version and the BAT administration details. In particular,
26 : * it describes the binary relationship maintained and the location of
27 : * fields required for storage.
28 : *
29 : * The general layout of the BAT in this implementation is as follows.
30 : * Each BAT comes with a heap for the loc-size buns and, optionally,
31 : * with heaps to manage the variable-sized data items of both
32 : * dimensions. The buns are assumed to be stored as loc-size objects.
33 : * This is essentially an array of structs to store the associations.
34 : * The size is determined at BAT creation time using an upper bound on
35 : * the number of elements to be accommodated. In case of overflow,
36 : * its storage space is extended automatically.
37 : *
38 : * The capacity of a BAT places an upper limit on the number of BUNs
39 : * to be stored initially. The actual space set aside may be quite
40 : * large. Moreover, the size is aligned to int boundaries to speedup
41 : * access and avoid some machine limitations.
42 : *
43 : * Initialization of the variable parts rely on type specific routines
44 : * called atomHeap.
45 : */
46 : #include "monetdb_config.h"
47 : #include "gdk.h"
48 : #include "gdk_private.h"
49 : #include "mutils.h"
50 :
51 : #ifdef ALIGN
52 : #undef ALIGN
53 : #endif
54 : #define ALIGN(n,b) ((b)?(b)*(1+(((n)-1)/(b))):n)
55 :
56 : #define ATOMneedheap(tpe) (BATatoms[tpe].atomHeap != NULL)
57 :
58 : BAT *
59 15696054 : BATcreatedesc(oid hseq, int tt, bool heapnames, role_t role, uint16_t width)
60 : {
61 15696054 : BAT *bn;
62 :
63 : /*
64 : * Alloc space for the BAT and its dependent records.
65 : */
66 15696054 : assert(tt >= 0);
67 :
68 15696054 : bn = GDKmalloc(sizeof(BAT));
69 :
70 15697464 : if (bn == NULL)
71 : return NULL;
72 :
73 : /*
74 : * Fill in basic column info
75 : */
76 15697464 : *bn = (BAT) {
77 : .hseqbase = hseq,
78 :
79 : .ttype = tt,
80 : .tkey = false,
81 : .tnonil = true,
82 : .tnil = false,
83 15697464 : .tsorted = ATOMlinear(tt),
84 : .trevsorted = ATOMlinear(tt),
85 : .tseqbase = oid_nil,
86 : .tminpos = BUN_NONE,
87 : .tmaxpos = BUN_NONE,
88 : .tunique_est = 0.0,
89 :
90 : .batRole = role,
91 : .batTransient = true,
92 : .batRestricted = BAT_WRITE,
93 : };
94 :
95 15697464 : if (heapnames) {
96 8889392 : if ((bn->theap = GDKmalloc(sizeof(Heap))) == NULL) {
97 0 : GDKfree(bn);
98 0 : return NULL;
99 : }
100 17775899 : *bn->theap = (Heap) {
101 8887859 : .farmid = BBPselectfarm(role, bn->ttype, offheap),
102 : .dirty = true,
103 : };
104 :
105 8888040 : if (ATOMneedheap(tt)) {
106 612879 : if ((bn->tvheap = GDKmalloc(sizeof(Heap))) == NULL) {
107 0 : GDKfree(bn->theap);
108 0 : GDKfree(bn);
109 0 : return NULL;
110 : }
111 612687 : *bn->tvheap = (Heap) {
112 612726 : .farmid = BBPselectfarm(role, bn->ttype, varheap),
113 : .dirty = true,
114 : };
115 : }
116 : }
117 : /*
118 : * add to BBP
119 : */
120 15695920 : if (BBPinsert(bn) == 0) {
121 0 : GDKfree(bn->tvheap);
122 0 : GDKfree(bn->theap);
123 0 : GDKfree(bn);
124 0 : return NULL;
125 : }
126 15695175 : if (bn->theap) {
127 8888149 : bn->theap->parentid = bn->batCacheid;
128 8888149 : ATOMIC_INIT(&bn->theap->refs, 1);
129 8888149 : const char *nme = BBP_physical(bn->batCacheid);
130 8888149 : settailname(bn->theap, nme, tt, width);
131 :
132 8887057 : if (bn->tvheap) {
133 612760 : bn->tvheap->parentid = bn->batCacheid;
134 612760 : ATOMIC_INIT(&bn->tvheap->refs, 1);
135 612760 : strconcat_len(bn->tvheap->filename,
136 : sizeof(bn->tvheap->filename),
137 : nme, ".theap", NULL);
138 : }
139 : }
140 15693975 : char name[MT_NAME_LEN];
141 15693975 : snprintf(name, sizeof(name), "heaplock%d", bn->batCacheid); /* fits */
142 15693975 : MT_lock_init(&bn->theaplock, name);
143 15694333 : snprintf(name, sizeof(name), "BATlock%d", bn->batCacheid); /* fits */
144 15694333 : MT_lock_init(&bn->batIdxLock, name);
145 15696324 : snprintf(name, sizeof(name), "hashlock%d", bn->batCacheid); /* fits */
146 15696324 : MT_rwlock_init(&bn->thashlock, name);
147 15697820 : return bn;
148 : }
149 :
150 : uint8_t
151 17805975 : ATOMelmshift(int sz)
152 : {
153 17805975 : uint8_t sh;
154 17805975 : int i = sz >> 1;
155 :
156 36242537 : for (sh = 0; i != 0; sh++) {
157 18436562 : i >>= 1;
158 : }
159 17805975 : return sh;
160 : }
161 :
162 :
163 : void
164 8889243 : BATsetdims(BAT *b, uint16_t width)
165 : {
166 8889243 : b->twidth = b->ttype == TYPE_str ? width > 0 ? width : 1 : ATOMsize(b->ttype);
167 8889243 : b->tshift = ATOMelmshift(b->twidth);
168 8889243 : assert_shift_width(b->tshift, b->twidth);
169 8889243 : }
170 :
171 : const char *
172 1242 : BATtailname(const BAT *b)
173 : {
174 1242 : if (b->ttype == TYPE_str) {
175 325 : switch (b->twidth) {
176 235 : case 1:
177 235 : return "tail1";
178 75 : case 2:
179 75 : return "tail2";
180 15 : case 4:
181 : #if SIZEOF_VAR_T == 8
182 15 : return "tail4";
183 : case 8:
184 : #endif
185 : break;
186 : default:
187 0 : MT_UNREACHABLE();
188 : }
189 : }
190 : return "tail";
191 : }
192 :
193 : void
194 8948742 : settailname(Heap *restrict tail, const char *restrict physnme, int tt, int width)
195 : {
196 8948742 : if (tt == TYPE_str) {
197 644937 : switch (width) {
198 595396 : case 0:
199 : case 1:
200 595396 : strconcat_len(tail->filename,
201 : sizeof(tail->filename), physnme,
202 : ".tail1", NULL);
203 595396 : return;
204 45590 : case 2:
205 45590 : strconcat_len(tail->filename,
206 : sizeof(tail->filename), physnme,
207 : ".tail2", NULL);
208 45590 : return;
209 3951 : case 4:
210 : #if SIZEOF_VAR_T == 8
211 3951 : strconcat_len(tail->filename,
212 : sizeof(tail->filename), physnme,
213 : ".tail4", NULL);
214 3951 : return;
215 : case 8:
216 : #endif
217 : break;
218 : default:
219 0 : MT_UNREACHABLE();
220 : }
221 : }
222 8303805 : strconcat_len(tail->filename, sizeof(tail->filename), physnme,
223 : ".tail", NULL);
224 : }
225 :
226 : /*
227 : * @- BAT allocation
228 : * Allocate BUN heap and variable-size atomheaps (see e.g. strHeap).
229 : * We now initialize new BATs with their heapname such that the
230 : * modified HEAPalloc/HEAPextend primitives can possibly use memory
231 : * mapped files as temporary heap storage.
232 : *
233 : * In case of huge bats, we want HEAPalloc to write a file to disk,
234 : * and memory map it. To make this possible, we must provide it with
235 : * filenames.
236 : */
237 : BAT *
238 8888327 : COLnew2(oid hseq, int tt, BUN cap, role_t role, uint16_t width)
239 : {
240 8888327 : BAT *bn;
241 :
242 8888327 : assert(cap <= BUN_MAX);
243 8888327 : assert(hseq <= oid_nil);
244 8888327 : assert(tt != TYPE_bat);
245 8888327 : ERRORcheck((tt < 0) || (tt > GDKatomcnt), "tt error\n", NULL);
246 :
247 : /* round up to multiple of BATTINY */
248 8888327 : if (cap < BUN_MAX - BATTINY)
249 8888524 : cap = (cap + BATTINY - 1) & ~(BATTINY - 1);
250 8888327 : if (ATOMstorage(tt) == TYPE_msk) {
251 136335 : if (cap < 8*BATTINY)
252 : cap = 8*BATTINY;
253 : else
254 50729 : cap = (cap + 31) & ~(BUN)31;
255 8751992 : } else if (cap < BATTINY)
256 : cap = BATTINY;
257 : /* limit the size */
258 3439391 : if (cap > BUN_MAX)
259 : cap = BUN_MAX;
260 :
261 8888327 : bn = BATcreatedesc(hseq, tt, true, role, width);
262 8888125 : if (bn == NULL)
263 : return NULL;
264 :
265 8888125 : BATsetdims(bn, width);
266 8888531 : bn->batCapacity = cap;
267 :
268 8888531 : if (ATOMstorage(tt) == TYPE_msk)
269 136335 : cap /= 8; /* 8 values per byte */
270 :
271 : /* alloc the main heaps */
272 8888531 : if (tt && HEAPalloc(bn->theap, cap, bn->twidth) != GDK_SUCCEED) {
273 0 : goto bailout;
274 : }
275 :
276 8887343 : if (bn->tvheap && width == 0 && ATOMheap(tt, bn->tvheap, cap) != GDK_SUCCEED) {
277 0 : HEAPfree(bn->theap, true);
278 0 : goto bailout;
279 : }
280 8887385 : DELTAinit(bn);
281 8887753 : if (BBPcacheit(bn, true) != GDK_SUCCEED) {
282 : /* cannot happen, function always returns success */
283 0 : goto bailout;
284 : }
285 8888917 : TRC_DEBUG(ALGO, "-> " ALGOBATFMT "\n", ALGOBATPAR(bn));
286 : return bn;
287 0 : bailout:
288 0 : BBPclear(bn->batCacheid);
289 0 : return NULL;
290 : }
291 :
292 : BAT *
293 8669410 : COLnew(oid hseq, int tt, BUN cap, role_t role)
294 : {
295 8669410 : return COLnew2(hseq, tt, cap, role, 0);
296 : }
297 :
298 : BAT *
299 3930509 : BATdense(oid hseq, oid tseq, BUN cnt)
300 : {
301 3930509 : BAT *bn;
302 :
303 3930509 : bn = COLnew(hseq, TYPE_void, 0, TRANSIENT);
304 3930470 : if (bn != NULL) {
305 3930470 : BATtseqbase(bn, tseq);
306 3930378 : BATsetcount(bn, cnt);
307 3930159 : TRC_DEBUG(ALGO, OIDFMT "," OIDFMT "," BUNFMT
308 : "-> " ALGOBATFMT "\n", hseq, tseq, cnt,
309 : ALGOBATPAR(bn));
310 : }
311 3930159 : return bn;
312 : }
313 :
314 : BAT *
315 0 : BATattach(int tt, const char *heapfile, role_t role)
316 : {
317 0 : BAT *bn;
318 0 : char *p;
319 0 : size_t m;
320 0 : FILE *f;
321 :
322 0 : ERRORcheck(tt <= 0 , "bad tail type (<=0)\n", NULL);
323 0 : ERRORcheck(ATOMvarsized(tt) && ATOMstorage(tt) != TYPE_str, "bad tail type (varsized and not str)\n", NULL);
324 0 : ERRORcheck(heapfile == NULL, "bad heapfile name\n", NULL);
325 :
326 0 : if ((f = MT_fopen(heapfile, "rb")) == NULL) {
327 0 : GDKsyserror("BATattach: cannot open %s\n", heapfile);
328 0 : return NULL;
329 : }
330 0 : if (ATOMstorage(tt) == TYPE_str) {
331 0 : size_t n;
332 0 : char *s;
333 0 : int c, u;
334 :
335 0 : if ((bn = COLnew(0, tt, 0, role)) == NULL) {
336 0 : fclose(f);
337 0 : return NULL;
338 : }
339 0 : m = 4096;
340 0 : n = 0;
341 0 : u = 0;
342 0 : s = p = GDKmalloc(m);
343 0 : if (p == NULL) {
344 0 : fclose(f);
345 0 : BBPreclaim(bn);
346 0 : return NULL;
347 : }
348 0 : while ((c = getc(f)) != EOF) {
349 0 : if (n == m) {
350 0 : m += 4096;
351 0 : s = GDKrealloc(p, m);
352 0 : if (s == NULL) {
353 0 : GDKfree(p);
354 0 : BBPreclaim(bn);
355 0 : fclose(f);
356 0 : return NULL;
357 : }
358 0 : p = s;
359 0 : s = p + n;
360 : }
361 0 : if (c == '\n' && n > 0 && s[-1] == '\r') {
362 : /* deal with CR-LF sequence */
363 0 : s[-1] = c;
364 : } else {
365 0 : *s++ = c;
366 0 : n++;
367 : }
368 0 : if (u) {
369 0 : if ((c & 0xC0) == 0x80)
370 0 : u--;
371 : else
372 0 : goto notutf8;
373 0 : } else if ((c & 0xF8) == 0xF0)
374 : u = 3;
375 0 : else if ((c & 0xF0) == 0xE0)
376 : u = 2;
377 0 : else if ((c & 0xE0) == 0xC0)
378 : u = 1;
379 0 : else if ((c & 0x80) == 0x80)
380 0 : goto notutf8;
381 0 : else if (c == 0) {
382 0 : if (BUNappend(bn, p, false) != GDK_SUCCEED) {
383 0 : BBPreclaim(bn);
384 0 : fclose(f);
385 0 : GDKfree(p);
386 0 : return NULL;
387 : }
388 : s = p;
389 : n = 0;
390 : }
391 : }
392 0 : fclose(f);
393 0 : GDKfree(p);
394 0 : if (n > 0) {
395 0 : BBPreclaim(bn);
396 0 : GDKerror("last string is not null-terminated\n");
397 0 : return NULL;
398 : }
399 : } else {
400 0 : struct stat st;
401 0 : int atomsize;
402 0 : BUN cap;
403 0 : lng n;
404 :
405 0 : if (fstat(fileno(f), &st) < 0) {
406 0 : GDKsyserror("BATattach: cannot stat %s\n", heapfile);
407 0 : fclose(f);
408 0 : return NULL;
409 : }
410 0 : atomsize = ATOMsize(tt);
411 0 : if (st.st_size % atomsize != 0) {
412 0 : fclose(f);
413 0 : GDKerror("heapfile size not integral number of atoms\n");
414 0 : return NULL;
415 : }
416 0 : if (ATOMstorage(tt) == TYPE_msk ?
417 : (st.st_size > (off_t) (BUN_MAX / 8)) :
418 0 : ((size_t) (st.st_size / atomsize) > (size_t) BUN_MAX)) {
419 0 : fclose(f);
420 0 : GDKerror("heapfile too large\n");
421 0 : return NULL;
422 : }
423 0 : cap = (BUN) (ATOMstorage(tt) == TYPE_msk ?
424 0 : st.st_size * 8 :
425 0 : st.st_size / atomsize);
426 0 : bn = COLnew(0, tt, cap, role);
427 0 : if (bn == NULL) {
428 0 : fclose(f);
429 0 : return NULL;
430 : }
431 0 : p = Tloc(bn, 0);
432 0 : n = (lng) st.st_size;
433 0 : while (n > 0 && (m = fread(p, 1, (size_t) MIN(1024*1024, n), f)) > 0) {
434 0 : p += m;
435 0 : n -= m;
436 : }
437 0 : fclose(f);
438 0 : if (n > 0) {
439 0 : GDKerror("couldn't read the complete file\n");
440 0 : BBPreclaim(bn);
441 0 : return NULL;
442 : }
443 0 : BATsetcount(bn, cap);
444 0 : bn->tnonil = cap == 0;
445 0 : bn->tnil = false;
446 0 : bn->tseqbase = oid_nil;
447 0 : if (cap > 1) {
448 0 : bn->tsorted = false;
449 0 : bn->trevsorted = false;
450 0 : bn->tkey = false;
451 : } else {
452 0 : bn->tsorted = ATOMlinear(tt);
453 0 : bn->trevsorted = ATOMlinear(tt);
454 0 : bn->tkey = true;
455 : }
456 : }
457 : return bn;
458 :
459 0 : notutf8:
460 0 : fclose(f);
461 0 : BBPreclaim(bn);
462 0 : GDKfree(p);
463 0 : GDKerror("input is not UTF-8\n");
464 0 : return NULL;
465 : }
466 :
467 : /*
468 : * If the BAT runs out of storage for BUNS it will reallocate space.
469 : * For memory mapped BATs we simple extend the administration after
470 : * having an assurance that the BAT still can be safely stored away.
471 : */
472 : BUN
473 21183 : BATgrows(BAT *b)
474 : {
475 21183 : BUN oldcap, newcap;
476 :
477 21183 : BATcheck(b, 0);
478 :
479 21183 : newcap = oldcap = BATcapacity(b);
480 21183 : if (newcap < BATTINY)
481 : newcap = 2 * BATTINY;
482 21183 : else if (newcap < 10 * BATTINY)
483 19780 : newcap = 4 * newcap;
484 1403 : else if (newcap < 50 * BATTINY)
485 827 : newcap = 2 * newcap;
486 576 : else if ((double) newcap * BATMARGIN <= (double) BUN_MAX)
487 574 : newcap = (BUN) ((double) newcap * BATMARGIN);
488 : else
489 : newcap = BUN_MAX;
490 21183 : if (newcap == oldcap) {
491 0 : if (newcap <= BUN_MAX - 10)
492 0 : newcap += 10;
493 : else
494 : newcap = BUN_MAX;
495 : }
496 21183 : if (ATOMstorage(b->ttype) == TYPE_msk) /* round up to multiple of 32 */
497 0 : newcap = (newcap + 31) & ~(BUN)31;
498 : return newcap;
499 : }
500 :
501 : /*
502 : * The routine should ensure that the BAT keeps its location in the
503 : * BAT buffer.
504 : *
505 : * Overflow in the other heaps are dealt with in the atom routines.
506 : * Here we merely copy their references into the new administration
507 : * space.
508 : */
509 : gdk_return
510 178999 : BATextend(BAT *b, BUN newcap)
511 : {
512 178999 : size_t theap_size;
513 178999 : gdk_return rc = GDK_SUCCEED;
514 :
515 178999 : assert(newcap <= BUN_MAX);
516 178999 : BATcheck(b, GDK_FAIL);
517 : /*
518 : * The main issue is to properly predict the new BAT size.
519 : * storage overflow. The assumption taken is that capacity
520 : * overflow is rare. It is changed only when the position of
521 : * the next available BUN surpasses the free area marker. Be
522 : * aware that the newcap should be greater than the old value,
523 : * otherwise you may easily corrupt the administration of
524 : * malloc.
525 : */
526 178999 : if (newcap <= BATcapacity(b)) {
527 : return GDK_SUCCEED;
528 : }
529 :
530 29533 : if (ATOMstorage(b->ttype) == TYPE_msk) {
531 1335 : newcap = (newcap + 31) & ~(BUN)31; /* round up to multiple of 32 */
532 1335 : theap_size = (size_t) (newcap / 8); /* in bytes */
533 : } else {
534 28198 : theap_size = (size_t) newcap << b->tshift;
535 : }
536 :
537 29533 : MT_lock_set(&b->theaplock);
538 29515 : if (b->theap->base) {
539 29515 : TRC_DEBUG(HEAP, "HEAPgrow in BATextend %s %zu %zu\n",
540 : b->theap->filename, b->theap->size, theap_size);
541 29515 : rc = HEAPgrow(&b->theap, theap_size, b->batRestricted == BAT_READ);
542 29532 : if (rc == GDK_SUCCEED)
543 29532 : b->batCapacity = newcap;
544 : } else {
545 0 : b->batCapacity = newcap;
546 : }
547 29532 : MT_lock_unset(&b->theaplock);
548 :
549 29530 : return rc;
550 : }
551 :
552 :
553 :
554 : /*
555 : * @+ BAT destruction
556 : * BATclear quickly removes all elements from a BAT. It must respect
557 : * the transaction rules; so stable elements must be moved to the
558 : * "deleted" section of the BAT (they cannot be fully deleted
559 : * yet). For the elements that really disappear, we must free
560 : * heapspace and unfix the atoms if they have fix/unfix handles. As an
561 : * optimization, in the case of no stable elements, we quickly empty
562 : * the heaps by copying a standard small empty image over them.
563 : */
564 : gdk_return
565 410 : BATclear(BAT *b, bool force)
566 : {
567 410 : BUN p, q;
568 :
569 410 : BATcheck(b, GDK_FAIL);
570 :
571 410 : if (!force && b->batInserted > 0) {
572 0 : GDKerror("cannot clear committed BAT\n");
573 0 : return GDK_FAIL;
574 : }
575 :
576 410 : TRC_DEBUG(ALGO, ALGOBATFMT "\n", ALGOBATPAR(b));
577 :
578 : /* kill all search accelerators */
579 410 : HASHdestroy(b);
580 410 : IMPSdestroy(b);
581 410 : OIDXdestroy(b);
582 410 : STRMPdestroy(b);
583 410 : RTREEdestroy(b);
584 410 : PROPdestroy(b);
585 :
586 410 : bat tvp = 0;
587 :
588 : /* we must dispose of all inserted atoms */
589 410 : MT_lock_set(&b->theaplock);
590 410 : if (force && BATatoms[b->ttype].atomDel == NULL) {
591 403 : assert(b->tvheap == NULL || b->tvheap->parentid == b->batCacheid);
592 : /* no stable elements: we do a quick heap clean */
593 : /* need to clean heap which keeps data even though the
594 : BUNs got removed. This means reinitialize when
595 : free > 0
596 : */
597 403 : if (b->tvheap && b->tvheap->free > 0) {
598 21 : Heap *th = GDKmalloc(sizeof(Heap));
599 :
600 21 : if (th == NULL) {
601 0 : MT_lock_unset(&b->theaplock);
602 0 : return GDK_FAIL;
603 : }
604 21 : *th = (Heap) {
605 21 : .farmid = b->tvheap->farmid,
606 21 : .parentid = b->tvheap->parentid,
607 : .dirty = true,
608 21 : .hasfile = b->tvheap->hasfile,
609 : };
610 21 : strcpy_len(th->filename, b->tvheap->filename, sizeof(th->filename));
611 21 : if (ATOMheap(b->ttype, th, 0) != GDK_SUCCEED) {
612 0 : MT_lock_unset(&b->theaplock);
613 0 : return GDK_FAIL;
614 : }
615 21 : tvp = b->tvheap->parentid;
616 21 : ATOMIC_INIT(&th->refs, 1);
617 21 : HEAPdecref(b->tvheap, false);
618 21 : b->tvheap = th;
619 : }
620 : } else {
621 : /* do heap-delete of all inserted atoms */
622 7 : void (*tatmdel)(Heap*,var_t*) = BATatoms[b->ttype].atomDel;
623 :
624 : /* TYPE_str has no del method, so we shouldn't get here */
625 7 : assert(tatmdel == NULL || b->twidth == sizeof(var_t));
626 0 : if (tatmdel) {
627 0 : BATiter bi = bat_iterator_nolock(b);
628 :
629 0 : for (p = b->batInserted, q = BATcount(b); p < q; p++)
630 0 : (*tatmdel)(b->tvheap, (var_t*) BUNtloc(bi,p));
631 0 : b->tvheap->dirty = true;
632 : }
633 : }
634 :
635 410 : b->batInserted = 0;
636 410 : b->batCount = 0;
637 410 : if (b->ttype == TYPE_void)
638 0 : b->batCapacity = 0;
639 410 : b->theap->free = 0;
640 410 : BAThseqbase(b, 0);
641 410 : BATtseqbase(b, ATOMtype(b->ttype) == TYPE_oid ? 0 : oid_nil);
642 410 : b->theap->dirty = true;
643 410 : b->tnonil = true;
644 410 : b->tnil = false;
645 410 : b->tsorted = b->trevsorted = ATOMlinear(b->ttype);
646 410 : b->tnosorted = b->tnorevsorted = 0;
647 410 : b->tkey = true;
648 410 : b->tnokey[0] = b->tnokey[1] = 0;
649 410 : b->tminpos = b->tmaxpos = BUN_NONE;
650 410 : b->tunique_est = 0;
651 410 : MT_lock_unset(&b->theaplock);
652 410 : if (tvp != 0 && tvp != b->batCacheid)
653 0 : BBPrelease(tvp);
654 : return GDK_SUCCEED;
655 : }
656 :
657 : /* free a cached BAT; leave the bat descriptor cached */
658 : void
659 450631 : BATfree(BAT *b)
660 : {
661 450631 : if (b == NULL)
662 : return;
663 :
664 : /* deallocate all memory for a bat */
665 450631 : MT_rwlock_rdlock(&b->thashlock);
666 450631 : BUN nunique = BUN_NONE;
667 450631 : if (b->thash && b->thash != (Hash *) 1) {
668 1768 : nunique = b->thash->nunique;
669 : }
670 450631 : MT_rwlock_rdunlock(&b->thashlock);
671 450631 : HASHfree(b);
672 450631 : IMPSfree(b);
673 450631 : OIDXfree(b);
674 450631 : STRMPfree(b);
675 450631 : RTREEfree(b);
676 450631 : MT_lock_set(&b->theaplock);
677 450631 : if (nunique != BUN_NONE) {
678 1768 : b->tunique_est = (double) nunique;
679 : }
680 : /* wait until there are no other references to the heap; a
681 : * reference is possible in e.g. BBPsync that uses a
682 : * bat_iterator directly on the BBP_desc, i.e. without fix */
683 450631 : while (b->theap && (ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1) {
684 0 : MT_lock_unset(&b->theaplock);
685 0 : MT_sleep_ms(1);
686 450631 : MT_lock_set(&b->theaplock);
687 : }
688 450631 : if (b->theap) {
689 450631 : assert((ATOMIC_GET(&b->theap->refs) & HEAPREFS) == 1);
690 450631 : assert(b->theap->parentid == b->batCacheid);
691 450631 : HEAPfree(b->theap, false);
692 : }
693 : /* wait until there are no other references to the heap; a
694 : * reference is possible in e.g. BBPsync that uses a
695 : * bat_iterator directly on the BBP_desc, i.e. without fix */
696 27889 : while (b->tvheap && (ATOMIC_GET(&b->tvheap->refs) & HEAPREFS) > 1) {
697 0 : MT_lock_unset(&b->theaplock);
698 0 : MT_sleep_ms(1);
699 450631 : MT_lock_set(&b->theaplock);
700 : }
701 450631 : if (b->tvheap) {
702 27889 : assert((ATOMIC_GET(&b->tvheap->refs) & HEAPREFS) == 1);
703 27889 : assert(b->tvheap->parentid == b->batCacheid);
704 27889 : HEAPfree(b->tvheap, false);
705 : }
706 450631 : MT_lock_unset(&b->theaplock);
707 : }
708 :
709 : /* free a cached BAT descriptor */
710 : void
711 15726244 : BATdestroy(BAT *b)
712 : {
713 15726244 : if (b->tvheap) {
714 26058 : ATOMIC_DESTROY(&b->tvheap->refs);
715 26058 : GDKfree(b->tvheap);
716 : }
717 15726244 : PROPdestroy_nolock(b);
718 15725258 : MT_lock_destroy(&b->theaplock);
719 15724868 : MT_lock_destroy(&b->batIdxLock);
720 15723362 : MT_rwlock_destroy(&b->thashlock);
721 15723580 : if (b->theap) {
722 444306 : ATOMIC_DESTROY(&b->theap->refs);
723 444306 : GDKfree(b->theap);
724 : }
725 15723441 : if (b->oldtail) {
726 0 : ATOMIC_AND(&b->oldtail->refs, ~DELAYEDREMOVE);
727 : /* the bat has not been committed, so we cannot remove
728 : * the old tail file */
729 0 : HEAPdecref(b->oldtail, false);
730 0 : b->oldtail = NULL;
731 : }
732 15723441 : GDKfree(b);
733 15726654 : }
734 :
735 : /*
736 : * @+ BAT copying
737 : *
738 : * BAT copying is an often used operation. So it deserves attention.
739 : * When making a copy of a BAT, the following aspects are of
740 : * importance:
741 : *
742 : * - the requested head and tail types. The purpose of the copy may be
743 : * to slightly change these types (e.g. void <-> oid). We may also
744 : * remap between types as long as they share the same
745 : * ATOMstorage(type), i.e. the types have the same physical
746 : * implementation. We may even want to allow 'dirty' trick such as
747 : * viewing a flt-column suddenly as int.
748 : *
749 : * To allow such changes, the desired column-types is a
750 : * parameter of COLcopy.
751 : *
752 : * - access mode. If we want a read-only copy of a read-only BAT, a
753 : * VIEW may do (in this case, the user may be after just an
754 : * independent BAT header and id). This is indicated by the
755 : * parameter (writable = FALSE).
756 : *
757 : * In other cases, we really want an independent physical copy
758 : * (writable = TRUE). Changing the mode to BAT_WRITE will be a
759 : * zero-cost operation if the BAT was copied with (writable = TRUE).
760 : *
761 : * In GDK, the result is a BAT that is BAT_WRITE iff (writable ==
762 : * TRUE).
763 : *
764 : * In these cases the copy becomes a logical view on the original,
765 : * which ensures that the original cannot be modified or destroyed
766 : * (which could affect the shared heaps).
767 : */
768 : static bool
769 403 : wrongtype(int t1, int t2)
770 : {
771 : /* check if types are compatible. be extremely forgiving */
772 403 : if (t1 != TYPE_void) {
773 403 : t1 = ATOMtype(ATOMstorage(t1));
774 403 : t2 = ATOMtype(ATOMstorage(t2));
775 403 : if (t1 != t2) {
776 362 : if (ATOMvarsized(t1) ||
777 362 : ATOMvarsized(t2) ||
778 362 : t1 == TYPE_msk || t2 == TYPE_msk ||
779 362 : ATOMsize(t1) != ATOMsize(t2) ||
780 362 : BATatoms[t1].atomFix ||
781 362 : BATatoms[t2].atomFix)
782 0 : return true;
783 : }
784 : }
785 : return false;
786 : }
787 :
788 : /*
789 : * There are four main implementation cases:
790 : * (1) we are allowed to return a view (zero effort),
791 : * (2) the result is void,void (zero effort),
792 : * (3) we can copy the heaps (memcopy, or even VM page sharing)
793 : * (4) we must insert BUN-by-BUN into the result (fallback)
794 : * The latter case is still optimized for the case that the result
795 : * is bat[void,T] for a simple fixed-size type T. In that case we
796 : * do inline array[T] inserts.
797 : */
798 : BAT *
799 44277 : COLcopy(BAT *b, int tt, bool writable, role_t role)
800 : {
801 44277 : bool slowcopy = false;
802 44277 : BAT *bn = NULL;
803 44277 : BATiter bi;
804 44277 : char strhash[GDK_STRHASHSIZE];
805 :
806 44277 : BATcheck(b, NULL);
807 44277 : assert(tt != TYPE_bat);
808 :
809 : /* maybe a bit ugly to change the requested bat type?? */
810 44277 : if (b->ttype == TYPE_void && !writable)
811 44277 : tt = TYPE_void;
812 :
813 44277 : if (tt != b->ttype && wrongtype(tt, b->ttype)) {
814 0 : GDKerror("wrong tail-type requested\n");
815 0 : return NULL;
816 : }
817 :
818 : /* in case of a string bat, we save the string heap hash table
819 : * while we have the lock so that we can restore it in the copy;
820 : * this is because during our operation, a parallel thread could
821 : * be adding strings to the vheap which would modify the hash
822 : * table and that would result in buckets containing values
823 : * beyond the original vheap that we're copying */
824 44277 : MT_lock_set(&b->theaplock);
825 44277 : bi = bat_iterator_nolock(b);
826 44277 : if (ATOMstorage(b->ttype) == TYPE_str && b->tvheap->free >= GDK_STRHASHSIZE)
827 12149 : memcpy(strhash, b->tvheap->base, GDK_STRHASHSIZE);
828 :
829 : #ifndef NDEBUG
830 44277 : bi.locked = true;
831 : #endif
832 44277 : HEAPincref(bi.h);
833 44278 : if (bi.vh)
834 15482 : HEAPincref(bi.vh);
835 44278 : MT_lock_unset(&b->theaplock);
836 :
837 : /* first try case (1); create a view, possibly with different
838 : * atom-types */
839 44277 : if (!writable &&
840 44277 : role == TRANSIENT &&
841 18022 : bi.restricted == BAT_READ &&
842 13954 : ATOMstorage(b->ttype) != TYPE_msk && /* no view on TYPE_msk */
843 13954 : (bi.h == NULL ||
844 13954 : bi.h->parentid == b->batCacheid ||
845 3405 : BBP_desc(bi.h->parentid)->batRestricted == BAT_READ)) {
846 13953 : bn = VIEWcreate(b->hseqbase, b);
847 13954 : if (bn == NULL) {
848 0 : goto bunins_failed;
849 : }
850 13954 : if (tt != bn->ttype) {
851 38 : bn->ttype = tt;
852 38 : if (bn->tvheap && !ATOMvarsized(tt)) {
853 0 : if (bn->tvheap->parentid != bn->batCacheid)
854 0 : BBPrelease(bn->tvheap->parentid);
855 0 : HEAPdecref(bn->tvheap, false);
856 0 : bn->tvheap = NULL;
857 : }
858 38 : bn->tseqbase = ATOMtype(tt) == TYPE_oid ? bi.tseq : oid_nil;
859 : }
860 13954 : bat_iterator_end(&bi);
861 13954 : return bn;
862 : } else {
863 : /* check whether we need case (4); BUN-by-BUN copy (by
864 : * setting slowcopy to true) */
865 30324 : if (ATOMsize(tt) != ATOMsize(bi.type)) {
866 : /* oops, void materialization */
867 : slowcopy = true;
868 29962 : } else if (BATatoms[tt].atomFix) {
869 : /* oops, we need to fix/unfix atoms */
870 : slowcopy = true;
871 29962 : } else if (bi.h && bi.h->parentid != b->batCacheid &&
872 5565 : BATcapacity(BBP_desc(bi.h->parentid)) > bi.count + bi.count) {
873 : /* reduced slice view: do not copy too much
874 : * garbage */
875 : slowcopy = true;
876 24557 : } else if (bi.vh && bi.vh->parentid != b->batCacheid &&
877 10560 : BATcount(BBP_desc(bi.vh->parentid)) > bi.count + bi.count) {
878 : /* reduced vheap view: do not copy too much
879 : * garbage; this really is a heuristic since the
880 : * vheap could be used completely, even if the
881 : * offset heap is only (less than) half the size
882 : * of the parent's offset heap */
883 8199 : slowcopy = true;
884 : }
885 :
886 30324 : bn = COLnew2(b->hseqbase, tt, bi.count, role, bi.width);
887 30324 : if (bn == NULL) {
888 0 : goto bunins_failed;
889 : }
890 30324 : if (bn->tvheap != NULL && bn->tvheap->base == NULL) {
891 : /* this combination can happen since the last
892 : * argument of COLnew2 not being zero triggers a
893 : * skip in the allocation of the tvheap */
894 12417 : if (ATOMheap(bn->ttype, bn->tvheap, bn->batCapacity) != GDK_SUCCEED) {
895 0 : goto bunins_failed;
896 : }
897 : }
898 :
899 30322 : if (tt == TYPE_void) {
900 : /* case (2): a void,void result => nothing to
901 : * copy! */
902 1114 : bn->theap->free = 0;
903 29208 : } else if (!slowcopy) {
904 : /* case (3): just copy the heaps */
905 21009 : if (bn->tvheap && HEAPextend(bn->tvheap, bi.vhfree, true) != GDK_SUCCEED) {
906 0 : goto bunins_failed;
907 : }
908 21011 : memcpy(bn->theap->base, bi.base, bi.hfree);
909 21011 : bn->theap->free = bi.hfree;
910 21011 : bn->theap->dirty = true;
911 21011 : if (bn->tvheap) {
912 9496 : memcpy(bn->tvheap->base, bi.vh->base, bi.vhfree);
913 9496 : bn->tvheap->free = bi.vhfree;
914 9496 : bn->tvheap->dirty = true;
915 9496 : if (ATOMstorage(b->ttype) == TYPE_str && bi.vhfree >= GDK_STRHASHSIZE)
916 8744 : memcpy(bn->tvheap->base, strhash, GDK_STRHASHSIZE);
917 : }
918 :
919 : /* make sure we use the correct capacity */
920 21011 : if (ATOMstorage(bn->ttype) == TYPE_msk)
921 0 : bn->batCapacity = (BUN) (bn->theap->size * 8);
922 21011 : else if (bn->ttype)
923 21011 : bn->batCapacity = (BUN) (bn->theap->size >> bn->tshift);
924 : else
925 0 : bn->batCapacity = 0;
926 16396 : } else if (BATatoms[tt].atomFix || tt != TYPE_void || ATOMextern(tt)) {
927 : /* case (4): one-by-one BUN insert (really slow) */
928 8199 : lng timeoffset = 0;
929 8199 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
930 8199 : if (qry_ctx != NULL) {
931 6489 : timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
932 : }
933 :
934 7367313 : TIMEOUT_LOOP_IDX_DECL(p, bi.count, timeoffset) {
935 7342263 : const void *t = BUNtail(bi, p);
936 :
937 7341152 : if (bunfastapp_nocheck(bn, t) != GDK_SUCCEED) {
938 0 : goto bunins_failed;
939 : }
940 : }
941 8197 : TIMEOUT_CHECK(timeoffset, GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
942 8197 : bn->theap->dirty |= bi.count > 0;
943 : } else if (tt != TYPE_void && bi.type == TYPE_void) {
944 : /* case (4): optimized for unary void
945 : * materialization */
946 : oid cur = bi.tseq, *dst = (oid *) Tloc(bn, 0);
947 : const oid inc = !is_oid_nil(cur);
948 :
949 : bn->theap->free = bi.count * sizeof(oid);
950 : bn->theap->dirty |= bi.count > 0;
951 : for (BUN p = 0; p < bi.count; p++) {
952 : dst[p] = cur;
953 : cur += inc;
954 : }
955 : } else if (ATOMstorage(bi.type) == TYPE_msk) {
956 : /* convert number of bits to number of bytes,
957 : * and round the latter up to a multiple of
958 : * 4 (copy in units of 4 bytes) */
959 : bn->theap->free = ((bi.count + 31) / 32) * 4;
960 : bn->theap->dirty |= bi.count > 0;
961 : memcpy(Tloc(bn, 0), bi.base, bn->theap->free);
962 : } else {
963 : /* case (4): optimized for simple array copy */
964 : bn->theap->free = bi.count << bn->tshift;
965 : bn->theap->dirty |= bi.count > 0;
966 : memcpy(Tloc(bn, 0), bi.base, bn->theap->free);
967 : }
968 : /* copy all properties (size+other) from the source bat */
969 30322 : BATsetcount(bn, bi.count);
970 : }
971 : /* set properties (note that types may have changed in the copy) */
972 60284 : if (ATOMtype(tt) == ATOMtype(bi.type)) {
973 30320 : if (ATOMtype(tt) == TYPE_oid) {
974 7796 : BATtseqbase(bn, bi.tseq);
975 : } else {
976 22524 : BATtseqbase(bn, oid_nil);
977 : }
978 30321 : BATkey(bn, bi.key);
979 30321 : bn->tsorted = bi.sorted;
980 30321 : bn->trevsorted = bi.revsorted;
981 30321 : bn->tnorevsorted = bi.norevsorted;
982 30321 : if (bi.nokey[0] != bi.nokey[1]) {
983 413 : bn->tnokey[0] = bi.nokey[0];
984 413 : bn->tnokey[1] = bi.nokey[1];
985 : } else {
986 29908 : bn->tnokey[0] = bn->tnokey[1] = 0;
987 : }
988 30321 : bn->tnosorted = bi.nosorted;
989 30321 : bn->tnonil = bi.nonil;
990 30321 : bn->tnil = bi.nil;
991 30321 : bn->tminpos = bi.minpos;
992 30321 : bn->tmaxpos = bi.maxpos;
993 30321 : bn->tunique_est = bi.unique_est;
994 3 : } else if (ATOMstorage(tt) == ATOMstorage(b->ttype) &&
995 3 : ATOMcompare(tt) == ATOMcompare(b->ttype)) {
996 3 : BUN h = bi.count;
997 3 : bn->tsorted = bi.sorted;
998 3 : bn->trevsorted = bi.revsorted;
999 3 : if (bi.key)
1000 2 : BATkey(bn, true);
1001 3 : bn->tnonil = bi.nonil;
1002 3 : bn->tnil = bi.nil;
1003 3 : if (bi.nosorted > 0 && bi.nosorted < h)
1004 1 : bn->tnosorted = bi.nosorted;
1005 : else
1006 2 : bn->tnosorted = 0;
1007 3 : if (bi.norevsorted > 0 && bi.norevsorted < h)
1008 3 : bn->tnorevsorted = bi.norevsorted;
1009 : else
1010 0 : bn->tnorevsorted = 0;
1011 3 : if (bi.nokey[0] < h &&
1012 3 : bi.nokey[1] < h &&
1013 : bi.nokey[0] != bi.nokey[1]) {
1014 0 : bn->tnokey[0] = bi.nokey[0];
1015 0 : bn->tnokey[1] = bi.nokey[1];
1016 : } else {
1017 3 : bn->tnokey[0] = bn->tnokey[1] = 0;
1018 : }
1019 3 : bn->tminpos = bi.minpos;
1020 3 : bn->tmaxpos = bi.maxpos;
1021 3 : bn->tunique_est = bi.unique_est;
1022 : } else {
1023 0 : bn->tsorted = bn->trevsorted = false; /* set based on count later */
1024 0 : bn->tnonil = bn->tnil = false;
1025 0 : bn->tnosorted = bn->tnorevsorted = 0;
1026 0 : bn->tnokey[0] = bn->tnokey[1] = 0;
1027 : }
1028 30324 : if (BATcount(bn) <= 1) {
1029 6014 : bn->tsorted = ATOMlinear(b->ttype);
1030 6014 : bn->trevsorted = ATOMlinear(b->ttype);
1031 6014 : bn->tkey = true;
1032 : }
1033 30324 : bat_iterator_end(&bi);
1034 30323 : if (!writable)
1035 4068 : bn->batRestricted = BAT_READ;
1036 30323 : TRC_DEBUG(ALGO, ALGOBATFMT " -> " ALGOBATFMT "\n",
1037 : ALGOBATPAR(b), ALGOBATPAR(bn));
1038 : return bn;
1039 0 : bunins_failed:
1040 0 : bat_iterator_end(&bi);
1041 0 : BBPreclaim(bn);
1042 : return NULL;
1043 : }
1044 :
1045 : /* Append an array of values of length count to the bat. For
1046 : * fixed-sized values, `values' is an array of values, for
1047 : * variable-sized values, `values' is an array of pointers to values.
1048 : * If values equals NULL, count times nil will be appended. */
1049 : gdk_return
1050 63765029 : BUNappendmulti(BAT *b, const void *values, BUN count, bool force)
1051 : {
1052 63765029 : BUN p;
1053 63765029 : BUN nunique = 0;
1054 :
1055 63765029 : BATcheck(b, GDK_FAIL);
1056 :
1057 63765029 : assert(!VIEWtparent(b));
1058 :
1059 63765029 : if (count == 0)
1060 : return GDK_SUCCEED;
1061 :
1062 63767334 : TRC_DEBUG(ALGO, ALGOBATFMT " appending " BUNFMT " values%s\n", ALGOBATPAR(b), count, values ? "" : " (all nil)");
1063 :
1064 63766299 : p = BATcount(b); /* insert at end */
1065 63766299 : if (p == BUN_MAX || BATcount(b) + count >= BUN_MAX) {
1066 0 : GDKerror("bat too large\n");
1067 0 : return GDK_FAIL;
1068 : }
1069 :
1070 63766299 : ALIGNapp(b, force, GDK_FAIL);
1071 : /* load hash so that we can maintain it */
1072 63775550 : (void) BATcheckhash(b);
1073 :
1074 63806907 : if (b->ttype == TYPE_void && BATtdense(b)) {
1075 0 : const oid *ovals = values;
1076 0 : bool dense = b->batCount == 0 || (ovals != NULL && b->tseqbase + 1 == ovals[0]);
1077 0 : if (ovals) {
1078 0 : for (BUN i = 1; dense && i < count; i++) {
1079 0 : dense = ovals[i - 1] + 1 == ovals[i];
1080 : }
1081 : }
1082 0 : if (dense) {
1083 0 : MT_lock_set(&b->theaplock);
1084 0 : if (b->batCount == 0)
1085 0 : b->tseqbase = ovals ? ovals[0] : oid_nil;
1086 0 : BATsetcount(b, BATcount(b) + count);
1087 0 : MT_lock_unset(&b->theaplock);
1088 0 : return GDK_SUCCEED;
1089 : } else {
1090 : /* we need to materialize b; allocate enough capacity */
1091 0 : if (BATmaterialize(b, BATcount(b) + count) != GDK_SUCCEED)
1092 : return GDK_FAIL;
1093 : }
1094 : }
1095 :
1096 63806907 : if (unshare_varsized_heap(b) != GDK_SUCCEED) {
1097 : return GDK_FAIL;
1098 : }
1099 :
1100 63804485 : if (BATcount(b) + count > BATcapacity(b)) {
1101 : /* if needed space exceeds a normal growth extend just
1102 : * with what's needed */
1103 11163 : BUN ncap = BATcount(b) + count;
1104 11163 : BUN grows = BATgrows(b);
1105 :
1106 11163 : if (ncap > grows)
1107 : grows = ncap;
1108 11163 : gdk_return rc = BATextend(b, grows);
1109 11162 : if (rc != GDK_SUCCEED)
1110 : return rc;
1111 : }
1112 :
1113 63804484 : const void *t = b->ttype == TYPE_msk ? &(msk){false} : ATOMnilptr(b->ttype);
1114 63804484 : MT_lock_set(&b->theaplock);
1115 63777276 : BATiter bi = bat_iterator_nolock(b);
1116 63777276 : const ValRecord *prop;
1117 63777276 : ValRecord minprop, maxprop;
1118 63777276 : const void *minbound = NULL, *maxbound = NULL;
1119 63777287 : if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL &&
1120 11 : VALcopy(&minprop, prop) != NULL)
1121 11 : minbound = VALptr(&minprop);
1122 63810548 : if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL &&
1123 10 : VALcopy(&maxprop, prop) != NULL)
1124 10 : maxbound = VALptr(&maxprop);
1125 63805986 : const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
1126 63793279 : MT_lock_unset(&b->theaplock);
1127 63809122 : MT_rwlock_wrlock(&b->thashlock);
1128 63812906 : if (values && b->ttype) {
1129 63802102 : int (*atomcmp) (const void *, const void *) = ATOMcompare(b->ttype);
1130 63802102 : const void *atomnil = ATOMnilptr(b->ttype);
1131 63802102 : const void *minvalp = NULL, *maxvalp = NULL;
1132 63802102 : if (b->tvheap) {
1133 29481315 : if (bi.minpos != BUN_NONE)
1134 26643536 : minvalp = BUNtvar(bi, bi.minpos);
1135 29479551 : if (bi.maxpos != BUN_NONE)
1136 26642063 : maxvalp = BUNtvar(bi, bi.maxpos);
1137 29479374 : const void *vbase = b->tvheap->base;
1138 58981703 : for (BUN i = 0; i < count; i++) {
1139 29504093 : t = ((void **) values)[i];
1140 29504093 : bool isnil = atomcmp(t, atomnil) == 0;
1141 29501366 : gdk_return rc;
1142 29501366 : if (notnull && isnil) {
1143 0 : assert(0);
1144 : GDKerror("NULL value not within bounds\n");
1145 : rc = GDK_FAIL;
1146 29501366 : } else if (minbound &&
1147 29501366 : !isnil &&
1148 0 : atomcmp(t, minbound) < 0) {
1149 0 : assert(0);
1150 : GDKerror("value not within bounds\n");
1151 : rc = GDK_FAIL;
1152 29501366 : } else if (maxbound &&
1153 0 : !isnil &&
1154 0 : atomcmp(t, maxbound) >= 0) {
1155 0 : assert(0);
1156 : GDKerror("value not within bounds\n");
1157 : rc = GDK_FAIL;
1158 : } else {
1159 29501366 : rc = tfastins_nocheckVAR(b, p, t);
1160 : }
1161 29503149 : if (rc != GDK_SUCCEED) {
1162 0 : MT_rwlock_wrunlock(&b->thashlock);
1163 0 : if (minbound)
1164 0 : VALclear(&minprop);
1165 0 : if (maxbound)
1166 0 : VALclear(&maxprop);
1167 0 : return rc;
1168 : }
1169 29503149 : if (vbase != b->tvheap->base) {
1170 : /* tvheap changed location, so
1171 : * pointers may need to be
1172 : * updated (not if they were
1173 : * initialized from t below, but
1174 : * we don't know) */
1175 3541 : BUN minpos = bi.minpos;
1176 3541 : BUN maxpos = bi.maxpos;
1177 3541 : MT_lock_set(&b->theaplock);
1178 3541 : bi = bat_iterator_nolock(b);
1179 3541 : MT_lock_unset(&b->theaplock);
1180 3541 : bi.minpos = minpos;
1181 3541 : bi.maxpos = maxpos;
1182 3541 : vbase = b->tvheap->base;
1183 3541 : if (bi.minpos != BUN_NONE)
1184 2647 : minvalp = BUNtvar(bi, bi.minpos);
1185 3541 : if (bi.maxpos != BUN_NONE)
1186 2646 : maxvalp = BUNtvar(bi, bi.maxpos);
1187 : }
1188 29503149 : if (!isnil) {
1189 25867217 : if (p == 0) {
1190 202180 : bi.minpos = bi.maxpos = 0;
1191 202180 : minvalp = maxvalp = t;
1192 : } else {
1193 50265500 : if (bi.minpos != BUN_NONE &&
1194 24600725 : atomcmp(minvalp, t) > 0) {
1195 73474 : bi.minpos = p;
1196 73474 : minvalp = t;
1197 : }
1198 50262842 : if (bi.maxpos != BUN_NONE &&
1199 24598625 : atomcmp(maxvalp, t) < 0) {
1200 2248142 : bi.maxpos = p;
1201 2248142 : maxvalp = t;
1202 : }
1203 : }
1204 : }
1205 29502329 : p++;
1206 : }
1207 29477610 : if (minbound)
1208 0 : VALclear(&minprop);
1209 29477856 : if (maxbound)
1210 0 : VALclear(&maxprop);
1211 29477811 : if (b->thash) {
1212 6187 : p -= count;
1213 12374 : for (BUN i = 0; i < count; i++) {
1214 6187 : t = ((void **) values)[i];
1215 6187 : HASHappend_locked(b, p, t);
1216 6187 : p++;
1217 : }
1218 6187 : nunique = b->thash ? b->thash->nunique : 0;
1219 : }
1220 34320787 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
1221 12757 : bi.minpos = bi.maxpos = BUN_NONE;
1222 12757 : minvalp = maxvalp = NULL;
1223 25514 : for (BUN i = 0; i < count; i++) {
1224 12757 : t = (void *) ((char *) values + (i << b->tshift));
1225 12757 : mskSetVal(b, p, *(msk *) t);
1226 12757 : p++;
1227 : }
1228 : } else {
1229 34308030 : if (bi.minpos != BUN_NONE)
1230 32853116 : minvalp = BUNtloc(bi, bi.minpos);
1231 34308030 : if (bi.maxpos != BUN_NONE)
1232 32894831 : maxvalp = BUNtloc(bi, bi.maxpos);
1233 68667569 : for (BUN i = 0; i < count; i++) {
1234 34373768 : t = (void *) ((char *) values + (i << b->tshift));
1235 34373768 : gdk_return rc = tfastins_nocheckFIX(b, p, t);
1236 34366310 : if (rc != GDK_SUCCEED) {
1237 0 : MT_rwlock_wrunlock(&b->thashlock);
1238 0 : return rc;
1239 : }
1240 34366310 : if (b->thash) {
1241 477491 : HASHappend_locked(b, p, t);
1242 : }
1243 34366310 : if (atomcmp(t, atomnil) != 0) {
1244 33369755 : if (p == 0) {
1245 367376 : bi.minpos = bi.maxpos = 0;
1246 367376 : minvalp = maxvalp = t;
1247 : } else {
1248 65886001 : if (bi.minpos != BUN_NONE &&
1249 32884276 : atomcmp(minvalp, t) > 0) {
1250 35050 : bi.minpos = p;
1251 35050 : minvalp = t;
1252 : }
1253 65924906 : if (bi.maxpos != BUN_NONE &&
1254 32925277 : atomcmp(maxvalp, t) < 0) {
1255 7983316 : bi.maxpos = p;
1256 7983316 : maxvalp = t;
1257 : }
1258 : }
1259 : }
1260 34359539 : p++;
1261 : }
1262 34293801 : nunique = b->thash ? b->thash->nunique : 0;
1263 : }
1264 : } else {
1265 25230 : for (BUN i = 0; i < count; i++) {
1266 14427 : gdk_return rc = tfastins_nocheck(b, p, t);
1267 14426 : if (rc != GDK_SUCCEED) {
1268 0 : MT_rwlock_wrunlock(&b->thashlock);
1269 0 : return rc;
1270 : }
1271 14426 : if (b->thash) {
1272 0 : HASHappend_locked(b, p, t);
1273 : }
1274 14426 : p++;
1275 : }
1276 10803 : nunique = b->thash ? b->thash->nunique : 0;
1277 : }
1278 63795172 : MT_lock_set(&b->theaplock);
1279 63791511 : b->tminpos = bi.minpos;
1280 63791511 : b->tmaxpos = bi.maxpos;
1281 63791511 : if (count > BATcount(b) / gdk_unique_estimate_keep_fraction)
1282 16651449 : b->tunique_est = 0;
1283 :
1284 63791511 : if (b->ttype == TYPE_oid) {
1285 : /* spend extra effort on oid (possible candidate list) */
1286 365435 : if (values == NULL || is_oid_nil(((oid *) values)[0])) {
1287 156 : b->tnil = true;
1288 156 : b->tnonil = false;
1289 156 : b->tsorted = false;
1290 156 : b->trevsorted = false;
1291 156 : b->tkey = false;
1292 156 : b->tseqbase = oid_nil;
1293 : } else {
1294 365279 : if (b->batCount == 0) {
1295 5621 : b->tsorted = true;
1296 5621 : b->trevsorted = true;
1297 5621 : b->tkey = true;
1298 5621 : b->tseqbase = count == 1 ? ((oid *) values)[0] : oid_nil;
1299 5621 : b->tnil = false;
1300 5621 : b->tnonil = true;
1301 : } else {
1302 359658 : if (!is_oid_nil(b->tseqbase) &&
1303 208762 : (count > 1 ||
1304 208762 : b->tseqbase + b->batCount != ((oid *) values)[0]))
1305 1692 : b->tseqbase = oid_nil;
1306 359658 : if (b->tsorted && !is_oid_nil(((oid *) b->theap->base)[b->batCount - 1]) && ((oid *) b->theap->base)[b->batCount - 1] > ((oid *) values)[0]) {
1307 44 : b->tsorted = false;
1308 44 : if (b->tnosorted == 0)
1309 44 : b->tnosorted = b->batCount;
1310 : }
1311 359658 : if (b->trevsorted && !is_oid_nil(((oid *) values)[0]) && ((oid *) b->theap->base)[b->batCount - 1] < ((oid *) values)[0]) {
1312 4845 : b->trevsorted = false;
1313 4845 : if (b->tnorevsorted == 0)
1314 4845 : b->tnorevsorted = b->batCount;
1315 : }
1316 359658 : if (b->tkey) {
1317 359318 : if (((oid *) b->theap->base)[b->batCount - 1] == ((oid *) values)[0]) {
1318 17 : b->tkey = false;
1319 17 : if (b->tnokey[1] == 0) {
1320 17 : b->tnokey[0] = b->batCount - 1;
1321 17 : b->tnokey[1] = b->batCount;
1322 : }
1323 359301 : } else if (!b->tsorted && !b->trevsorted)
1324 32 : b->tkey = false;
1325 : }
1326 : }
1327 365279 : for (BUN i = 1; i < count; i++) {
1328 0 : if (is_oid_nil(((oid *) values)[i])) {
1329 0 : b->tnil = true;
1330 0 : b->tnonil = false;
1331 0 : b->tsorted = false;
1332 0 : b->trevsorted = false;
1333 0 : b->tkey = false;
1334 0 : b->tseqbase = oid_nil;
1335 0 : break;
1336 : }
1337 0 : if (((oid *) values)[i - 1] == ((oid *) values)[i]) {
1338 0 : b->tkey = false;
1339 0 : if (b->tnokey[1] == 0) {
1340 0 : b->tnokey[0] = b->batCount + i - 1;
1341 0 : b->tnokey[1] = b->batCount + i;
1342 : }
1343 0 : } else if (((oid *) values)[i - 1] > ((oid *) values)[i]) {
1344 0 : b->tsorted = false;
1345 0 : if (b->tnosorted == 0)
1346 0 : b->tnosorted = b->batCount + i;
1347 0 : if (!b->trevsorted)
1348 0 : b->tkey = false;
1349 : } else {
1350 0 : if (((oid *) values)[i - 1] + 1 != ((oid *) values)[i])
1351 0 : b->tseqbase = oid_nil;
1352 0 : b->trevsorted = false;
1353 0 : if (b->tnorevsorted == 0)
1354 0 : b->tnorevsorted = b->batCount + i;
1355 0 : if (!b->tsorted)
1356 0 : b->tkey = false;
1357 : }
1358 : }
1359 : }
1360 63426076 : } else if (!ATOMlinear(b->ttype)) {
1361 12757 : b->tnil = b->tnonil = false;
1362 12757 : b->tsorted = b->trevsorted = b->tkey = false;
1363 63413319 : } else if (b->batCount == 0) {
1364 572549 : if (values == NULL) {
1365 0 : b->tsorted = b->trevsorted = true;
1366 0 : b->tkey = count == 1;
1367 0 : b->tnil = true;
1368 0 : b->tnonil = false;
1369 0 : b->tunique_est = 1;
1370 : } else {
1371 572549 : int c;
1372 572549 : b->tnil = b->tnonil = false;
1373 572549 : switch (count) {
1374 571990 : case 1:
1375 571990 : b->tsorted = b->trevsorted = b->tkey = true;
1376 571990 : b->tunique_est = 1;
1377 571990 : break;
1378 82 : case 2:
1379 82 : if (b->tvheap)
1380 25 : c = ATOMcmp(b->ttype,
1381 : ((void **) values)[0],
1382 : ((void **) values)[1]);
1383 : else
1384 57 : c = ATOMcmp(b->ttype,
1385 : values,
1386 : (char *) values + b->twidth);
1387 82 : b->tsorted = c <= 0;
1388 82 : b->tnosorted = !b->tsorted;
1389 82 : b->trevsorted = c >= 0;
1390 82 : b->tnorevsorted = !b->trevsorted;
1391 82 : b->tkey = c != 0;
1392 82 : b->tnokey[0] = 0;
1393 82 : b->tnokey[1] = !b->tkey;
1394 82 : b->tunique_est = (double) (1 + b->tkey);
1395 82 : break;
1396 477 : default:
1397 477 : b->tsorted = b->trevsorted = b->tkey = false;
1398 477 : break;
1399 : }
1400 : }
1401 62840770 : } else if (b->batCount == 1 && count == 1) {
1402 413640 : bi = bat_iterator_nolock(b);
1403 413640 : t = b->ttype == TYPE_msk ? &(msk){false} : ATOMnilptr(b->ttype);
1404 413640 : if (values != NULL) {
1405 413700 : if (b->tvheap)
1406 152774 : t = ((void **) values)[0];
1407 : else
1408 : t = values;
1409 : }
1410 413640 : int c = ATOMcmp(b->ttype, BUNtail(bi, 0), t);
1411 413470 : b->tsorted = c <= 0;
1412 413470 : b->tnosorted = !b->tsorted;
1413 413470 : b->trevsorted = c >= 0;
1414 413470 : b->tnorevsorted = !b->trevsorted;
1415 413470 : b->tkey = c != 0;
1416 413470 : b->tnokey[0] = 0;
1417 413470 : b->tnokey[1] = !b->tkey;
1418 413470 : b->tunique_est = (double) (1 + b->tkey);
1419 413470 : b->tnil |= values == NULL;
1420 413470 : b->tnonil = false;
1421 : } else {
1422 62427130 : b->tnil |= values == NULL;
1423 62427130 : b->tnonil = false;
1424 62427130 : b->tsorted = b->trevsorted = b->tkey = false;
1425 : }
1426 63791341 : BATsetcount(b, p);
1427 63783932 : if (nunique != 0)
1428 483678 : b->tunique_est = (double) nunique;
1429 63783932 : MT_lock_unset(&b->theaplock);
1430 63793168 : MT_rwlock_wrunlock(&b->thashlock);
1431 :
1432 63802282 : IMPSdestroy(b); /* no support for inserts in imprints yet */
1433 63794024 : OIDXdestroy(b);
1434 63786142 : STRMPdestroy(b); /* TODO: use STRMPappendBitstring */
1435 63778641 : RTREEdestroy(b);
1436 63778641 : return GDK_SUCCEED;
1437 : }
1438 :
1439 : /* Append a single value to the bat. */
1440 : gdk_return
1441 48405314 : BUNappend(BAT *b, const void *t, bool force)
1442 : {
1443 48405314 : return BUNappendmulti(b, b->ttype && b->tvheap ? (const void *) &t : (const void *) t, 1, force);
1444 : }
1445 :
1446 : gdk_return
1447 4 : BUNdelete(BAT *b, oid o)
1448 : {
1449 4 : BUN p;
1450 4 : BATiter bi = bat_iterator_nolock(b);
1451 4 : const void *val;
1452 4 : bool locked = false;
1453 4 : BUN nunique;
1454 :
1455 4 : assert(!is_oid_nil(b->hseqbase) || BATcount(b) == 0);
1456 4 : if (o < b->hseqbase || o >= b->hseqbase + BATcount(b)) {
1457 : /* value already not there */
1458 : return GDK_SUCCEED;
1459 : }
1460 4 : assert(BATcount(b) > 0); /* follows from "if" above */
1461 4 : p = o - b->hseqbase;
1462 4 : if (p < b->batInserted) {
1463 0 : GDKerror("cannot delete committed value\n");
1464 0 : return GDK_FAIL;
1465 : }
1466 4 : TRC_DEBUG(ALGO, ALGOBATFMT " deleting oid " OIDFMT "\n", ALGOBATPAR(b), o);
1467 : /* load hash so that we can maintain it */
1468 4 : (void) BATcheckhash(b);
1469 :
1470 4 : val = BUNtail(bi, p);
1471 : /* writing the values should be locked, reading could be done
1472 : * unlocked (since we're the only thread that should be changing
1473 : * anything) */
1474 4 : MT_lock_set(&b->theaplock);
1475 4 : if (b->tmaxpos == p)
1476 1 : b->tmaxpos = BUN_NONE;
1477 4 : if (b->tminpos == p)
1478 0 : b->tminpos = BUN_NONE;
1479 4 : MT_lock_unset(&b->theaplock);
1480 4 : if (ATOMunfix(b->ttype, val) != GDK_SUCCEED)
1481 : return GDK_FAIL;
1482 4 : nunique = HASHdelete(&bi, p, val);
1483 4 : ATOMdel(b->ttype, b->tvheap, (var_t *) BUNtloc(bi, p));
1484 4 : if (p != BATcount(b) - 1 &&
1485 2 : (b->ttype != TYPE_void || BATtdense(b))) {
1486 : /* replace to-be-delete BUN with last BUN; materialize
1487 : * void column before doing so */
1488 2 : if (b->ttype == TYPE_void &&
1489 0 : BATmaterialize(b, BUN_NONE) != GDK_SUCCEED)
1490 : return GDK_FAIL;
1491 2 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1492 0 : msk mval = mskGetVal(b, BATcount(b) - 1);
1493 0 : assert(b->thash == NULL);
1494 0 : mskSetVal(b, p, mval);
1495 : /* don't leave garbage */
1496 0 : mskClr(b, BATcount(b) - 1);
1497 : } else {
1498 2 : val = Tloc(b, BATcount(b) - 1);
1499 2 : nunique = HASHdelete(&bi, BATcount(b) - 1, val);
1500 2 : memcpy(Tloc(b, p), val, b->twidth);
1501 2 : nunique = HASHinsert(&bi, p, val);
1502 2 : MT_lock_set(&b->theaplock);
1503 2 : locked = true;
1504 2 : if (b->tminpos == BATcount(b) - 1)
1505 0 : b->tminpos = p;
1506 2 : if (b->tmaxpos == BATcount(b) - 1)
1507 1 : b->tmaxpos = p;
1508 : }
1509 : /* no longer sorted */
1510 2 : if (!locked) {
1511 0 : MT_lock_set(&b->theaplock);
1512 0 : locked = true;
1513 : }
1514 2 : b->tsorted = b->trevsorted = false;
1515 2 : b->theap->dirty = true;
1516 : }
1517 4 : if (!locked)
1518 2 : MT_lock_set(&b->theaplock);
1519 4 : if (b->tnosorted >= p)
1520 0 : b->tnosorted = 0;
1521 4 : if (b->tnorevsorted >= p)
1522 1 : b->tnorevsorted = 0;
1523 4 : b->batCount--;
1524 4 : if (nunique != 0)
1525 0 : b->tunique_est = (double) nunique;
1526 4 : else if (BATcount(b) < gdk_unique_estimate_keep_fraction)
1527 4 : b->tunique_est = 0;
1528 4 : if (b->batCount <= 1) {
1529 : /* some trivial properties */
1530 0 : b->tkey = true;
1531 0 : b->tsorted = b->trevsorted = true;
1532 0 : b->tnosorted = b->tnorevsorted = 0;
1533 0 : if (b->batCount == 0) {
1534 0 : b->tnil = false;
1535 0 : b->tnonil = true;
1536 : }
1537 : }
1538 4 : MT_lock_unset(&b->theaplock);
1539 4 : IMPSdestroy(b);
1540 4 : OIDXdestroy(b);
1541 4 : return GDK_SUCCEED;
1542 : }
1543 :
1544 : /* @- BUN replace
1545 : * The last operation in this context is BUN replace. It assumes that
1546 : * the header denotes a key. The old value association is destroyed
1547 : * (if it exists in the first place) and the new value takes its
1548 : * place.
1549 : *
1550 : * In order to make updates on void columns workable; replaces on them
1551 : * are always done in-place. Performing them without bun-movements
1552 : * greatly simplifies the problem. The 'downside' is that when
1553 : * transaction management has to be performed, replaced values should
1554 : * be saved explicitly.
1555 : */
1556 : static gdk_return
1557 1065515 : BUNinplacemulti(BAT *b, const oid *positions, const void *values, BUN count, bool force, bool autoincr)
1558 : {
1559 1065515 : BUN prv, nxt;
1560 1065515 : const void *val;
1561 1065515 : int (*atomcmp) (const void *, const void *) = ATOMcompare(b->ttype);
1562 1065515 : const void *atomnil = ATOMnilptr(b->ttype);
1563 :
1564 1065515 : MT_lock_set(&b->theaplock);
1565 1065542 : BUN last = BATcount(b) - 1;
1566 1065542 : BATiter bi = bat_iterator_nolock(b);
1567 : /* zap alignment info */
1568 1065542 : if (!force && (b->batRestricted != BAT_WRITE ||
1569 547086 : ((ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1))) {
1570 0 : MT_lock_unset(&b->theaplock);
1571 0 : GDKerror("access denied to %s, aborting.\n",
1572 : BATgetId(b));
1573 0 : assert(0);
1574 : return GDK_FAIL;
1575 : }
1576 1065542 : TRC_DEBUG(ALGO, ALGOBATFMT " replacing " BUNFMT " values\n", ALGOBATPAR(b), count);
1577 1065417 : if (b->ttype == TYPE_void) {
1578 0 : PROPdestroy(b);
1579 0 : b->tminpos = BUN_NONE;
1580 0 : b->tmaxpos = BUN_NONE;
1581 0 : b->tunique_est = 0.0;
1582 1065417 : } else if (count > BATcount(b) / gdk_unique_estimate_keep_fraction) {
1583 530152 : b->tunique_est = 0;
1584 : }
1585 1065417 : const ValRecord *prop;
1586 1065417 : ValRecord minprop, maxprop;
1587 1065417 : const void *minbound = NULL, *maxbound = NULL;
1588 1065417 : if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL &&
1589 0 : VALcopy(&minprop, prop) != NULL)
1590 0 : minbound = VALptr(&minprop);
1591 1065474 : if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL &&
1592 0 : VALcopy(&maxprop, prop) != NULL)
1593 0 : maxbound = VALptr(&maxprop);
1594 1065455 : const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
1595 1065254 : MT_lock_unset(&b->theaplock);
1596 : /* load hash so that we can maintain it */
1597 1065458 : (void) BATcheckhash(b);
1598 1065524 : MT_rwlock_wrlock(&b->thashlock);
1599 2130835 : for (BUN i = 0; i < count; i++) {
1600 1065510 : BUN p = autoincr ? positions[0] - b->hseqbase + i : positions[i] - b->hseqbase;
1601 1065568 : const void *t = b->ttype && b->tvheap ?
1602 1209790 : ((const void **) values)[i] :
1603 921230 : (const void *) ((const char *) values + (i << b->tshift));
1604 1065510 : bool isnil = atomnil && atomcmp(t, atomnil) == 0;
1605 1065170 : if (notnull && isnil) {
1606 0 : assert(0);
1607 : GDKerror("NULL value not within bounds\n");
1608 : MT_rwlock_wrunlock(&b->thashlock);
1609 : goto bailout;
1610 1065170 : } else if (!isnil &&
1611 78 : ((minbound &&
1612 1018760 : atomcmp(t, minbound) < 0) ||
1613 0 : (maxbound &&
1614 0 : atomcmp(t, maxbound) >= 0))) {
1615 0 : assert(0);
1616 : GDKerror("value not within bounds\n");
1617 : MT_rwlock_wrunlock(&b->thashlock);
1618 : goto bailout;
1619 : }
1620 :
1621 : /* retrieve old value, but if this comes from the
1622 : * logger, we need to deal with offsets that point
1623 : * outside of the valid vheap */
1624 1065248 : if (b->ttype == TYPE_void) {
1625 0 : val = BUNtpos(bi, p);
1626 1065248 : } else if (bi.type == TYPE_msk) {
1627 1840 : val = BUNtmsk(bi, p);
1628 1063408 : } else if (b->tvheap) {
1629 144227 : size_t off = BUNtvaroff(bi, p);
1630 144230 : if (off < bi.vhfree)
1631 144224 : val = bi.vh->base + off;
1632 : else
1633 : val = NULL; /* bad offset */
1634 : } else {
1635 919181 : val = BUNtloc(bi, p);
1636 : }
1637 :
1638 1065245 : if (val) {
1639 1065245 : if (atomcmp(val, t) == 0)
1640 240499 : continue; /* nothing to do */
1641 824795 : if (!isnil &&
1642 10082 : b->tnil &&
1643 10084 : atomcmp(val, atomnil) == 0) {
1644 : /* if old value is nil and new value
1645 : * isn't, we're not sure anymore about
1646 : * the nil property, so we must clear
1647 : * it */
1648 10081 : MT_lock_set(&b->theaplock);
1649 10077 : b->tnil = false;
1650 10077 : MT_lock_unset(&b->theaplock);
1651 : }
1652 824795 : if (b->ttype != TYPE_void) {
1653 824798 : if (bi.maxpos != BUN_NONE) {
1654 514866 : if (!isnil && atomcmp(BUNtail(bi, bi.maxpos), t) < 0) {
1655 : /* new value is larger
1656 : * than previous
1657 : * largest */
1658 53804 : bi.maxpos = p;
1659 461063 : } else if (bi.maxpos == p && atomcmp(BUNtail(bi, bi.maxpos), t) != 0) {
1660 : /* old value is equal to
1661 : * largest and new value
1662 : * is smaller or nil (see
1663 : * above), so we don't
1664 : * know anymore which is
1665 : * the largest */
1666 478 : bi.maxpos = BUN_NONE;
1667 : }
1668 : }
1669 824799 : if (bi.minpos != BUN_NONE) {
1670 363117 : if (!isnil && atomcmp(BUNtail(bi, bi.minpos), t) > 0) {
1671 : /* new value is smaller
1672 : * than previous
1673 : * smallest */
1674 423 : bi.minpos = p;
1675 362691 : } else if (bi.minpos == p && atomcmp(BUNtail(bi, bi.minpos), t) != 0) {
1676 : /* old value is equal to
1677 : * smallest and new value
1678 : * is larger or nil (see
1679 : * above), so we don't
1680 : * know anymore which is
1681 : * the largest */
1682 611 : bi.minpos = BUN_NONE;
1683 : }
1684 : }
1685 : }
1686 824793 : HASHdelete_locked(&bi, p, val); /* first delete old value from hash */
1687 : } else {
1688 : /* out of range old value, so the properties and
1689 : * hash cannot be trusted */
1690 6 : PROPdestroy(b);
1691 0 : Hash *hs = b->thash;
1692 0 : if (hs) {
1693 0 : b->thash = NULL;
1694 0 : doHASHdestroy(b, hs);
1695 : }
1696 0 : MT_lock_set(&b->theaplock);
1697 0 : bi.minpos = BUN_NONE;
1698 0 : bi.maxpos = BUN_NONE;
1699 0 : b->tunique_est = 0.0;
1700 0 : MT_lock_unset(&b->theaplock);
1701 : }
1702 824817 : OIDXdestroy(b);
1703 824856 : IMPSdestroy(b);
1704 824845 : STRMPdestroy(b);
1705 824826 : RTREEdestroy(b);
1706 :
1707 824851 : if (b->tvheap && b->ttype) {
1708 72473 : var_t _d;
1709 72473 : ptr _ptr;
1710 72473 : _ptr = BUNtloc(bi, p);
1711 72473 : switch (b->twidth) {
1712 7947 : case 1:
1713 7947 : _d = (var_t) * (uint8_t *) _ptr + GDK_VAROFFSET;
1714 7947 : break;
1715 59204 : case 2:
1716 59204 : _d = (var_t) * (uint16_t *) _ptr + GDK_VAROFFSET;
1717 59204 : break;
1718 5322 : case 4:
1719 5322 : _d = (var_t) * (uint32_t *) _ptr;
1720 5322 : break;
1721 : #if SIZEOF_VAR_T == 8
1722 0 : case 8:
1723 0 : _d = (var_t) * (uint64_t *) _ptr;
1724 0 : break;
1725 : #endif
1726 : default:
1727 0 : MT_UNREACHABLE();
1728 : }
1729 72473 : MT_lock_set(&b->theaplock);
1730 72467 : if (ATOMreplaceVAR(b, &_d, t) != GDK_SUCCEED) {
1731 0 : MT_lock_unset(&b->theaplock);
1732 0 : MT_rwlock_wrunlock(&b->thashlock);
1733 0 : goto bailout;
1734 : }
1735 72496 : MT_lock_unset(&b->theaplock);
1736 72500 : if (b->twidth < SIZEOF_VAR_T &&
1737 72497 : (b->twidth <= 2 ? _d - GDK_VAROFFSET : _d) >= ((size_t) 1 << (8 << b->tshift))) {
1738 : /* doesn't fit in current heap, upgrade it */
1739 78 : if (GDKupgradevarheap(b, _d, 0, bi.count) != GDK_SUCCEED) {
1740 0 : MT_rwlock_wrunlock(&b->thashlock);
1741 0 : goto bailout;
1742 : }
1743 : }
1744 : /* reinitialize iterator after possible heap upgrade */
1745 : {
1746 : /* save and restore minpos/maxpos */
1747 72500 : BUN minpos = bi.minpos;
1748 72500 : BUN maxpos = bi.maxpos;
1749 72500 : bi = bat_iterator_nolock(b);
1750 72500 : bi.minpos = minpos;
1751 72500 : bi.maxpos = maxpos;
1752 : }
1753 72500 : _ptr = BUNtloc(bi, p);
1754 72500 : switch (b->twidth) {
1755 7877 : case 1:
1756 7877 : * (uint8_t *) _ptr = (uint8_t) (_d - GDK_VAROFFSET);
1757 7877 : break;
1758 59293 : case 2:
1759 59293 : * (uint16_t *) _ptr = (uint16_t) (_d - GDK_VAROFFSET);
1760 59293 : break;
1761 5330 : case 4:
1762 5330 : * (uint32_t *) _ptr = (uint32_t) _d;
1763 5330 : break;
1764 : #if SIZEOF_VAR_T == 8
1765 0 : case 8:
1766 0 : * (uint64_t *) _ptr = (uint64_t) _d;
1767 0 : break;
1768 : #endif
1769 : default:
1770 72500 : MT_UNREACHABLE();
1771 : }
1772 752378 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
1773 1840 : mskSetVal(b, p, * (msk *) t);
1774 : } else {
1775 750538 : assert(BATatoms[b->ttype].atomPut == NULL);
1776 750538 : if (ATOMfix(b->ttype, t) != GDK_SUCCEED ||
1777 750539 : ATOMunfix(b->ttype, BUNtloc(bi, p)) != GDK_SUCCEED) {
1778 0 : MT_rwlock_wrunlock(&b->thashlock);
1779 0 : goto bailout;
1780 : }
1781 750543 : switch (ATOMsize(b->ttype)) {
1782 : case 0: /* void */
1783 : break;
1784 13957 : case 1:
1785 13957 : ((bte *) b->theap->base)[p] = * (bte *) t;
1786 13957 : break;
1787 7180 : case 2:
1788 7180 : ((sht *) b->theap->base)[p] = * (sht *) t;
1789 7180 : break;
1790 181642 : case 4:
1791 181642 : ((int *) b->theap->base)[p] = * (int *) t;
1792 181642 : break;
1793 547764 : case 8:
1794 547764 : ((lng *) b->theap->base)[p] = * (lng *) t;
1795 547764 : break;
1796 0 : case 16:
1797 : #ifdef HAVE_HGE
1798 0 : ((hge *) b->theap->base)[p] = * (hge *) t;
1799 : #else
1800 : ((uuid *) b->theap->base)[p] = * (uuid *) t;
1801 : #endif
1802 0 : break;
1803 0 : default:
1804 0 : memcpy(BUNtloc(bi, p), t, ATOMsize(b->ttype));
1805 0 : break;
1806 : }
1807 : }
1808 :
1809 824883 : HASHinsert_locked(&bi, p, t); /* insert new value into hash */
1810 :
1811 824822 : prv = p > 0 ? p - 1 : BUN_NONE;
1812 824822 : nxt = p < last ? p + 1 : BUN_NONE;
1813 :
1814 824822 : MT_lock_set(&b->theaplock);
1815 824818 : if (b->tsorted) {
1816 2818 : if (prv != BUN_NONE &&
1817 1105 : atomcmp(t, BUNtail(bi, prv)) < 0) {
1818 13 : b->tsorted = false;
1819 13 : b->tnosorted = p;
1820 2389 : } else if (nxt != BUN_NONE &&
1821 689 : atomcmp(t, BUNtail(bi, nxt)) > 0) {
1822 623 : b->tsorted = false;
1823 623 : b->tnosorted = nxt;
1824 1077 : } else if (b->ttype != TYPE_void && BATtdense(b)) {
1825 0 : if (prv != BUN_NONE &&
1826 0 : 1 + * (oid *) BUNtloc(bi, prv) != * (oid *) t) {
1827 0 : b->tseqbase = oid_nil;
1828 0 : } else if (nxt != BUN_NONE &&
1829 0 : * (oid *) BUNtloc(bi, nxt) != 1 + * (oid *) t) {
1830 0 : b->tseqbase = oid_nil;
1831 0 : } else if (prv == BUN_NONE &&
1832 0 : nxt == BUN_NONE) {
1833 0 : b->tseqbase = * (oid *) t;
1834 : }
1835 : }
1836 823105 : } else if (b->tnosorted >= p)
1837 3199 : b->tnosorted = 0;
1838 824818 : if (b->trevsorted) {
1839 1310 : if (prv != BUN_NONE &&
1840 377 : atomcmp(t, BUNtail(bi, prv)) > 0) {
1841 40 : b->trevsorted = false;
1842 40 : b->tnorevsorted = p;
1843 1007 : } else if (nxt != BUN_NONE &&
1844 114 : atomcmp(t, BUNtail(bi, nxt)) < 0) {
1845 16 : b->trevsorted = false;
1846 16 : b->tnorevsorted = nxt;
1847 : }
1848 823885 : } else if (b->tnorevsorted >= p)
1849 1678 : b->tnorevsorted = 0;
1850 824818 : if (((b->ttype != TYPE_void) & b->tkey) && b->batCount > 1) {
1851 915 : BATkey(b, false);
1852 823903 : } else if (!b->tkey && (b->tnokey[0] == p || b->tnokey[1] == p))
1853 803 : b->tnokey[0] = b->tnokey[1] = 0;
1854 824818 : if (b->tnonil && ATOMstorage(b->ttype) != TYPE_msk)
1855 61364 : b->tnonil = t && atomcmp(t, atomnil) != 0;
1856 1065307 : MT_lock_unset(&b->theaplock);
1857 : }
1858 1065304 : BUN nunique = b->thash ? b->thash->nunique : 0;
1859 1065304 : MT_rwlock_wrunlock(&b->thashlock);
1860 1065358 : MT_lock_set(&b->theaplock);
1861 1065045 : if (nunique != 0)
1862 32967 : b->tunique_est = (double) nunique;
1863 1065045 : b->tminpos = bi.minpos;
1864 1065045 : b->tmaxpos = bi.maxpos;
1865 1065045 : b->theap->dirty = true;
1866 1065045 : if (b->tvheap)
1867 144129 : b->tvheap->dirty = true;
1868 1065045 : MT_lock_unset(&b->theaplock);
1869 :
1870 1064798 : return GDK_SUCCEED;
1871 :
1872 0 : bailout:
1873 0 : if (minbound)
1874 0 : VALclear(&minprop);
1875 0 : if (maxbound)
1876 0 : VALclear(&maxprop);
1877 : return GDK_FAIL;
1878 : }
1879 :
1880 : /* Replace multiple values given by their positions with the given values. */
1881 : gdk_return
1882 552583 : BUNreplacemulti(BAT *b, const oid *positions, const void *values, BUN count, bool force)
1883 : {
1884 552583 : BATcheck(b, GDK_FAIL);
1885 :
1886 552583 : if (b->ttype == TYPE_void && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED)
1887 : return GDK_FAIL;
1888 :
1889 552583 : return BUNinplacemulti(b, positions, values, count, force, false);
1890 : }
1891 :
1892 : /* Replace multiple values starting from a given position with the given
1893 : * values. */
1894 : gdk_return
1895 509790 : BUNreplacemultiincr(BAT *b, oid position, const void *values, BUN count, bool force)
1896 : {
1897 509790 : BATcheck(b, GDK_FAIL);
1898 :
1899 509790 : if (b->ttype == TYPE_void && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED)
1900 : return GDK_FAIL;
1901 :
1902 509790 : return BUNinplacemulti(b, &position, values, count, force, true);
1903 : }
1904 :
1905 : gdk_return
1906 552583 : BUNreplace(BAT *b, oid id, const void *t, bool force)
1907 : {
1908 552583 : return BUNreplacemulti(b, &id, b->ttype && b->tvheap ? (const void *) &t : t, 1, force);
1909 : }
1910 :
1911 : /* very much like BUNreplace, but this doesn't make any changes if the
1912 : * tail column is void */
1913 : gdk_return
1914 3127 : void_inplace(BAT *b, oid id, const void *val, bool force)
1915 : {
1916 3127 : assert(id >= b->hseqbase && id < b->hseqbase + BATcount(b));
1917 3127 : if (id < b->hseqbase || id >= b->hseqbase + BATcount(b)) {
1918 : GDKerror("id out of range\n");
1919 : return GDK_FAIL;
1920 : }
1921 3127 : if (b->ttype == TYPE_void)
1922 : return GDK_SUCCEED;
1923 3127 : return BUNinplacemulti(b, &id, b->ttype && b->tvheap ? (const void *) &val : (const void *) val, 1, force, false);
1924 : }
1925 :
1926 : /*
1927 : * @- BUN Lookup
1928 : * Location of a BUN using a value should use the available indexes to
1929 : * speed up access. If indexes are lacking then a hash index is
1930 : * constructed under the assumption that 1) multiple access to the BAT
1931 : * can be expected and 2) building the hash is only slightly more
1932 : * expensive than the full linear scan. BUN_NONE is returned if no
1933 : * such element could be found. In those cases where the type is
1934 : * known and a hash index is available, one should use the inline
1935 : * functions to speed-up processing.
1936 : */
1937 : static BUN
1938 0 : slowfnd(BAT *b, const void *v)
1939 : {
1940 0 : BATiter bi = bat_iterator(b);
1941 0 : BUN p, q;
1942 0 : int (*cmp)(const void *, const void *) = ATOMcompare(bi.type);
1943 :
1944 0 : BATloop(b, p, q) {
1945 0 : if ((*cmp)(v, BUNtail(bi, p)) == 0) {
1946 0 : bat_iterator_end(&bi);
1947 0 : return p;
1948 : }
1949 : }
1950 0 : bat_iterator_end(&bi);
1951 0 : return BUN_NONE;
1952 : }
1953 :
1954 : static BUN
1955 0 : mskfnd(BAT *b, msk v)
1956 : {
1957 0 : BUN p, q;
1958 :
1959 0 : if (v) {
1960 : /* find a 1 value */
1961 0 : for (p = 0, q = (BATcount(b) + 31) / 32; p < q; p++) {
1962 0 : if (((uint32_t *) b->theap->base)[p] != 0) {
1963 : /* there's at least one 1 bit */
1964 0 : return p * 32 + candmask_lobit(((uint32_t *) b->theap->base)[p]);
1965 : }
1966 : }
1967 : } else {
1968 : /* find a 0 value */
1969 0 : for (p = 0, q = (BATcount(b) + 31) / 32; p < q; p++) {
1970 0 : if (((uint32_t *) b->theap->base)[p] != ~0U) {
1971 : /* there's at least one 0 bit */
1972 0 : return p * 32 + candmask_lobit(~((uint32_t *) b->theap->base)[p]);
1973 : }
1974 : }
1975 : }
1976 : return BUN_NONE;
1977 : }
1978 :
1979 : BUN
1980 1531866 : BUNfnd(BAT *b, const void *v)
1981 : {
1982 1531866 : BUN r = BUN_NONE;
1983 1531866 : BATiter bi;
1984 :
1985 1531866 : BATcheck(b, BUN_NONE);
1986 1531866 : if (!v || BATcount(b) == 0)
1987 : return r;
1988 1119889 : if (complex_cand(b)) {
1989 0 : struct canditer ci;
1990 0 : canditer_init(&ci, NULL, b);
1991 0 : return canditer_search(&ci, * (const oid *) v, false);
1992 : }
1993 1119889 : if (BATtvoid(b))
1994 523275 : return BUNfndVOID(b, v);
1995 596614 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1996 0 : return mskfnd(b, *(msk*)v);
1997 : }
1998 596614 : if (!BATcheckhash(b)) {
1999 130203 : if (BATordered(b) || BATordered_rev(b))
2000 129860 : return SORTfnd(b, v);
2001 : }
2002 466750 : if (BAThash(b) == GDK_SUCCEED) {
2003 466749 : bi = bat_iterator(b); /* outside of hashlock */
2004 466762 : MT_rwlock_rdlock(&b->thashlock);
2005 466762 : if (b->thash == NULL) {
2006 0 : MT_rwlock_rdunlock(&b->thashlock);
2007 0 : bat_iterator_end(&bi);
2008 0 : goto hashfnd_failed;
2009 : }
2010 933258 : switch (ATOMbasetype(bi.type)) {
2011 7 : case TYPE_bte:
2012 7 : HASHloop_bte(bi, b->thash, r, v)
2013 : break;
2014 : break;
2015 0 : case TYPE_sht:
2016 0 : HASHloop_sht(bi, b->thash, r, v)
2017 : break;
2018 : break;
2019 59 : case TYPE_int:
2020 59 : HASHloop_int(bi, b->thash, r, v)
2021 : break;
2022 : break;
2023 0 : case TYPE_flt:
2024 0 : HASHloop_flt(bi, b->thash, r, v)
2025 : break;
2026 : break;
2027 0 : case TYPE_dbl:
2028 0 : HASHloop_dbl(bi, b->thash, r, v)
2029 : break;
2030 : break;
2031 247 : case TYPE_lng:
2032 247 : HASHloop_lng(bi, b->thash, r, v)
2033 : break;
2034 : break;
2035 : #ifdef HAVE_HGE
2036 0 : case TYPE_hge:
2037 0 : HASHloop_hge(bi, b->thash, r, v)
2038 : break;
2039 : break;
2040 : #endif
2041 0 : case TYPE_uuid:
2042 0 : HASHloop_uuid(bi, b->thash, r, v)
2043 : break;
2044 : break;
2045 466448 : case TYPE_str:
2046 648580 : HASHloop_str(bi, b->thash, r, v)
2047 : break;
2048 : break;
2049 1 : default:
2050 3 : HASHloop(bi, b->thash, r, v)
2051 : break;
2052 : break;
2053 : }
2054 466760 : MT_rwlock_rdunlock(&b->thashlock);
2055 466761 : bat_iterator_end(&bi);
2056 466761 : return r;
2057 : }
2058 0 : hashfnd_failed:
2059 : /* can't build hash table, search the slow way */
2060 0 : GDKclrerr();
2061 0 : return slowfnd(b, v);
2062 : }
2063 :
2064 : /*
2065 : * @+ BAT Property Management
2066 : *
2067 : * The function BATcount returns the number of active elements in a
2068 : * BAT. Counting is type independent. It can be implemented quickly,
2069 : * because the system ensures a dense BUN list.
2070 : */
2071 : void
2072 6797556 : BATsetcapacity(BAT *b, BUN cnt)
2073 : {
2074 6797556 : b->batCapacity = cnt;
2075 6797556 : assert(b->batCount <= cnt);
2076 6797556 : }
2077 :
2078 : /* Set the batCount value for the bat and also set some dependent
2079 : * properties. This function should be called only when it is save from
2080 : * concurrent use (e.g. when theaplock is being held). */
2081 : void
2082 87489881 : BATsetcount(BAT *b, BUN cnt)
2083 : {
2084 : /* head column is always VOID, and some head properties never change */
2085 87489881 : assert(!is_oid_nil(b->hseqbase));
2086 87489881 : assert(cnt <= BUN_MAX);
2087 :
2088 87489881 : b->batCount = cnt;
2089 87489881 : if (b->theap->parentid == b->batCacheid) {
2090 80697902 : b->theap->dirty |= b->ttype != TYPE_void && cnt > 0;
2091 80697902 : b->theap->free = tailsize(b, cnt);
2092 : }
2093 87489881 : if (b->ttype == TYPE_void)
2094 8420794 : b->batCapacity = cnt;
2095 87489881 : if (cnt <= 1) {
2096 13141296 : b->tsorted = b->trevsorted = ATOMlinear(b->ttype);
2097 13141296 : b->tnosorted = b->tnorevsorted = 0;
2098 : }
2099 : /* if the BAT was made smaller, we need to zap some values */
2100 87489881 : if (b->tnosorted >= BATcount(b))
2101 11215703 : b->tnosorted = 0;
2102 87489881 : if (b->tnorevsorted >= BATcount(b))
2103 11211167 : b->tnorevsorted = 0;
2104 87489881 : if (b->tnokey[0] >= BATcount(b) || b->tnokey[1] >= BATcount(b)) {
2105 11188023 : b->tnokey[0] = 0;
2106 11188023 : b->tnokey[1] = 0;
2107 : }
2108 87489881 : if (b->ttype == TYPE_void) {
2109 8421151 : b->tsorted = true;
2110 8421151 : if (is_oid_nil(b->tseqbase)) {
2111 4358720 : b->tkey = cnt <= 1;
2112 4358720 : b->trevsorted = true;
2113 4358720 : b->tnil = true;
2114 4358720 : b->tnonil = false;
2115 : } else {
2116 4062431 : b->tkey = true;
2117 4062431 : b->trevsorted = cnt <= 1;
2118 4062431 : b->tnil = false;
2119 4062431 : b->tnonil = true;
2120 : }
2121 : }
2122 87489881 : assert(b->batCapacity >= cnt);
2123 87489881 : }
2124 :
2125 : /*
2126 : * The key and name properties can be changed at any time. Keyed
2127 : * dimensions are automatically supported by an auxiliary hash-based
2128 : * access structure to speed up searching. Turning off the key
2129 : * integrity property does not cause the index to disappear. It can
2130 : * still be used to speed-up retrieval. The routine BATkey sets the
2131 : * key property of the association head.
2132 : */
2133 : gdk_return
2134 6924929 : BATkey(BAT *b, bool flag)
2135 : {
2136 6924929 : BATcheck(b, GDK_FAIL);
2137 6924929 : if (b->ttype == TYPE_void) {
2138 23919 : if (BATtdense(b) && !flag) {
2139 0 : GDKerror("dense column must be unique.\n");
2140 0 : return GDK_FAIL;
2141 : }
2142 23919 : if (is_oid_nil(b->tseqbase) && flag && b->batCount > 1) {
2143 0 : GDKerror("void column cannot be unique.\n");
2144 0 : return GDK_FAIL;
2145 : }
2146 : }
2147 6924929 : b->tkey = flag;
2148 6924929 : if (!flag) {
2149 5571541 : b->tseqbase = oid_nil;
2150 : } else
2151 1353388 : b->tnokey[0] = b->tnokey[1] = 0;
2152 6924929 : gdk_return rc = GDK_SUCCEED;
2153 6924929 : if (flag && VIEWtparent(b)) {
2154 : /* if a view is key, then so is the parent if the two
2155 : * are aligned */
2156 1264704 : BAT *bp = BATdescriptor(VIEWtparent(b));
2157 1264643 : if (bp != NULL) {
2158 1264643 : MT_lock_set(&bp->theaplock);
2159 2196609 : if (BATcount(b) == BATcount(bp) &&
2160 932425 : ATOMtype(BATttype(b)) == ATOMtype(BATttype(bp)) &&
2161 932434 : !BATtkey(bp) &&
2162 45102 : ((BATtvoid(b) && BATtvoid(bp) && b->tseqbase == bp->tseqbase) ||
2163 : BATcount(b) == 0))
2164 43454 : rc = BATkey(bp, true);
2165 1264184 : MT_lock_unset(&bp->theaplock);
2166 1264642 : BBPunfix(bp->batCacheid);
2167 : }
2168 : }
2169 : return rc;
2170 : }
2171 :
2172 : void
2173 1606478 : BAThseqbase(BAT *b, oid o)
2174 : {
2175 1606478 : if (b != NULL) {
2176 1606478 : assert(o <= GDK_oid_max); /* i.e., not oid_nil */
2177 1606478 : assert(o + BATcount(b) <= GDK_oid_max);
2178 1606478 : b->hseqbase = o;
2179 : }
2180 1606478 : }
2181 :
2182 : void
2183 4903799 : BATtseqbase(BAT *b, oid o)
2184 : {
2185 4903799 : assert(o <= oid_nil);
2186 4903799 : if (b == NULL)
2187 : return;
2188 4903799 : assert(is_oid_nil(o) || o + BATcount(b) <= GDK_oid_max);
2189 4903799 : if (ATOMtype(b->ttype) == TYPE_oid) {
2190 4256167 : b->tseqbase = o;
2191 :
2192 : /* adapt keyness */
2193 4256167 : if (BATtvoid(b)) {
2194 4192268 : b->tsorted = true;
2195 4192268 : if (is_oid_nil(o)) {
2196 65 : b->tkey = b->batCount <= 1;
2197 65 : b->tnonil = b->batCount == 0;
2198 65 : b->tnil = b->batCount > 0;
2199 65 : b->trevsorted = true;
2200 65 : b->tnosorted = b->tnorevsorted = 0;
2201 65 : if (!b->tkey) {
2202 0 : b->tnokey[0] = 0;
2203 0 : b->tnokey[1] = 1;
2204 : } else {
2205 65 : b->tnokey[0] = b->tnokey[1] = 0;
2206 : }
2207 : } else {
2208 4192203 : if (!b->tkey) {
2209 6804 : b->tkey = true;
2210 6804 : b->tnokey[0] = b->tnokey[1] = 0;
2211 : }
2212 4192203 : b->tnonil = true;
2213 4192203 : b->tnil = false;
2214 4192203 : b->trevsorted = b->batCount <= 1;
2215 4192203 : if (!b->trevsorted)
2216 15361 : b->tnorevsorted = 1;
2217 : }
2218 : }
2219 : } else {
2220 647632 : assert(o == oid_nil);
2221 647632 : b->tseqbase = oid_nil;
2222 : }
2223 : }
2224 :
2225 : /*
2226 : * @- Change the BAT access permissions (read, append, write)
2227 : * Regrettably, BAT access-permissions, persistent status and memory
2228 : * map modes, interact in ways that makes one's brain sizzle. This
2229 : * makes BATsetaccess and TMcommit (where a change in BAT persistence
2230 : * mode is made permanent) points in which the memory map status of
2231 : * bats needs to be carefully re-assessed and ensured.
2232 : *
2233 : * Another complication is the fact that during commit, concurrent
2234 : * users may access the heaps, such that the simple solution
2235 : * unmap;re-map is out of the question.
2236 : * Even worse, it is not possible to even rename an open mmap file in
2237 : * Windows. For this purpose, we dropped the old .priv scheme, which
2238 : * relied on file moves. Now, the file that is opened with mmap is
2239 : * always the X file, in case of newstorage=STORE_PRIV, we save in a
2240 : * new file X.new
2241 : *
2242 : * we must consider the following dimensions:
2243 : *
2244 : * persistence:
2245 : * not simply the current persistence mode but whether the bat *was*
2246 : * present at the last commit point (BBP status & BBPEXISTING).
2247 : * The crucial issue is namely whether we must guarantee recovery
2248 : * to a previous sane state.
2249 : *
2250 : * access:
2251 : * whether the BAT is BAT_READ or BAT_WRITE. Note that BAT_APPEND
2252 : * is usually the same as BAT_READ (as our concern are only data pages
2253 : * that already existed at the last commit).
2254 : *
2255 : * storage:
2256 : * the current way the heap file X is memory-mapped;
2257 : * STORE_MMAP uses direct mapping (so dirty pages may be flushed
2258 : * at any time to disk), STORE_PRIV uses copy-on-write.
2259 : *
2260 : * newstorage:
2261 : * the current save-regime. STORE_MMAP calls msync() on the heap X,
2262 : * whereas STORE_PRIV writes the *entire* heap in a file: X.new
2263 : * If a BAT is loaded from disk, the field newstorage is used
2264 : * to set storage as well (so before change-access and commit-
2265 : * persistence mayhem, we always have newstorage=storage).
2266 : *
2267 : * change-access:
2268 : * what happens if the bat-access mode is changed from
2269 : * BAT_READ into BAT_WRITE (or vice versa).
2270 : *
2271 : * commit-persistence:
2272 : * what happens during commit if the bat-persistence mode was
2273 : * changed (from TRANSIENT into PERSISTENT, or vice versa).
2274 : *
2275 : * this is the scheme:
2276 : *
2277 : * persistence access newstorage storage change-access commit-persistence
2278 : * =========== ========= ========== ========== ============= ==================
2279 : * 0 transient BAT_READ STORE_MMAP STORE_MMAP =>2 =>4
2280 : * 1 transient BAT_READ STORE_PRIV STORE_PRIV =>3 =>5
2281 : * 2 transient BAT_WRITE STORE_MMAP STORE_MMAP =>0 =>6+
2282 : * 3 transient BAT_WRITE STORE_PRIV STORE_PRIV =>1 =>7
2283 : * 4 persistent BAT_READ STORE_MMAP STORE_MMAP =>6+ =>0
2284 : * 5 persistent BAT_READ STORE_PRIV STORE_PRIV =>7 =>1
2285 : * 6 persistent BAT_WRITE STORE_PRIV STORE_MMAP del X.new=>4+ del X.new;=>2+
2286 : * 7 persistent BAT_WRITE STORE_PRIV STORE_PRIV =>5 =>3
2287 : *
2288 : * exception states:
2289 : * a transient BAT_READ STORE_PRIV STORE_MMAP =>b =>c
2290 : * b transient BAT_WRITE STORE_PRIV STORE_MMAP =>a =>6
2291 : * c persistent BAT_READ STORE_PRIV STORE_MMAP =>6 =>a
2292 : *
2293 : * (+) indicates that we must ensure that the heap gets saved in its new mode
2294 : *
2295 : * Note that we now allow a heap with save-regime STORE_PRIV that was
2296 : * actually mapped STORE_MMAP. In effect, the potential corruption of
2297 : * the X file is compensated by writing out full X.new files that take
2298 : * precedence. When transitioning out of this state towards one with
2299 : * both storage regime and OS as STORE_MMAP we need to move the X.new
2300 : * files into the backup directory. Then msync the X file and (on
2301 : * success) remove the X.new; see backup_new().
2302 : *
2303 : * Exception states are only reachable if the commit fails and those
2304 : * new persistent bats have already been processed (but never become
2305 : * part of a committed state). In that case a transition 2=>6 may end
2306 : * up 2=>b. Exception states a and c are reachable from b.
2307 : *
2308 : * Errors in HEAPchangeaccess() can be handled atomically inside the
2309 : * routine. The work on changing mmap modes HEAPcommitpersistence()
2310 : * is done during the BBPsync() for all bats that are newly persistent
2311 : * (BBPNEW). After the TMcommit(), it is done for those bats that are
2312 : * no longer persistent after the commit (BBPDELETED), only if it
2313 : * succeeds. Such transient bats cannot be processed before the
2314 : * commit, because the commit may fail and then the more unsafe
2315 : * transient mmap modes would be present on a persistent bat.
2316 : *
2317 : * See dirty_bat() in BBPsync() -- gdk_bbp.c and epilogue() in
2318 : * gdk_tm.c.
2319 : *
2320 : * Including the exception states, we have 11 of the 16
2321 : * combinations. As for the 5 avoided states, all four
2322 : * (persistence,access) states with (STORE_MMAP,STORE_PRIV) are
2323 : * omitted (this would amount to an msync() save regime on a
2324 : * copy-on-write heap -- which does not work). The remaining avoided
2325 : * state is the patently unsafe
2326 : * (persistent,BAT_WRITE,STORE_MMAP,STORE_MMAP).
2327 : *
2328 : * Note that after a server restart exception states are gone, as on
2329 : * BAT loads the saved descriptor is inspected again (which will
2330 : * reproduce the state at the last succeeded commit).
2331 : *
2332 : * To avoid exception states, a TMsubcommit protocol would need to be
2333 : * used which is too heavy for BATsetaccess().
2334 : *
2335 : * Note that this code is not about making heaps mmap-ed in the first
2336 : * place. It is just about determining which flavor of mmap should be
2337 : * used. The MAL user is oblivious of such details.
2338 : */
2339 :
2340 : /* rather than deleting X.new, we comply with the commit protocol and
2341 : * move it to backup storage */
2342 : static gdk_return
2343 0 : backup_new(Heap *hp, bool lock)
2344 : {
2345 0 : int batret, bakret, ret = -1;
2346 0 : char *batpath, *bakpath;
2347 0 : struct stat st;
2348 :
2349 0 : char *bak_filename = NULL;
2350 0 : if ((bak_filename = strrchr(hp->filename, DIR_SEP)) != NULL)
2351 0 : bak_filename++;
2352 : else
2353 : bak_filename = hp->filename;
2354 : /* check for an existing X.new in BATDIR, BAKDIR and SUBDIR */
2355 0 : batpath = GDKfilepath(hp->farmid, BATDIR, hp->filename, "new");
2356 0 : bakpath = GDKfilepath(hp->farmid, BAKDIR, bak_filename, "new");
2357 0 : if (batpath != NULL && bakpath != NULL) {
2358 : /* file actions here interact with the global commits */
2359 0 : if (lock)
2360 0 : BBPtmlock();
2361 :
2362 0 : batret = MT_stat(batpath, &st);
2363 0 : bakret = MT_stat(bakpath, &st);
2364 :
2365 0 : if (batret == 0 && bakret) {
2366 : /* no backup yet, so move the existing X.new there out
2367 : * of the way */
2368 0 : if ((ret = MT_rename(batpath, bakpath)) < 0)
2369 0 : GDKsyserror("backup_new: rename %s to %s failed\n",
2370 : batpath, bakpath);
2371 0 : TRC_DEBUG(IO_, "rename(%s,%s) = %d\n", batpath, bakpath, ret);
2372 0 : } else if (batret == 0) {
2373 : /* there is a backup already; just remove the X.new */
2374 0 : if ((ret = MT_remove(batpath)) != 0)
2375 0 : GDKsyserror("backup_new: remove %s failed\n", batpath);
2376 0 : TRC_DEBUG(IO_, "remove(%s) = %d\n", batpath, ret);
2377 : } else {
2378 : ret = 0;
2379 : }
2380 0 : if (lock)
2381 0 : BBPtmunlock();
2382 : }
2383 0 : GDKfree(batpath);
2384 0 : GDKfree(bakpath);
2385 0 : return ret ? GDK_FAIL : GDK_SUCCEED;
2386 : }
2387 :
2388 : #define ACCESSMODE(wr,rd) ((wr)?BAT_WRITE:(rd)?BAT_READ:-1)
2389 :
2390 : /* transition heap from readonly to writable */
2391 : static storage_t
2392 3958158 : HEAPchangeaccess(Heap *hp, int dstmode, bool existing)
2393 : {
2394 3958158 : if (hp->base == NULL || hp->newstorage == STORE_MEM || !existing || dstmode == -1)
2395 3958158 : return hp->newstorage; /* 0<=>2,1<=>3,a<=>b */
2396 :
2397 0 : if (dstmode == BAT_WRITE) {
2398 0 : if (hp->storage != STORE_PRIV)
2399 0 : hp->dirty = true; /* exception c does not make it dirty */
2400 0 : return STORE_PRIV; /* 4=>6,5=>7,c=>6 persistent BAT_WRITE needs STORE_PRIV */
2401 : }
2402 0 : if (hp->storage == STORE_MMAP) { /* 6=>4 */
2403 0 : hp->dirty = true;
2404 0 : return backup_new(hp, true) != GDK_SUCCEED ? STORE_INVALID : STORE_MMAP; /* only called for existing bats */
2405 : }
2406 : return hp->storage; /* 7=>5 */
2407 : }
2408 :
2409 : /* heap changes persistence mode (at commit point) */
2410 : static storage_t
2411 333656 : HEAPcommitpersistence(Heap *hp, bool writable, bool existing)
2412 : {
2413 333656 : if (existing) { /* existing, ie will become transient */
2414 58284 : if (hp->storage == STORE_MMAP && hp->newstorage == STORE_PRIV && writable) { /* 6=>2 */
2415 0 : hp->dirty = true;
2416 0 : return backup_new(hp, false) != GDK_SUCCEED ? STORE_INVALID : STORE_MMAP; /* only called for existing bats */
2417 : }
2418 58284 : return hp->newstorage; /* 4=>0,5=>1,7=>3,c=>a no change */
2419 : }
2420 : /* !existing, ie will become persistent */
2421 275372 : if (hp->newstorage == STORE_MEM)
2422 : return hp->newstorage;
2423 1416 : if (hp->newstorage == STORE_MMAP && !writable)
2424 : return STORE_MMAP; /* 0=>4 STORE_MMAP */
2425 :
2426 0 : if (hp->newstorage == STORE_MMAP)
2427 0 : hp->dirty = true; /* 2=>6 */
2428 : return STORE_PRIV; /* 1=>5,2=>6,3=>7,a=>c,b=>6 states */
2429 : }
2430 :
2431 :
2432 : #define ATOMappendpriv(t, h) (ATOMstorage(t) != TYPE_str /*|| GDK_ELIMDOUBLES(h) */)
2433 :
2434 : /* change the heap modes at a commit */
2435 : gdk_return
2436 293341 : BATcheckmodes(BAT *b, bool existing)
2437 : {
2438 293341 : storage_t m1 = STORE_MEM, m3 = STORE_MEM;
2439 293341 : bool dirty = false, wr;
2440 :
2441 293341 : BATcheck(b, GDK_FAIL);
2442 :
2443 293341 : wr = (b->batRestricted == BAT_WRITE);
2444 293341 : if (b->ttype) {
2445 293341 : m1 = HEAPcommitpersistence(b->theap, wr, existing);
2446 293341 : dirty |= (b->theap->newstorage != m1);
2447 : }
2448 :
2449 293341 : if (b->tvheap) {
2450 40315 : bool ta = (b->batRestricted == BAT_APPEND) && ATOMappendpriv(b->ttype, b->tvheap);
2451 40315 : m3 = HEAPcommitpersistence(b->tvheap, wr || ta, existing);
2452 40315 : dirty |= (b->tvheap->newstorage != m3);
2453 : }
2454 293341 : if (m1 == STORE_INVALID || m3 == STORE_INVALID)
2455 : return GDK_FAIL;
2456 :
2457 293341 : if (dirty) {
2458 0 : b->theap->newstorage = m1;
2459 0 : if (b->tvheap)
2460 0 : b->tvheap->newstorage = m3;
2461 : }
2462 : return GDK_SUCCEED;
2463 : }
2464 :
2465 : BAT *
2466 5192020 : BATsetaccess(BAT *b, restrict_t newmode)
2467 : {
2468 5192020 : restrict_t bakmode;
2469 :
2470 5192020 : BATcheck(b, NULL);
2471 5192020 : if (newmode != BAT_READ &&
2472 19382 : (isVIEW(b) || (ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1)) {
2473 0 : BAT *bn = COLcopy(b, b->ttype, true, b->batRole);
2474 0 : BBPunfix(b->batCacheid);
2475 0 : if (bn == NULL)
2476 : return NULL;
2477 : b = bn;
2478 : }
2479 5192020 : MT_lock_set(&b->theaplock);
2480 5192038 : bakmode = b->batRestricted;
2481 5192038 : if (bakmode != newmode) {
2482 3472039 : bool existing = (BBP_status(b->batCacheid) & BBPEXISTING) != 0;
2483 3472039 : bool wr = (newmode == BAT_WRITE);
2484 3472039 : bool rd = (bakmode == BAT_WRITE);
2485 3472039 : storage_t m1 = STORE_MEM, m3 = STORE_MEM;
2486 3472039 : storage_t b1 = STORE_MEM, b3 = STORE_MEM;
2487 :
2488 3472039 : if (b->theap->parentid == b->batCacheid) {
2489 3471108 : b1 = b->theap->newstorage;
2490 3481128 : m1 = HEAPchangeaccess(b->theap, ACCESSMODE(wr, rd), existing);
2491 : }
2492 3471712 : if (b->tvheap && b->tvheap->parentid == b->batCacheid) {
2493 487228 : bool ta = (newmode == BAT_APPEND && ATOMappendpriv(b->ttype, b->tvheap));
2494 487228 : b3 = b->tvheap->newstorage;
2495 974420 : m3 = HEAPchangeaccess(b->tvheap, ACCESSMODE(wr && ta, rd && ta), existing);
2496 : }
2497 3471587 : if (m1 == STORE_INVALID || m3 == STORE_INVALID) {
2498 1027929 : MT_lock_unset(&b->theaplock);
2499 1028055 : BBPunfix(b->batCacheid);
2500 1028055 : return NULL;
2501 : }
2502 :
2503 : /* set new access mode and mmap modes */
2504 2443658 : b->batRestricted = newmode;
2505 2443658 : if (b->theap->parentid == b->batCacheid)
2506 2443307 : b->theap->newstorage = m1;
2507 2443658 : if (b->tvheap && b->tvheap->parentid == b->batCacheid)
2508 475770 : b->tvheap->newstorage = m3;
2509 :
2510 2443658 : MT_lock_unset(&b->theaplock);
2511 2443418 : if (existing && !isVIEW(b) && BBPsave(b) != GDK_SUCCEED) {
2512 : /* roll back all changes */
2513 0 : MT_lock_set(&b->theaplock);
2514 0 : b->batRestricted = bakmode;
2515 0 : b->theap->newstorage = b1;
2516 0 : if (b->tvheap)
2517 0 : b->tvheap->newstorage = b3;
2518 0 : MT_lock_unset(&b->theaplock);
2519 0 : BBPunfix(b->batCacheid);
2520 0 : return NULL;
2521 : }
2522 : } else {
2523 1719999 : MT_lock_unset(&b->theaplock);
2524 : }
2525 : return b;
2526 : }
2527 :
2528 : restrict_t
2529 0 : BATgetaccess(BAT *b)
2530 : {
2531 0 : BATcheck(b, BAT_WRITE);
2532 0 : MT_lock_set(&b->theaplock);
2533 0 : restrict_t restricted = b->batRestricted;
2534 0 : MT_lock_unset(&b->theaplock);
2535 0 : return restricted;
2536 : }
2537 :
2538 : /*
2539 : * @- change BAT persistency (persistent,session,transient)
2540 : * In the past, we prevented BATS with certain types from being saved at all:
2541 : * - BATs of BATs, as having recursive bats creates cascading
2542 : * complexities in commits/aborts.
2543 : * - any atom with refcounts, as the BBP has no overview of such
2544 : * user-defined refcounts.
2545 : * - pointer types, as the values they point to are bound to be transient.
2546 : *
2547 : * However, nowadays we do allow such saves, as the BBP swapping
2548 : * mechanism was altered to be able to save transient bats temporarily
2549 : * to disk in order to make room. Thus, we must be able to save any
2550 : * transient BAT to disk.
2551 : *
2552 : * What we don't allow is to make such bats persistent.
2553 : *
2554 : * Although the persistent state does influence the allowed mmap
2555 : * modes, this only goes for the *real* committed persistent
2556 : * state. Making the bat persistent with BATmode does not matter for
2557 : * the heap modes until the commit point is reached. So we do not need
2558 : * to do anything with heap modes yet at this point.
2559 : */
2560 : gdk_return
2561 426368 : BATmode(BAT *b, bool transient)
2562 : {
2563 426368 : BATcheck(b, GDK_FAIL);
2564 :
2565 : /* can only make a bat PERSISTENT if its role is already
2566 : * PERSISTENT */
2567 426368 : assert(transient || b->batRole == PERSISTENT);
2568 : /* cannot make a view PERSISTENT */
2569 242945 : assert(transient || !isVIEW(b));
2570 :
2571 426368 : if (b->batRole == TRANSIENT && !transient) {
2572 0 : GDKerror("cannot change mode of BAT in TRANSIENT farm.\n");
2573 0 : return GDK_FAIL;
2574 : }
2575 :
2576 426368 : BATiter bi = bat_iterator(b);
2577 426368 : bool mustrelease = false;
2578 426368 : bat bid = b->batCacheid;
2579 :
2580 426368 : if (transient != bi.transient) {
2581 426368 : if (!transient) {
2582 242945 : if (ATOMisdescendant(b->ttype, TYPE_ptr) ||
2583 242945 : BATatoms[b->ttype].atomUnfix ||
2584 242945 : BATatoms[b->ttype].atomFix) {
2585 0 : GDKerror("%s type implies that %s[%s] "
2586 : "cannot be made persistent.\n",
2587 : ATOMname(b->ttype), BATgetId(b),
2588 : ATOMname(b->ttype));
2589 0 : bat_iterator_end(&bi);
2590 0 : return GDK_FAIL;
2591 : }
2592 : }
2593 :
2594 : /* persistent BATs get a logical reference */
2595 426368 : if (!transient) {
2596 242945 : BBPretain(bid);
2597 183423 : } else if (!bi.transient) {
2598 : /* we need to delay the release because if there
2599 : * is no fix and the bat is loaded, BBPrelease
2600 : * can call BBPfree which calls BATfree which
2601 : * may hang while waiting for the heap reference
2602 : * that we have because of the BAT iterator to
2603 : * come down, in other words, deadlock */
2604 426368 : mustrelease = true;
2605 : }
2606 426368 : MT_lock_set(&GDKswapLock(bid));
2607 426368 : if (!transient) {
2608 242945 : if (BBP_status(bid) & BBPDELETED) {
2609 0 : BBP_status_on(bid, BBPEXISTING);
2610 0 : BBP_status_off(bid, BBPDELETED);
2611 : } else
2612 242945 : BBP_status_on(bid, BBPNEW);
2613 183423 : } else if (!bi.transient) {
2614 183423 : if (!(BBP_status(bid) & BBPNEW))
2615 60115 : BBP_status_on(bid, BBPDELETED);
2616 183423 : BBP_status_off(bid, BBPPERSISTENT);
2617 : }
2618 : /* session bats or persistent bats that did not
2619 : * witness a commit yet may have been saved */
2620 426368 : MT_lock_set(&b->theaplock);
2621 426368 : if (b->batCopiedtodisk) {
2622 49917 : if (!transient) {
2623 545 : BBP_status_off(bid, BBPTMP);
2624 : } else {
2625 : /* TMcommit must remove it to
2626 : * guarantee free space */
2627 49372 : BBP_status_on(bid, BBPTMP);
2628 : }
2629 : }
2630 426368 : b->batTransient = transient;
2631 426368 : MT_lock_unset(&b->theaplock);
2632 426368 : MT_lock_unset(&GDKswapLock(bid));
2633 : }
2634 426368 : bat_iterator_end(&bi);
2635 : /* release after bat_iterator_end because of refs to heaps */
2636 426368 : if (mustrelease)
2637 183423 : BBPrelease(bid);
2638 : return GDK_SUCCEED;
2639 : }
2640 :
2641 : /* BATassertProps checks whether properties are set correctly. Under
2642 : * no circumstances will it change any properties. Note that the
2643 : * "nil" property is not actually used anywhere, but it is checked. */
2644 :
2645 : #ifdef NDEBUG
2646 : /* assertions are disabled, turn failing tests into a message */
2647 : #undef assert
2648 : #define assert(test) ((void) ((test) || (TRC_CRITICAL_ENDIF(CHECK_, "Assertion `%s' failed\n", #test), 0)))
2649 : #endif
2650 :
2651 : /* Assert that properties are set correctly.
2652 : *
2653 : * A BAT can have a bunch of properties set. Mostly, the property
2654 : * bits are set if we *know* the property holds, and not set if we
2655 : * don't know whether the property holds (or if we know it doesn't
2656 : * hold). All properties are per column.
2657 : *
2658 : * The properties currently maintained are:
2659 : *
2660 : * seqbase Only valid for TYPE_oid and TYPE_void columns: each
2661 : * value in the column is exactly one more than the
2662 : * previous value, starting at position 0 with the value
2663 : * stored in this property.
2664 : * This implies sorted, key, nonil (which therefore need
2665 : * to be set).
2666 : * nil There is at least one NIL value in the column.
2667 : * nonil There are no NIL values in the column.
2668 : * key All values in the column are distinct.
2669 : * sorted The column is sorted (ascending). If also revsorted,
2670 : * then all values are equal.
2671 : * revsorted The column is reversely sorted (descending). If
2672 : * also sorted, then all values are equal.
2673 : * nosorted BUN position which proofs not sorted (given position
2674 : * and one before are not ordered correctly).
2675 : * norevsorted BUN position which proofs not revsorted (given position
2676 : * and one before are not ordered correctly).
2677 : * nokey Pair of BUN positions that proof not all values are
2678 : * distinct (i.e. values at given locations are equal).
2679 : *
2680 : * Note that the functions BATtseqbase and BATkey also set more
2681 : * properties than you might suspect. When setting properties on a
2682 : * newly created and filled BAT, you may want to first make sure the
2683 : * batCount is set correctly (e.g. by calling BATsetcount), then use
2684 : * BATtseqbase and BATkey, and finally set the other properties.
2685 : *
2686 : * For a view, we cannot check all properties, since it is possible with
2687 : * the way the SQL layer works, that a parent BAT gets changed, changing
2688 : * the properties, while there is a view. The view is supposed to look
2689 : * at only the non-changing part of the BAT (through candidate lists),
2690 : * but this means that the properties of the view might not be correct.
2691 : * For this reason, for views, we skip all property checking that looks
2692 : * at the BAT content.
2693 : */
2694 :
2695 : void
2696 26621627 : BATassertProps(BAT *b)
2697 : {
2698 26621627 : unsigned bbpstatus;
2699 26621627 : BUN p, q;
2700 26621627 : int (*cmpf)(const void *, const void *);
2701 26621627 : int cmp;
2702 26621627 : const void *prev = NULL, *valp, *nilp;
2703 26621627 : char filename[sizeof(b->theap->filename)];
2704 26621627 : bool isview1, isview2;
2705 :
2706 : /* do the complete check within a lock */
2707 26621627 : MT_lock_set(&b->theaplock);
2708 :
2709 : /* general BAT sanity */
2710 26592812 : assert(b != NULL);
2711 26592812 : assert(b->batCacheid > 0);
2712 26592812 : assert(b->batCacheid < getBBPsize());
2713 26614145 : assert(b == BBP_cache(b->batCacheid));
2714 26614145 : assert(b->batCount >= b->batInserted);
2715 :
2716 : /* headless */
2717 26614145 : assert(b->hseqbase <= GDK_oid_max); /* non-nil seqbase */
2718 26614145 : assert(b->hseqbase + BATcount(b) <= GDK_oid_max);
2719 :
2720 26614145 : isview1 = b->theap->parentid != b->batCacheid;
2721 26614145 : isview2 = b->tvheap && b->tvheap->parentid != b->batCacheid;
2722 :
2723 26614145 : bbpstatus = BBP_status(b->batCacheid);
2724 : /* only at most one of BBPDELETED, BBPEXISTING, BBPNEW may be set */
2725 26614145 : assert(((bbpstatus & BBPDELETED) != 0) +
2726 : ((bbpstatus & BBPEXISTING) != 0) +
2727 : ((bbpstatus & BBPNEW) != 0) <= 1);
2728 :
2729 26614145 : assert(b->ttype >= TYPE_void);
2730 26614145 : assert(b->ttype < GDKatomcnt);
2731 26614145 : assert(b->ttype != TYPE_bat);
2732 26614145 : assert(isview1 ||
2733 : b->ttype == TYPE_void ||
2734 : BBPfarms[b->theap->farmid].roles & (1 << b->batRole));
2735 26614145 : assert(isview2 ||
2736 : b->tvheap == NULL ||
2737 : (BBPfarms[b->tvheap->farmid].roles & (1 << b->batRole)));
2738 :
2739 26614145 : cmpf = ATOMcompare(b->ttype);
2740 26614145 : nilp = ATOMnilptr(b->ttype);
2741 :
2742 26614145 : assert(isview1 || b->theap->free >= tailsize(b, BATcount(b)));
2743 26614145 : if (b->ttype != TYPE_void) {
2744 24298551 : assert(b->batCount <= b->batCapacity);
2745 24298551 : assert(isview1 || b->theap->size >= b->theap->free);
2746 24298551 : if (ATOMstorage(b->ttype) == TYPE_msk) {
2747 : /* 32 values per 4-byte word (that's not the
2748 : * same as 8 values per byte...) */
2749 4154 : assert(isview1 || b->theap->size >= 4 * ((b->batCapacity + 31) / 32));
2750 : } else
2751 24294397 : assert(isview1 || b->theap->size >> b->tshift >= b->batCapacity);
2752 : }
2753 26614145 : if (!isview1) {
2754 23636127 : strconcat_len(filename, sizeof(filename),
2755 23636127 : BBP_physical(b->theap->parentid),
2756 1487962 : b->ttype == TYPE_str ? b->twidth == 1 ? ".tail1" : b->twidth == 2 ? ".tail2" :
2757 : #if SIZEOF_VAR_T == 8
2758 : b->twidth == 4 ? ".tail4" :
2759 : #endif
2760 : ".tail" : ".tail",
2761 : NULL);
2762 23639938 : assert(strcmp(b->theap->filename, filename) == 0);
2763 : }
2764 26617956 : if (!isview2 && b->tvheap) {
2765 1374236 : strconcat_len(filename, sizeof(filename),
2766 1374236 : BBP_physical(b->tvheap->parentid),
2767 : ".theap",
2768 : NULL);
2769 1373799 : assert(strcmp(b->tvheap->filename, filename) == 0);
2770 : }
2771 :
2772 : /* void, str and blob imply varsized */
2773 26617519 : if (ATOMstorage(b->ttype) == TYPE_str ||
2774 : ATOMstorage(b->ttype) == TYPE_blob)
2775 2026239 : assert(b->tvheap != NULL);
2776 : /* other "known" types are not varsized */
2777 26617519 : if (ATOMstorage(b->ttype) > TYPE_void &&
2778 : ATOMstorage(b->ttype) < TYPE_str)
2779 22281196 : assert(b->tvheap == NULL);
2780 : /* shift and width have a particular relationship */
2781 26617519 : if (ATOMstorage(b->ttype) == TYPE_str)
2782 2024149 : assert(b->twidth >= 1 && b->twidth <= ATOMsize(b->ttype));
2783 : else
2784 24593370 : assert(b->twidth == ATOMsize(b->ttype));
2785 26617519 : assert(b->tseqbase <= oid_nil);
2786 : /* only oid/void columns can be dense */
2787 26617519 : assert(is_oid_nil(b->tseqbase) || b->ttype == TYPE_oid || b->ttype == TYPE_void);
2788 : /* a column cannot both have and not have NILs */
2789 26617519 : assert(!b->tnil || !b->tnonil);
2790 26617519 : if (b->ttype == TYPE_void) {
2791 2305447 : assert(b->tshift == 0);
2792 2305447 : assert(b->twidth == 0);
2793 2305447 : assert(b->tsorted);
2794 2305447 : if (is_oid_nil(b->tseqbase)) {
2795 226 : assert(b->tvheap == NULL);
2796 226 : assert(BATcount(b) == 0 || !b->tnonil);
2797 226 : assert(BATcount(b) <= 1 || !b->tkey);
2798 226 : assert(b->trevsorted);
2799 : } else {
2800 2305221 : if (b->tvheap != NULL) {
2801 : /* candidate list with exceptions */
2802 21834 : assert(b->batRole == TRANSIENT || b->batRole == SYSTRANS);
2803 21834 : assert(b->tvheap->free <= b->tvheap->size);
2804 21834 : assert(b->tvheap->free >= sizeof(ccand_t));
2805 21834 : assert((negoid_cand(b) && ccand_free(b) % SIZEOF_OID == 0) || mask_cand(b));
2806 21834 : if (negoid_cand(b) && ccand_free(b) > 0) {
2807 4 : const oid *oids = (const oid *) ccand_first(b);
2808 4 : q = ccand_free(b) / SIZEOF_OID;
2809 4 : assert(oids != NULL);
2810 4 : assert(b->tseqbase + BATcount(b) + q <= GDK_oid_max);
2811 : /* exceptions within range */
2812 4 : assert(oids[0] >= b->tseqbase);
2813 4 : assert(oids[q - 1] < b->tseqbase + BATcount(b) + q);
2814 : /* exceptions sorted */
2815 6 : for (p = 1; p < q; p++)
2816 2 : assert(oids[p - 1] < oids[p]);
2817 : }
2818 : }
2819 2305221 : assert(b->tseqbase + b->batCount <= GDK_oid_max);
2820 2305221 : assert(BATcount(b) == 0 || !b->tnil);
2821 2305221 : assert(BATcount(b) <= 1 || !b->trevsorted);
2822 2305221 : assert(b->tkey);
2823 2305221 : assert(b->tnonil);
2824 : }
2825 2305447 : MT_lock_unset(&b->theaplock);
2826 22954049 : return;
2827 : }
2828 :
2829 24312072 : BATiter bi = bat_iterator_nolock(b);
2830 :
2831 24312072 : if (BATtdense(b)) {
2832 194669 : assert(b->tseqbase + b->batCount <= GDK_oid_max);
2833 194669 : assert(b->ttype == TYPE_oid);
2834 194669 : assert(b->tsorted);
2835 194669 : assert(b->tkey);
2836 194669 : assert(b->tnonil);
2837 194669 : if ((q = b->batCount) != 0) {
2838 153916 : const oid *o = (const oid *) Tloc(b, 0);
2839 153916 : assert(*o == b->tseqbase);
2840 25527923 : for (p = 1; p < q; p++)
2841 25374007 : assert(o[p - 1] + 1 == o[p]);
2842 : }
2843 194669 : MT_lock_unset(&b->theaplock);
2844 194662 : return;
2845 : }
2846 24117403 : assert(1 << b->tshift == b->twidth);
2847 : /* only linear atoms can be sorted */
2848 24117403 : assert(!b->tsorted || ATOMlinear(b->ttype));
2849 24117403 : assert(!b->trevsorted || ATOMlinear(b->ttype));
2850 24117403 : if (ATOMlinear(b->ttype)) {
2851 24118053 : assert(b->tnosorted == 0 ||
2852 : (b->tnosorted > 0 &&
2853 : b->tnosorted < b->batCount));
2854 24118053 : assert(!b->tsorted || b->tnosorted == 0);
2855 24118053 : if (!isview1 &&
2856 24118053 : !isview2 &&
2857 16878165 : !b->tsorted &&
2858 10086334 : b->tnosorted > 0 &&
2859 10086334 : b->tnosorted < b->batCount)
2860 10086337 : assert(cmpf(BUNtail(bi, b->tnosorted - 1),
2861 : BUNtail(bi, b->tnosorted)) > 0);
2862 24118021 : assert(b->tnorevsorted == 0 ||
2863 : (b->tnorevsorted > 0 &&
2864 : b->tnorevsorted < b->batCount));
2865 24118021 : assert(!b->trevsorted || b->tnorevsorted == 0);
2866 24118021 : if (!isview1 &&
2867 21009341 : !isview2 &&
2868 17436920 : !b->trevsorted &&
2869 5627487 : b->tnorevsorted > 0 &&
2870 5627487 : b->tnorevsorted < b->batCount)
2871 5627553 : assert(cmpf(BUNtail(bi, b->tnorevsorted - 1),
2872 : BUNtail(bi, b->tnorevsorted)) < 0);
2873 : }
2874 : /* if tkey property set, both tnokey values must be 0 */
2875 24117323 : assert(!b->tkey || (b->tnokey[0] == 0 && b->tnokey[1] == 0));
2876 24117323 : if (!isview1 &&
2877 24117323 : !isview2 &&
2878 18279871 : !b->tkey &&
2879 18279871 : (b->tnokey[0] != 0 || b->tnokey[1] != 0)) {
2880 : /* if tkey not set and tnokey indicates a proof of
2881 : * non-key-ness, make sure the tnokey values are in
2882 : * range and indeed provide a proof */
2883 434199 : assert(b->tnokey[0] != b->tnokey[1]);
2884 434199 : assert(b->tnokey[0] < b->batCount);
2885 434199 : assert(b->tnokey[1] < b->batCount);
2886 434199 : assert(cmpf(BUNtail(bi, b->tnokey[0]),
2887 : BUNtail(bi, b->tnokey[1])) == 0);
2888 : }
2889 : /* var heaps must have sane sizes */
2890 24117128 : assert(b->tvheap == NULL || b->tvheap->free <= b->tvheap->size);
2891 :
2892 24117128 : if (!b->tkey && !b->tsorted && !b->trevsorted &&
2893 18712241 : !b->tnonil && !b->tnil) {
2894 : /* nothing more to prove */
2895 18147942 : MT_lock_unset(&b->theaplock);
2896 18145976 : return;
2897 : }
2898 :
2899 : /* only do a scan if the bat is not a view */
2900 5969186 : if (!isview1 && !isview2) {
2901 4715073 : const ValRecord *prop;
2902 4715073 : const void *maxval = NULL;
2903 4715073 : const void *minval = NULL;
2904 4715073 : const void *maxbound = NULL;
2905 4715073 : const void *minbound = NULL;
2906 4715073 : const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
2907 4712683 : bool seenmax = false, seenmin = false;
2908 4712683 : bool seennil = false;
2909 :
2910 4712683 : if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL)
2911 12 : maxbound = VALptr(prop);
2912 4711605 : if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL)
2913 1228 : minbound = VALptr(prop);
2914 4712430 : if (b->tmaxpos != BUN_NONE) {
2915 440776 : assert(b->tmaxpos < BATcount(b));
2916 440776 : maxval = BUNtail(bi, b->tmaxpos);
2917 440726 : assert(cmpf(maxval, nilp) != 0);
2918 : }
2919 4711595 : if (b->tminpos != BUN_NONE) {
2920 401588 : assert(b->tminpos < BATcount(b));
2921 401588 : minval = BUNtail(bi, b->tminpos);
2922 401574 : assert(cmpf(minval, nilp) != 0);
2923 : }
2924 4710813 : if (ATOMstorage(b->ttype) == TYPE_msk) {
2925 : /* for now, don't do extra checks for bit mask */
2926 : ;
2927 4706717 : } else if (b->tsorted || b->trevsorted || !b->tkey) {
2928 : /* if sorted (either way), or we don't have to
2929 : * prove uniqueness, we can do a simple
2930 : * scan */
2931 : /* only call compare function if we have to */
2932 4690179 : bool cmpprv = b->tsorted | b->trevsorted | b->tkey;
2933 :
2934 2381540324 : BATloop(b, p, q) {
2935 2377047862 : valp = BUNtail(bi, p);
2936 2377049588 : bool isnil = cmpf(valp, nilp) == 0;
2937 2278060208 : assert(!isnil || !notnull);
2938 2278060208 : assert(!b->tnonil || !isnil);
2939 2278060208 : assert(b->ttype != TYPE_flt || !isinf(*(flt*)valp));
2940 2278060208 : assert(b->ttype != TYPE_dbl || !isinf(*(dbl*)valp));
2941 2278060208 : if (minbound && !isnil) {
2942 30 : cmp = cmpf(minbound, valp);
2943 30 : assert(cmp <= 0);
2944 : }
2945 2278060208 : if (maxbound && !isnil) {
2946 30 : cmp = cmpf(maxbound, valp);
2947 30 : assert(cmp > 0);
2948 : }
2949 2278060208 : if (maxval && !isnil) {
2950 27989280 : cmp = cmpf(maxval, valp);
2951 27987810 : assert(cmp >= 0);
2952 27987810 : seenmax |= cmp == 0;
2953 : }
2954 2278058738 : if (minval && !isnil) {
2955 3126545 : cmp = cmpf(minval, valp);
2956 3126163 : assert(cmp <= 0);
2957 3126163 : seenmin |= cmp == 0;
2958 : }
2959 2278058356 : if (prev && cmpprv) {
2960 1260267831 : cmp = cmpf(prev, valp);
2961 1359255386 : assert(!b->tsorted || cmp <= 0);
2962 1359255386 : assert(!b->trevsorted || cmp >= 0);
2963 1359255386 : assert(!b->tkey || cmp != 0);
2964 : }
2965 2377045911 : seennil |= isnil;
2966 2377045911 : if (seennil && !cmpprv &&
2967 195839 : maxval == NULL && minval == NULL &&
2968 195759 : minbound == NULL && maxbound == NULL) {
2969 : /* we've done all the checking
2970 : * we can do */
2971 : break;
2972 : }
2973 2376850145 : prev = valp;
2974 : }
2975 : } else { /* b->tkey && !b->tsorted && !b->trevsorted */
2976 : /* we need to check for uniqueness the hard
2977 : * way (i.e. using a hash table) */
2978 16538 : const char *nme = BBP_physical(b->batCacheid);
2979 16538 : Hash *hs = NULL;
2980 16538 : BUN mask;
2981 :
2982 16538 : if ((hs = GDKzalloc(sizeof(Hash))) == NULL) {
2983 0 : TRC_WARNING(BAT_, "Cannot allocate hash table\n");
2984 0 : goto abort_check;
2985 : }
2986 16538 : if (snprintf(hs->heaplink.filename, sizeof(hs->heaplink.filename), "%s.thshprpl%x", nme, (unsigned) MT_getpid()) >= (int) sizeof(hs->heaplink.filename) ||
2987 16538 : snprintf(hs->heapbckt.filename, sizeof(hs->heapbckt.filename), "%s.thshprpb%x", nme, (unsigned) MT_getpid()) >= (int) sizeof(hs->heapbckt.filename)) {
2988 : /* cannot happen, see comment in gdk.h
2989 : * about sizes near definition of
2990 : * BBPINIT */
2991 0 : GDKfree(hs);
2992 0 : TRC_CRITICAL(BAT_, "Heap filename is too large\n");
2993 0 : goto abort_check;
2994 : }
2995 16538 : if (ATOMsize(b->ttype) == 1)
2996 : mask = (BUN) 1 << 8;
2997 16532 : else if (ATOMsize(b->ttype) == 2)
2998 : mask = (BUN) 1 << 16;
2999 : else
3000 16530 : mask = HASHmask(b->batCount);
3001 16538 : hs->heapbckt.parentid = b->batCacheid;
3002 16538 : hs->heaplink.parentid = b->batCacheid;
3003 16538 : if ((hs->heaplink.farmid = BBPselectfarm(
3004 16538 : TRANSIENT, b->ttype, hashheap)) < 0 ||
3005 16538 : (hs->heapbckt.farmid = BBPselectfarm(
3006 33076 : TRANSIENT, b->ttype, hashheap)) < 0 ||
3007 16538 : HASHnew(hs, b->ttype, BATcount(b),
3008 : mask, BUN_NONE, false) != GDK_SUCCEED) {
3009 0 : GDKfree(hs);
3010 0 : TRC_WARNING(BAT_, "Cannot allocate hash table\n");
3011 0 : goto abort_check;
3012 : }
3013 48862602 : BATloop(b, p, q) {
3014 48846064 : BUN hb;
3015 48846064 : BUN prb;
3016 48846064 : valp = BUNtail(bi, p);
3017 48844174 : bool isnil = cmpf(valp, nilp) == 0;
3018 48863067 : assert(!isnil || !notnull);
3019 48863067 : assert(b->ttype != TYPE_flt || !isinf(*(flt*)valp));
3020 48863067 : assert(b->ttype != TYPE_dbl || !isinf(*(dbl*)valp));
3021 48863067 : if (minbound && !isnil) {
3022 0 : cmp = cmpf(minbound, valp);
3023 0 : assert(cmp <= 0);
3024 : }
3025 48863067 : if (maxbound && !isnil) {
3026 0 : cmp = cmpf(maxbound, valp);
3027 0 : assert(cmp > 0);
3028 : }
3029 48863067 : if (maxval && !isnil) {
3030 1627 : cmp = cmpf(maxval, valp);
3031 1627 : assert(cmp >= 0);
3032 1627 : seenmax |= cmp == 0;
3033 : }
3034 48863067 : if (minval && !isnil) {
3035 1627 : cmp = cmpf(minval, valp);
3036 1627 : assert(cmp <= 0);
3037 1627 : seenmin |= cmp == 0;
3038 : }
3039 48863067 : prb = HASHprobe(hs, valp);
3040 48924744 : for (hb = HASHget(hs, prb);
3041 56659804 : hb != BUN_NONE;
3042 7735060 : hb = HASHgetlink(hs, hb))
3043 7863423 : if (cmpf(valp, BUNtail(bi, hb)) == 0)
3044 0 : assert(!b->tkey);
3045 48796381 : HASHputlink(hs, p, HASHget(hs, prb));
3046 48811855 : HASHput(hs, prb, p);
3047 48846064 : assert(!b->tnonil || !isnil);
3048 48846064 : seennil |= isnil;
3049 : }
3050 16538 : HEAPfree(&hs->heaplink, true);
3051 16538 : HEAPfree(&hs->heapbckt, true);
3052 16538 : GDKfree(hs);
3053 : }
3054 4708861 : abort_check:
3055 4708861 : GDKclrerr();
3056 4708851 : assert(maxval == NULL || seenmax);
3057 4708851 : assert(minval == NULL || seenmin);
3058 4708851 : assert(!b->tnil || seennil);
3059 : }
3060 5962964 : MT_lock_unset(&b->theaplock);
3061 : }
|