Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024, 2025 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * @a M. L. Kersten, P. Boncz, N. Nes
15 : * @* BAT Module
16 : * In this Chapter we describe the BAT implementation in more detail.
17 : * The routines mentioned are primarily meant to simplify the library
18 : * implementation.
19 : *
20 : * @+ BAT Construction
21 : * BATs are implemented in several blocks of memory, prepared for disk
22 : * storage and easy shipment over a network.
23 : *
24 : * The BAT starts with a descriptor, which indicates the required BAT
25 : * library version and the BAT administration details. In particular,
26 : * it describes the binary relationship maintained and the location of
27 : * fields required for storage.
28 : *
29 : * The general layout of the BAT in this implementation is as follows.
30 : * Each BAT comes with a heap for the loc-size buns and, optionally,
31 : * with heaps to manage the variable-sized data items of both
32 : * dimensions. The buns are assumed to be stored as loc-size objects.
33 : * This is essentially an array of structs to store the associations.
34 : * The size is determined at BAT creation time using an upper bound on
35 : * the number of elements to be accommodated. In case of overflow,
36 : * its storage space is extended automatically.
37 : *
38 : * The capacity of a BAT places an upper limit on the number of BUNs
39 : * to be stored initially. The actual space set aside may be quite
40 : * large. Moreover, the size is aligned to int boundaries to speedup
41 : * access and avoid some machine limitations.
42 : *
43 : * Initialization of the variable parts rely on type specific routines
44 : * called atomHeap.
45 : */
46 : #include "monetdb_config.h"
47 : #include "gdk.h"
48 : #include "gdk_private.h"
49 : #include "mutils.h"
50 :
51 : #ifdef ALIGN
52 : #undef ALIGN
53 : #endif
54 : #define ALIGN(n,b) ((b)?(b)*(1+(((n)-1)/(b))):n)
55 :
56 : #define ATOMneedheap(tpe) (BATatoms[tpe].atomHeap != NULL)
57 :
58 : BAT *
59 24460576 : BATcreatedesc(oid hseq, int tt, bool heapnames, role_t role, uint16_t width)
60 : {
61 24460576 : bat bid;
62 24460576 : BAT *bn;
63 24460576 : Heap *h = NULL, *vh = NULL;
64 :
65 : /*
66 : * Alloc space for the BAT and its dependent records.
67 : */
68 24460576 : assert(tt >= 0);
69 :
70 24460576 : if (heapnames) {
71 12362519 : if ((h = GDKmalloc(sizeof(Heap))) == NULL) {
72 : return NULL;
73 : }
74 24804958 : *h = (Heap) {
75 12407942 : .farmid = BBPselectfarm(role, tt, offheap),
76 : .dirty = true,
77 : .refs = ATOMIC_VAR_INIT(1),
78 : };
79 :
80 12397016 : if (ATOMneedheap(tt)) {
81 1072171 : if ((vh = GDKmalloc(sizeof(Heap))) == NULL) {
82 0 : GDKfree(h);
83 0 : return NULL;
84 : }
85 1080863 : *vh = (Heap) {
86 1080479 : .farmid = BBPselectfarm(role, tt, varheap),
87 : .dirty = true,
88 : .refs = ATOMIC_VAR_INIT(1),
89 : };
90 : }
91 : }
92 :
93 24503765 : bid = BBPallocbat(tt);
94 24585064 : if (bid == 0) {
95 0 : GDKfree(h);
96 0 : GDKfree(vh);
97 0 : return NULL;
98 : }
99 24585064 : bn = BBP_desc(bid);
100 :
101 : /*
102 : * Fill in basic column info
103 : */
104 49103249 : *bn = (BAT) {
105 : .batCacheid = bid,
106 : .hseqbase = hseq,
107 :
108 : .ttype = tt,
109 : .tkey = true,
110 : .tnonil = true,
111 : .tnil = false,
112 24518185 : .tsorted = ATOMlinear(tt),
113 : .trevsorted = ATOMlinear(tt),
114 24518185 : .tascii = tt == TYPE_str,
115 : .tseqbase = oid_nil,
116 : .tminpos = BUN_NONE,
117 : .tmaxpos = BUN_NONE,
118 : .tunique_est = 0.0,
119 :
120 : .batRole = role,
121 : .batTransient = true,
122 : .batRestricted = BAT_WRITE,
123 : .theap = h,
124 : .tvheap = vh,
125 24585064 : .creator_tid = MT_getpid(),
126 : };
127 :
128 24518185 : if (bn->theap) {
129 12381770 : bn->theap->parentid = bn->batCacheid;
130 12381770 : const char *nme = BBP_physical(bn->batCacheid);
131 12381770 : settailname(bn->theap, nme, tt, width);
132 :
133 12405215 : if (bn->tvheap) {
134 1074769 : bn->tvheap->parentid = bn->batCacheid;
135 1074769 : strconcat_len(bn->tvheap->filename,
136 : sizeof(bn->tvheap->filename),
137 : nme, ".theap", NULL);
138 : }
139 : }
140 24547143 : char name[MT_NAME_LEN];
141 24547143 : snprintf(name, sizeof(name), "heaplock%d", bn->batCacheid); /* fits */
142 24547143 : MT_lock_init(&bn->theaplock, name);
143 24485612 : snprintf(name, sizeof(name), "BATlock%d", bn->batCacheid); /* fits */
144 24485612 : MT_lock_init(&bn->batIdxLock, name);
145 24533813 : snprintf(name, sizeof(name), "hashlock%d", bn->batCacheid); /* fits */
146 24533813 : MT_rwlock_init(&bn->thashlock, name);
147 24431661 : return bn;
148 : }
149 :
150 : uint8_t
151 12381010 : ATOMelmshift(int sz)
152 : {
153 12381010 : uint8_t sh;
154 12381010 : int i = sz >> 1;
155 :
156 22786169 : for (sh = 0; i != 0; sh++) {
157 10405159 : i >>= 1;
158 : }
159 12381010 : return sh;
160 : }
161 :
162 :
163 : void
164 12354503 : BATsetdims(BAT *b, uint16_t width)
165 : {
166 12354503 : b->twidth = b->ttype == TYPE_str ? width > 0 ? width : 1 : ATOMsize(b->ttype);
167 12354503 : b->tshift = ATOMelmshift(b->twidth);
168 12354503 : assert_shift_width(b->tshift, b->twidth);
169 12354503 : }
170 :
171 : const char *
172 1265 : BATtailname(const BAT *b)
173 : {
174 1265 : if (b->ttype == TYPE_str) {
175 340 : switch (b->twidth) {
176 250 : case 1:
177 250 : return "tail1";
178 75 : case 2:
179 75 : return "tail2";
180 15 : case 4:
181 : #if SIZEOF_VAR_T == 8
182 15 : return "tail4";
183 : case 8:
184 : #endif
185 : break;
186 : default:
187 0 : MT_UNREACHABLE();
188 : }
189 : }
190 : return "tail";
191 : }
192 :
193 : void
194 12452397 : settailname(Heap *restrict tail, const char *restrict physnme, int tt, int width)
195 : {
196 12452397 : if (tt == TYPE_str) {
197 1104337 : switch (width) {
198 1043150 : case 0:
199 : case 1:
200 1043150 : strconcat_len(tail->filename,
201 : sizeof(tail->filename), physnme,
202 : ".tail1", NULL);
203 1043150 : return;
204 56007 : case 2:
205 56007 : strconcat_len(tail->filename,
206 : sizeof(tail->filename), physnme,
207 : ".tail2", NULL);
208 56007 : return;
209 5180 : case 4:
210 : #if SIZEOF_VAR_T == 8
211 5180 : strconcat_len(tail->filename,
212 : sizeof(tail->filename), physnme,
213 : ".tail4", NULL);
214 5180 : return;
215 : case 8:
216 : #endif
217 : break;
218 : default:
219 0 : MT_UNREACHABLE();
220 : }
221 : }
222 11348060 : strconcat_len(tail->filename, sizeof(tail->filename), physnme,
223 : ".tail", NULL);
224 : }
225 :
226 : /*
227 : * @- BAT allocation
228 : * Allocate BUN heap and variable-size atomheaps (see e.g. strHeap).
229 : * We now initialize new BATs with their heapname such that the
230 : * modified HEAPalloc/HEAPextend primitives can possibly use memory
231 : * mapped files as temporary heap storage.
232 : *
233 : * In case of huge bats, we want HEAPalloc to write a file to disk,
234 : * and memory map it. To make this possible, we must provide it with
235 : * filenames.
236 : */
237 : BAT *
238 12371334 : COLnew2(oid hseq, int tt, BUN cap, role_t role, uint16_t width)
239 : {
240 12371334 : BAT *bn;
241 :
242 12371334 : assert(cap <= BUN_MAX);
243 12371334 : assert(hseq <= oid_nil);
244 12371334 : ERRORcheck((tt < 0) || (tt > GDKatomcnt), "tt error\n", NULL);
245 :
246 : /* round up to multiple of BATTINY */
247 12371334 : if (cap < BUN_MAX - BATTINY)
248 12356532 : cap = (cap + BATTINY - 1) & ~(BATTINY - 1);
249 12371334 : if (ATOMstorage(tt) == TYPE_msk) {
250 177709 : if (cap < 8*BATTINY)
251 : cap = 8*BATTINY;
252 : else
253 59418 : cap = (cap + 31) & ~(BUN)31;
254 12193625 : } else if (cap < BATTINY)
255 : cap = BATTINY;
256 : /* limit the size */
257 3636010 : if (cap > BUN_MAX)
258 : cap = BUN_MAX;
259 :
260 12371334 : bn = BATcreatedesc(hseq, tt, true, role, width);
261 12338308 : if (bn == NULL)
262 : return NULL;
263 :
264 12338308 : BATsetdims(bn, width);
265 12357265 : bn->batCapacity = cap;
266 :
267 12357265 : if (ATOMstorage(tt) == TYPE_msk)
268 177676 : cap /= 8; /* 8 values per byte */
269 :
270 : /* alloc the main heaps */
271 12357265 : if (tt && HEAPalloc(bn->theap, cap, bn->twidth) != GDK_SUCCEED) {
272 0 : goto bailout;
273 : }
274 :
275 12396587 : if (bn->tvheap && width == 0 && ATOMheap(tt, bn->tvheap, cap) != GDK_SUCCEED) {
276 0 : HEAPfree(bn->theap, true);
277 0 : goto bailout;
278 : }
279 12389309 : if (BBPcacheit(bn, true) != GDK_SUCCEED) {
280 : /* cannot happen, function always returns success */
281 0 : goto bailout;
282 : }
283 12408180 : TRC_DEBUG(ALGO, "-> " ALGOBATFMT "\n", ALGOBATPAR(bn));
284 : return bn;
285 0 : bailout:
286 0 : BBPclear(bn->batCacheid);
287 0 : return NULL;
288 : }
289 :
290 : BAT *
291 12067804 : COLnew(oid hseq, int tt, BUN cap, role_t role)
292 : {
293 12067804 : return COLnew2(hseq, tt, cap, role, 0);
294 : }
295 :
296 : BAT *
297 5704218 : BATdense(oid hseq, oid tseq, BUN cnt)
298 : {
299 5704218 : BAT *bn;
300 :
301 5704218 : bn = COLnew(hseq, TYPE_void, 0, TRANSIENT);
302 5713001 : if (bn != NULL) {
303 5713001 : BATtseqbase(bn, tseq);
304 5711304 : BATsetcount(bn, cnt);
305 5707311 : TRC_DEBUG(ALGO, OIDFMT "," OIDFMT "," BUNFMT
306 : "-> " ALGOBATFMT "\n", hseq, tseq, cnt,
307 : ALGOBATPAR(bn));
308 : }
309 5707311 : return bn;
310 : }
311 :
312 : /*
313 : * If the BAT runs out of storage for BUNS it will reallocate space.
314 : * For memory mapped BATs we simple extend the administration after
315 : * having an assurance that the BAT still can be safely stored away.
316 : */
317 : BUN
318 23801 : BATgrows(BAT *b)
319 : {
320 23801 : BUN oldcap, newcap;
321 :
322 23801 : BATcheck(b, 0);
323 :
324 23801 : newcap = oldcap = BATcapacity(b);
325 23801 : if (newcap < BATTINY)
326 : newcap = 2 * BATTINY;
327 23883 : else if (newcap < 10 * BATTINY)
328 22703 : newcap = 4 * newcap;
329 1180 : else if (newcap < 50 * BATTINY)
330 687 : newcap = 2 * newcap;
331 493 : else if ((double) newcap * BATMARGIN <= (double) BUN_MAX)
332 491 : newcap = (BUN) ((double) newcap * BATMARGIN);
333 : else
334 : newcap = BUN_MAX;
335 23883 : if (newcap == oldcap) {
336 0 : if (newcap <= BUN_MAX - 10)
337 0 : newcap += 10;
338 : else
339 : newcap = BUN_MAX;
340 : }
341 23801 : if (ATOMstorage(b->ttype) == TYPE_msk) /* round up to multiple of 32 */
342 2 : newcap = (newcap + 31) & ~(BUN)31;
343 : return newcap;
344 : }
345 :
346 : /*
347 : * The routine should ensure that the BAT keeps its location in the
348 : * BAT buffer.
349 : *
350 : * Overflow in the other heaps are dealt with in the atom routines.
351 : * Here we merely copy their references into the new administration
352 : * space.
353 : */
354 : gdk_return
355 185772 : BATextend(BAT *b, BUN newcap)
356 : {
357 185772 : size_t theap_size;
358 185772 : gdk_return rc = GDK_SUCCEED;
359 :
360 185772 : assert(newcap <= BUN_MAX);
361 185772 : BATcheck(b, GDK_FAIL);
362 : /*
363 : * The main issue is to properly predict the new BAT size.
364 : * storage overflow. The assumption taken is that capacity
365 : * overflow is rare. It is changed only when the position of
366 : * the next available BUN surpasses the free area marker. Be
367 : * aware that the newcap should be greater than the old value,
368 : * otherwise you may easily corrupt the administration of
369 : * malloc.
370 : */
371 185772 : MT_lock_set(&b->theaplock);
372 186300 : if (newcap <= BATcapacity(b)) {
373 151718 : MT_lock_unset(&b->theaplock);
374 151760 : return GDK_SUCCEED;
375 : }
376 :
377 34582 : if (ATOMstorage(b->ttype) == TYPE_msk) {
378 1138 : newcap = (newcap + 31) & ~(BUN)31; /* round up to multiple of 32 */
379 1138 : theap_size = (size_t) (newcap / 8); /* in bytes */
380 : } else {
381 33444 : theap_size = (size_t) newcap << b->tshift;
382 : }
383 :
384 34582 : if (b->theap->base) {
385 34582 : TRC_DEBUG(HEAP, "HEAPgrow in BATextend %s %zu %zu\n",
386 : b->theap->filename, b->theap->size, theap_size);
387 34582 : rc = HEAPgrow(&b->theap, theap_size, b->batRestricted == BAT_READ);
388 34627 : if (rc == GDK_SUCCEED)
389 34627 : b->batCapacity = newcap;
390 : } else {
391 0 : b->batCapacity = newcap;
392 : }
393 34627 : MT_lock_unset(&b->theaplock);
394 :
395 34624 : return rc;
396 : }
397 :
398 :
399 :
400 : /*
401 : * @+ BAT destruction
402 : * BATclear quickly removes all elements from a BAT. It must respect
403 : * the transaction rules; so stable elements must be moved to the
404 : * "deleted" section of the BAT (they cannot be fully deleted
405 : * yet). For the elements that really disappear, we must free
406 : * heapspace. As an optimization, in the case of no stable elements, we quickly empty
407 : * the heaps by copying a standard small empty image over them.
408 : */
409 : gdk_return
410 427 : BATclear(BAT *b, bool force)
411 : {
412 427 : BUN p, q;
413 :
414 427 : BATcheck(b, GDK_FAIL);
415 :
416 427 : if (!force && b->batInserted > 0) {
417 0 : GDKerror("cannot clear committed BAT\n");
418 0 : return GDK_FAIL;
419 : }
420 :
421 427 : TRC_DEBUG(ALGO, ALGOBATFMT "\n", ALGOBATPAR(b));
422 :
423 : /* kill all search accelerators */
424 427 : HASHdestroy(b);
425 427 : OIDXdestroy(b);
426 427 : STRMPdestroy(b);
427 427 : RTREEdestroy(b);
428 427 : PROPdestroy(b);
429 :
430 427 : bat tvp = 0;
431 :
432 : /* we must dispose of all inserted atoms */
433 427 : MT_lock_set(&b->theaplock);
434 427 : if (force && BATatoms[b->ttype].atomDel == NULL) {
435 420 : assert(b->tvheap == NULL || b->tvheap->parentid == b->batCacheid);
436 : /* no stable elements: we do a quick heap clean */
437 : /* need to clean heap which keeps data even though the
438 : BUNs got removed. This means reinitialize when
439 : free > 0
440 : */
441 420 : if (b->tvheap && b->tvheap->free > 0) {
442 21 : Heap *th = GDKmalloc(sizeof(Heap));
443 :
444 21 : if (th == NULL) {
445 0 : MT_lock_unset(&b->theaplock);
446 0 : return GDK_FAIL;
447 : }
448 21 : *th = (Heap) {
449 21 : .farmid = b->tvheap->farmid,
450 21 : .parentid = b->tvheap->parentid,
451 : .dirty = true,
452 21 : .hasfile = b->tvheap->hasfile,
453 : .refs = ATOMIC_VAR_INIT(1),
454 : };
455 21 : strcpy_len(th->filename, b->tvheap->filename, sizeof(th->filename));
456 21 : if (ATOMheap(b->ttype, th, 0) != GDK_SUCCEED) {
457 0 : MT_lock_unset(&b->theaplock);
458 0 : return GDK_FAIL;
459 : }
460 21 : tvp = b->tvheap->parentid;
461 21 : HEAPdecref(b->tvheap, false);
462 21 : b->tvheap = th;
463 : }
464 : } else {
465 : /* do heap-delete of all inserted atoms */
466 7 : void (*tatmdel)(Heap*,var_t*) = BATatoms[b->ttype].atomDel;
467 :
468 : /* TYPE_str has no del method, so we shouldn't get here */
469 7 : assert(tatmdel == NULL || b->twidth == sizeof(var_t));
470 0 : if (tatmdel) {
471 0 : BATiter bi = bat_iterator_nolock(b);
472 :
473 0 : for (p = b->batInserted, q = BATcount(b); p < q; p++)
474 0 : (*tatmdel)(b->tvheap, (var_t*) BUNtloc(bi,p));
475 0 : b->tvheap->dirty = true;
476 : }
477 : }
478 :
479 427 : b->batInserted = 0;
480 427 : b->batCount = 0;
481 427 : if (b->ttype == TYPE_void)
482 0 : b->batCapacity = 0;
483 427 : b->theap->free = 0;
484 427 : BAThseqbase(b, 0);
485 427 : BATtseqbase(b, ATOMtype(b->ttype) == TYPE_oid ? 0 : oid_nil);
486 427 : b->theap->dirty = true;
487 427 : b->tnonil = true;
488 427 : b->tnil = false;
489 427 : b->tsorted = b->trevsorted = ATOMlinear(b->ttype);
490 427 : b->tnosorted = b->tnorevsorted = 0;
491 427 : b->tkey = true;
492 427 : b->tnokey[0] = b->tnokey[1] = 0;
493 427 : b->tminpos = b->tmaxpos = BUN_NONE;
494 427 : b->tunique_est = 0;
495 427 : MT_lock_unset(&b->theaplock);
496 427 : if (tvp != 0 && tvp != b->batCacheid)
497 0 : BBPrelease(tvp);
498 : return GDK_SUCCEED;
499 : }
500 :
501 : /* free a cached BAT; leave the bat descriptor cached */
502 : void
503 576668 : BATfree(BAT *b)
504 : {
505 576668 : if (b == NULL)
506 : return;
507 :
508 : /* deallocate all memory for a bat */
509 576668 : MT_rwlock_rdlock(&b->thashlock);
510 576669 : BUN nunique = BUN_NONE;
511 576669 : if (b->thash && b->thash != (Hash *) 1) {
512 2011 : nunique = b->thash->nunique;
513 : }
514 576669 : MT_rwlock_rdunlock(&b->thashlock);
515 576669 : HASHfree(b);
516 576668 : OIDXfree(b);
517 576669 : STRMPfree(b);
518 576669 : RTREEfree(b);
519 576668 : MT_lock_set(&b->theaplock);
520 576668 : if (nunique != BUN_NONE) {
521 2011 : b->tunique_est = (double) nunique;
522 : }
523 : /* wait until there are no other references to the heap; a
524 : * reference is possible in e.g. BBPsync that uses a
525 : * bat_iterator directly on the BBP_desc, i.e. without fix */
526 576668 : while (b->theap && (ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1) {
527 0 : MT_lock_unset(&b->theaplock);
528 0 : MT_sleep_ms(1);
529 576668 : MT_lock_set(&b->theaplock);
530 : }
531 576668 : if (b->theap) {
532 576668 : assert((ATOMIC_GET(&b->theap->refs) & HEAPREFS) == 1);
533 576668 : assert(b->theap->parentid == b->batCacheid);
534 576668 : HEAPfree(b->theap, false);
535 : }
536 : /* wait until there are no other references to the heap; a
537 : * reference is possible in e.g. BBPsync that uses a
538 : * bat_iterator directly on the BBP_desc, i.e. without fix */
539 30805 : while (b->tvheap && (ATOMIC_GET(&b->tvheap->refs) & HEAPREFS) > 1) {
540 0 : MT_lock_unset(&b->theaplock);
541 0 : MT_sleep_ms(1);
542 576669 : MT_lock_set(&b->theaplock);
543 : }
544 576669 : if (b->tvheap) {
545 30805 : assert((ATOMIC_GET(&b->tvheap->refs) & HEAPREFS) == 1);
546 30805 : assert(b->tvheap->parentid == b->batCacheid);
547 30805 : HEAPfree(b->tvheap, false);
548 : }
549 576669 : MT_lock_unset(&b->theaplock);
550 : }
551 :
552 : /* free a cached BAT descriptor */
553 : void
554 24583638 : BATdestroy(BAT *b)
555 : {
556 24583638 : if (b->tvheap) {
557 29901 : GDKfree(b->tvheap);
558 : }
559 24583638 : PROPdestroy_nolock(b);
560 24553788 : MT_lock_destroy(&b->theaplock);
561 24552927 : MT_lock_destroy(&b->batIdxLock);
562 24569774 : MT_rwlock_destroy(&b->thashlock);
563 24581410 : if (b->theap) {
564 573649 : GDKfree(b->theap);
565 : }
566 24579083 : if (b->oldtail) {
567 0 : ATOMIC_AND(&b->oldtail->refs, ~DELAYEDREMOVE);
568 : /* the bat has not been committed, so we cannot remove
569 : * the old tail file */
570 0 : HEAPdecref(b->oldtail, false);
571 0 : b->oldtail = NULL;
572 : }
573 24579083 : *b = (BAT) {
574 : .batCacheid = 0,
575 : };
576 24579083 : }
577 :
578 : /*
579 : * @+ BAT copying
580 : *
581 : * BAT copying is an often used operation. So it deserves attention.
582 : * When making a copy of a BAT, the following aspects are of
583 : * importance:
584 : *
585 : * - the requested head and tail types. The purpose of the copy may be
586 : * to slightly change these types (e.g. void <-> oid). We may also
587 : * remap between types as long as they share the same
588 : * ATOMstorage(type), i.e. the types have the same physical
589 : * implementation. We may even want to allow 'dirty' trick such as
590 : * viewing a flt-column suddenly as int.
591 : *
592 : * To allow such changes, the desired column-types is a
593 : * parameter of COLcopy.
594 : *
595 : * - access mode. If we want a read-only copy of a read-only BAT, a
596 : * VIEW may do (in this case, the user may be after just an
597 : * independent BAT header and id). This is indicated by the
598 : * parameter (writable = FALSE).
599 : *
600 : * In other cases, we really want an independent physical copy
601 : * (writable = TRUE). Changing the mode to BAT_WRITE will be a
602 : * zero-cost operation if the BAT was copied with (writable = TRUE).
603 : *
604 : * In GDK, the result is a BAT that is BAT_WRITE iff (writable ==
605 : * TRUE).
606 : *
607 : * In these cases the copy becomes a logical view on the original,
608 : * which ensures that the original cannot be modified or destroyed
609 : * (which could affect the shared heaps).
610 : */
611 : static bool
612 622 : wrongtype(int t1, int t2)
613 : {
614 : /* check if types are compatible. be extremely forgiving */
615 622 : if (t1 != TYPE_void) {
616 623 : t1 = ATOMtype(ATOMstorage(t1));
617 623 : t2 = ATOMtype(ATOMstorage(t2));
618 623 : if (t1 != t2) {
619 564 : if (ATOMvarsized(t1) ||
620 564 : ATOMvarsized(t2) ||
621 564 : t1 == TYPE_msk || t2 == TYPE_msk ||
622 564 : ATOMsize(t1) != ATOMsize(t2))
623 0 : return true;
624 : }
625 : }
626 : return false;
627 : }
628 :
629 : /*
630 : * There are four main implementation cases:
631 : * (1) we are allowed to return a view (zero effort),
632 : * (2) the result is void,void (zero effort),
633 : * (3) we can copy the heaps (memcopy, or even VM page sharing)
634 : * (4) we must insert BUN-by-BUN into the result (fallback)
635 : * The latter case is still optimized for the case that the result
636 : * is bat[void,T] for a simple fixed-size type T. In that case we
637 : * do inline array[T] inserts.
638 : */
639 : BAT *
640 53075 : COLcopy(BAT *b, int tt, bool writable, role_t role)
641 : {
642 53075 : bool slowcopy = false;
643 53075 : BAT *bn = NULL;
644 53075 : BATiter bi;
645 53075 : char strhash[GDK_STRHASHSIZE];
646 :
647 53075 : BATcheck(b, NULL);
648 :
649 : /* maybe a bit ugly to change the requested bat type?? */
650 53075 : if (b->ttype == TYPE_void && !writable)
651 53075 : tt = TYPE_void;
652 :
653 53075 : if (tt != b->ttype && wrongtype(tt, b->ttype)) {
654 0 : GDKerror("wrong tail-type requested\n");
655 0 : return NULL;
656 : }
657 :
658 : /* in case of a string bat, we save the string heap hash table
659 : * while we have the lock so that we can restore it in the copy;
660 : * this is because during our operation, a parallel thread could
661 : * be adding strings to the vheap which would modify the hash
662 : * table and that would result in buckets containing values
663 : * beyond the original vheap that we're copying */
664 53075 : MT_lock_set(&b->theaplock);
665 53122 : BAT *pb = NULL, *pvb = NULL;
666 53122 : if (b->theap->parentid != b->batCacheid) {
667 11525 : pb = BBP_desc(b->theap->parentid);
668 11525 : MT_lock_set(&pb->theaplock);
669 : }
670 53122 : if (b->tvheap &&
671 16730 : b->tvheap->parentid != b->batCacheid &&
672 11623 : b->tvheap->parentid != b->theap->parentid) {
673 10952 : pvb = BBP_desc(b->tvheap->parentid);
674 10952 : MT_lock_set(&pvb->theaplock);
675 : }
676 53121 : bi = bat_iterator_nolock(b);
677 53121 : if (ATOMstorage(b->ttype) == TYPE_str && b->tvheap->free >= GDK_STRHASHSIZE)
678 12454 : memcpy(strhash, b->tvheap->base, GDK_STRHASHSIZE);
679 :
680 53121 : bat_iterator_incref(&bi);
681 53123 : if (pvb)
682 10955 : MT_lock_unset(&pvb->theaplock);
683 53114 : if (pb)
684 11520 : MT_lock_unset(&pb->theaplock);
685 53123 : MT_lock_unset(&b->theaplock);
686 :
687 : /* first try case (1); create a view, possibly with different
688 : * atom-types */
689 53114 : if (!writable &&
690 53114 : role == TRANSIENT &&
691 23058 : bi.restricted == BAT_READ &&
692 18636 : ATOMstorage(b->ttype) != TYPE_msk && /* no view on TYPE_msk */
693 18636 : (bi.h == NULL ||
694 18636 : bi.h->parentid == b->batCacheid ||
695 3597 : BBP_desc(bi.h->parentid)->batRestricted == BAT_READ)) {
696 18636 : bn = VIEWcreate(b->hseqbase, b, 0, BUN_MAX);
697 18636 : if (bn == NULL) {
698 0 : goto bunins_failed;
699 : }
700 18636 : if (tt != bn->ttype) {
701 56 : bn->ttype = tt;
702 56 : if (bn->tvheap && !ATOMvarsized(tt)) {
703 0 : if (bn->tvheap->parentid != bn->batCacheid)
704 0 : BBPrelease(bn->tvheap->parentid);
705 0 : HEAPdecref(bn->tvheap, false);
706 0 : bn->tvheap = NULL;
707 : }
708 56 : bn->tseqbase = ATOMtype(tt) == TYPE_oid ? bi.tseq : oid_nil;
709 : }
710 18636 : bat_iterator_end(&bi);
711 18636 : return bn;
712 : } else {
713 : /* check whether we need case (4); BUN-by-BUN copy (by
714 : * setting slowcopy to true) */
715 34478 : if (ATOMsize(tt) != ATOMsize(bi.type)) {
716 : /* oops, void materialization */
717 : slowcopy = true;
718 33910 : } else if (bi.h && bi.h->parentid != b->batCacheid &&
719 7917 : BATcapacity(BBP_desc(bi.h->parentid)) > bi.count + bi.count) {
720 : /* reduced slice view: do not copy too much
721 : * garbage */
722 : slowcopy = true;
723 26201 : } else if (bi.vh && bi.vh->parentid != b->batCacheid &&
724 10388 : BATcount(BBP_desc(bi.vh->parentid)) > bi.count + bi.count) {
725 : /* reduced vheap view: do not copy too much
726 : * garbage; this really is a heuristic since the
727 : * vheap could be used completely, even if the
728 : * offset heap is only (less than) half the size
729 : * of the parent's offset heap */
730 10556 : slowcopy = true;
731 : }
732 :
733 34478 : bn = COLnew2(b->hseqbase, tt, bi.count, role, bi.width);
734 34441 : if (bn == NULL) {
735 0 : goto bunins_failed;
736 : }
737 34441 : if (bn->tvheap != NULL && bn->tvheap->base == NULL) {
738 : /* this combination can happen since the last
739 : * argument of COLnew2 not being zero triggers a
740 : * skip in the allocation of the tvheap */
741 12699 : if (ATOMheap(bn->ttype, bn->tvheap, bn->batCapacity) != GDK_SUCCEED) {
742 0 : goto bunins_failed;
743 : }
744 : }
745 :
746 34457 : if (tt == TYPE_void) {
747 : /* case (2): a void,void result => nothing to
748 : * copy! */
749 1161 : bn->theap->free = 0;
750 33296 : } else if (!slowcopy) {
751 : /* case (3): just copy the heaps */
752 22755 : if (bn->tvheap && HEAPextend(bn->tvheap, bi.vhfree, true) != GDK_SUCCEED) {
753 0 : goto bunins_failed;
754 : }
755 22752 : memcpy(bn->theap->base, bi.base, bi.hfree);
756 22752 : bn->theap->free = bi.hfree;
757 22752 : bn->theap->dirty = true;
758 22752 : if (bn->tvheap) {
759 9736 : memcpy(bn->tvheap->base, bi.vh->base, bi.vhfree);
760 9736 : bn->tvheap->free = bi.vhfree;
761 9736 : bn->tvheap->dirty = true;
762 9736 : bn->tascii = bi.ascii;
763 9736 : if (ATOMstorage(b->ttype) == TYPE_str && bi.vhfree >= GDK_STRHASHSIZE)
764 8901 : memcpy(bn->tvheap->base, strhash, GDK_STRHASHSIZE);
765 : }
766 :
767 : /* make sure we use the correct capacity */
768 22752 : if (ATOMstorage(bn->ttype) == TYPE_msk)
769 32 : bn->batCapacity = (BUN) (bn->theap->size * 8);
770 22720 : else if (bn->ttype)
771 22720 : bn->batCapacity = (BUN) (bn->theap->size >> bn->tshift);
772 : else
773 0 : bn->batCapacity = 0;
774 21090 : } else if (tt != TYPE_void || ATOMextern(tt)) {
775 : /* case (4): one-by-one BUN insert (really slow) */
776 10541 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
777 :
778 8117112 : TIMEOUT_LOOP_IDX_DECL(p, bi.count, qry_ctx) {
779 8095560 : const void *t = BUNtail(bi, p);
780 :
781 8080008 : if (bunfastapp_nocheck(bn, t) != GDK_SUCCEED) {
782 0 : goto bunins_failed;
783 : }
784 : }
785 10554 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed, qry_ctx));
786 10549 : bn->theap->dirty |= bi.count > 0;
787 : } else if (tt != TYPE_void && bi.type == TYPE_void) {
788 : /* case (4): optimized for unary void
789 : * materialization */
790 : oid cur = bi.tseq, *dst = (oid *) Tloc(bn, 0);
791 : const oid inc = !is_oid_nil(cur);
792 :
793 : for (BUN p = 0; p < bi.count; p++) {
794 : dst[p] = cur;
795 : cur += inc;
796 : }
797 : bn->theap->free = bi.count * sizeof(oid);
798 : bn->theap->dirty |= bi.count > 0;
799 : } else if (ATOMstorage(bi.type) == TYPE_msk) {
800 : /* convert number of bits to number of bytes,
801 : * and round the latter up to a multiple of
802 : * 4 (copy in units of 4 bytes) */
803 : bn->theap->free = ((bi.count + 31) / 32) * 4;
804 : memcpy(Tloc(bn, 0), bi.base, bn->theap->free);
805 : bn->theap->dirty |= bi.count > 0;
806 : } else {
807 : /* case (4): optimized for simple array copy */
808 : bn->theap->free = bi.count << bn->tshift;
809 : memcpy(Tloc(bn, 0), bi.base, bn->theap->free);
810 : bn->theap->dirty |= bi.count > 0;
811 : }
812 : /* copy all properties (size+other) from the source bat */
813 34462 : BATsetcount(bn, bi.count);
814 : }
815 : /* set properties (note that types may have changed in the copy) */
816 68338 : if (ATOMtype(tt) == ATOMtype(bi.type)) {
817 34443 : if (ATOMtype(tt) == TYPE_oid) {
818 9199 : BATtseqbase(bn, bi.tseq);
819 : } else {
820 25244 : BATtseqbase(bn, oid_nil);
821 : }
822 34375 : BATkey(bn, bi.key);
823 34386 : bn->tsorted = bi.sorted;
824 34386 : bn->trevsorted = bi.revsorted;
825 34386 : bn->tnorevsorted = bi.norevsorted;
826 34386 : if (bi.nokey[0] != bi.nokey[1]) {
827 2763 : bn->tnokey[0] = bi.nokey[0];
828 2763 : bn->tnokey[1] = bi.nokey[1];
829 : } else {
830 31623 : bn->tnokey[0] = bn->tnokey[1] = 0;
831 : }
832 34386 : bn->tnosorted = bi.nosorted;
833 34386 : bn->tnonil = bi.nonil;
834 34386 : bn->tnil = bi.nil;
835 34386 : bn->tminpos = bi.minpos;
836 34386 : bn->tmaxpos = bi.maxpos;
837 34386 : bn->tunique_est = bi.unique_est;
838 3 : } else if (ATOMstorage(tt) == ATOMstorage(b->ttype) &&
839 3 : ATOMcompare(tt) == ATOMcompare(b->ttype)) {
840 3 : BUN h = bi.count;
841 3 : bn->tsorted = bi.sorted;
842 3 : bn->trevsorted = bi.revsorted;
843 3 : BATkey(bn, bi.key);
844 3 : bn->tnonil = bi.nonil;
845 3 : bn->tnil = bi.nil;
846 3 : if (bi.nosorted > 0 && bi.nosorted < h)
847 1 : bn->tnosorted = bi.nosorted;
848 : else
849 2 : bn->tnosorted = 0;
850 3 : if (bi.norevsorted > 0 && bi.norevsorted < h)
851 3 : bn->tnorevsorted = bi.norevsorted;
852 : else
853 0 : bn->tnorevsorted = 0;
854 3 : if (bi.nokey[0] < h &&
855 3 : bi.nokey[1] < h &&
856 : bi.nokey[0] != bi.nokey[1]) {
857 0 : bn->tnokey[0] = bi.nokey[0];
858 0 : bn->tnokey[1] = bi.nokey[1];
859 : } else {
860 3 : bn->tnokey[0] = bn->tnokey[1] = 0;
861 : }
862 3 : bn->tminpos = bi.minpos;
863 3 : bn->tmaxpos = bi.maxpos;
864 3 : bn->tunique_est = bi.unique_est;
865 : } else {
866 0 : bn->tsorted = bn->trevsorted = false; /* set based on count later */
867 0 : bn->tnonil = bn->tnil = false;
868 0 : bn->tkey = false;
869 0 : bn->tnosorted = bn->tnorevsorted = 0;
870 0 : bn->tnokey[0] = bn->tnokey[1] = 0;
871 : }
872 34389 : if (BATcount(bn) <= 1) {
873 5984 : bn->tsorted = ATOMlinear(b->ttype);
874 5984 : bn->trevsorted = ATOMlinear(b->ttype);
875 5984 : bn->tkey = true;
876 : }
877 34389 : bat_iterator_end(&bi);
878 34466 : if (!writable)
879 4418 : bn->batRestricted = BAT_READ;
880 34466 : TRC_DEBUG(ALGO, ALGOBATFMT " -> " ALGOBATFMT "\n",
881 : ALGOBATPAR(b), ALGOBATPAR(bn));
882 : return bn;
883 0 : bunins_failed:
884 0 : bat_iterator_end(&bi);
885 0 : BBPreclaim(bn);
886 : return NULL;
887 : }
888 :
889 : /* Append an array of values of length count to the bat. For
890 : * fixed-sized values, `values' is an array of values, for
891 : * variable-sized values, `values' is an array of pointers to values.
892 : * If values equals NULL, count times nil will be appended. */
893 : gdk_return
894 55124907 : BUNappendmulti(BAT *b, const void *values, BUN count, bool force)
895 : {
896 55124907 : BUN p;
897 55124907 : BUN nunique = 0;
898 :
899 55124907 : BATcheck(b, GDK_FAIL);
900 :
901 55124907 : assert(!VIEWtparent(b));
902 :
903 55124907 : if (count == 0)
904 : return GDK_SUCCEED;
905 :
906 55121413 : TRC_DEBUG(ALGO, ALGOBATFMT " appending " BUNFMT " values%s\n", ALGOBATPAR(b), count, values ? "" : " (all nil)");
907 :
908 55118828 : p = BATcount(b); /* insert at end */
909 55118828 : if (p == BUN_MAX || BATcount(b) + count >= BUN_MAX) {
910 0 : GDKerror("bat too large\n");
911 0 : return GDK_FAIL;
912 : }
913 :
914 55118828 : ALIGNapp(b, force, GDK_FAIL);
915 : /* load hash so that we can maintain it */
916 55165439 : (void) BATcheckhash(b);
917 :
918 55156519 : if (b->ttype == TYPE_void && BATtdense(b)) {
919 0 : const oid *ovals = values;
920 0 : bool dense = b->batCount == 0 || (ovals != NULL && b->tseqbase + 1 == ovals[0]);
921 0 : if (ovals) {
922 0 : for (BUN i = 1; dense && i < count; i++) {
923 0 : dense = ovals[i - 1] + 1 == ovals[i];
924 : }
925 : }
926 0 : if (dense) {
927 0 : MT_lock_set(&b->theaplock);
928 0 : assert(BATtdense(b)); /* no change (coverity) */
929 0 : if (b->batCount == 0)
930 0 : b->tseqbase = ovals ? ovals[0] : oid_nil;
931 0 : BATsetcount(b, BATcount(b) + count);
932 0 : MT_lock_unset(&b->theaplock);
933 0 : return GDK_SUCCEED;
934 : } else {
935 : /* we need to materialize b; allocate enough capacity */
936 0 : if (BATmaterialize(b, BATcount(b) + count) != GDK_SUCCEED)
937 : return GDK_FAIL;
938 : }
939 : }
940 :
941 55156519 : if (unshare_varsized_heap(b) != GDK_SUCCEED) {
942 : return GDK_FAIL;
943 : }
944 :
945 55141115 : if (BATcount(b) + count > BATcapacity(b)) {
946 : /* if needed space exceeds a normal growth extend just
947 : * with what's needed */
948 12819 : BUN ncap = BATcount(b) + count;
949 12819 : BUN grows = BATgrows(b);
950 :
951 12819 : if (ncap > grows)
952 : grows = ncap;
953 12819 : gdk_return rc = BATextend(b, grows);
954 12829 : if (rc != GDK_SUCCEED)
955 : return rc;
956 : }
957 :
958 55141125 : const void *t = b->ttype == TYPE_msk ? &(msk){false} : ATOMnilptr(b->ttype);
959 55141125 : MT_lock_set(&b->theaplock);
960 55162412 : BATiter bi = bat_iterator_nolock(b);
961 55162412 : const ValRecord *prop;
962 55162412 : ValRecord minprop, maxprop;
963 55162412 : const void *minbound = NULL, *maxbound = NULL;
964 55162423 : if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL &&
965 11 : VALcopy(&minprop, prop) != NULL)
966 11 : minbound = VALptr(&minprop);
967 55094094 : if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL &&
968 10 : VALcopy(&maxprop, prop) != NULL)
969 10 : maxbound = VALptr(&maxprop);
970 55128891 : const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
971 55161128 : bool setnil = false;
972 55161128 : MT_lock_unset(&b->theaplock);
973 55192950 : MT_rwlock_wrlock(&b->thashlock);
974 55191029 : if (values && b->ttype) {
975 55184407 : int (*atomcmp) (const void *, const void *) = ATOMcompare(b->ttype);
976 55184407 : const void *atomnil = ATOMnilptr(b->ttype);
977 55184407 : const void *minvalp = NULL, *maxvalp = NULL;
978 55184407 : if (b->tvheap) {
979 22213229 : if (bi.minpos != BUN_NONE)
980 19299963 : minvalp = BUNtvar(bi, bi.minpos);
981 22213229 : if (bi.maxpos != BUN_NONE)
982 19292767 : maxvalp = BUNtvar(bi, bi.maxpos);
983 22213229 : const void *vbase = b->tvheap->base;
984 44438633 : for (BUN i = 0; i < count; i++) {
985 22238487 : t = ((void **) values)[i];
986 22238487 : bool isnil = atomcmp(t, atomnil) == 0;
987 22223580 : gdk_return rc;
988 22223580 : if (notnull && isnil) {
989 0 : assert(0);
990 : GDKerror("NULL value not within bounds\n");
991 : rc = GDK_FAIL;
992 22223580 : } else if (minbound &&
993 22223580 : !isnil &&
994 0 : atomcmp(t, minbound) < 0) {
995 0 : assert(0);
996 : GDKerror("value not within bounds\n");
997 : rc = GDK_FAIL;
998 22223580 : } else if (maxbound &&
999 0 : !isnil &&
1000 0 : atomcmp(t, maxbound) >= 0) {
1001 0 : assert(0);
1002 : GDKerror("value not within bounds\n");
1003 : rc = GDK_FAIL;
1004 : } else {
1005 22223580 : rc = tfastins_nocheckVAR(b, p, t);
1006 : }
1007 22225181 : if (rc != GDK_SUCCEED) {
1008 0 : MT_rwlock_wrunlock(&b->thashlock);
1009 0 : if (minbound)
1010 0 : VALclear(&minprop);
1011 0 : if (maxbound)
1012 0 : VALclear(&maxprop);
1013 0 : return rc;
1014 : }
1015 22225181 : if (vbase != b->tvheap->base) {
1016 : /* tvheap changed location, so
1017 : * pointers may need to be
1018 : * updated (not if they were
1019 : * initialized from t below, but
1020 : * we don't know) */
1021 3446 : BUN minpos = bi.minpos;
1022 3446 : BUN maxpos = bi.maxpos;
1023 3446 : MT_lock_set(&b->theaplock);
1024 3446 : bi = bat_iterator_nolock(b);
1025 3446 : MT_lock_unset(&b->theaplock);
1026 3446 : bi.minpos = minpos;
1027 3446 : bi.maxpos = maxpos;
1028 3446 : vbase = b->tvheap->base;
1029 3446 : if (bi.minpos != BUN_NONE)
1030 2686 : minvalp = BUNtvar(bi, bi.minpos);
1031 3446 : if (bi.maxpos != BUN_NONE)
1032 2685 : maxvalp = BUNtvar(bi, bi.maxpos);
1033 : }
1034 22225181 : if (!isnil) {
1035 20529524 : if (p == 0) {
1036 212974 : bi.minpos = bi.maxpos = 0;
1037 212974 : minvalp = maxvalp = t;
1038 : } else {
1039 39565031 : if (bi.minpos != BUN_NONE &&
1040 19248768 : atomcmp(minvalp, t) > 0) {
1041 84823 : bi.minpos = p;
1042 84823 : minvalp = t;
1043 : }
1044 39565506 : if (bi.maxpos != BUN_NONE &&
1045 19248733 : atomcmp(maxvalp, t) < 0) {
1046 2251146 : bi.maxpos = p;
1047 2251146 : maxvalp = t;
1048 : }
1049 : }
1050 : } else {
1051 : setnil = true;
1052 : }
1053 22225404 : p++;
1054 : }
1055 22200146 : if (minbound)
1056 0 : VALclear(&minprop);
1057 22201712 : if (maxbound)
1058 0 : VALclear(&maxprop);
1059 22194797 : if (b->thash) {
1060 6341 : p -= count;
1061 12682 : for (BUN i = 0; i < count; i++) {
1062 6341 : t = ((void **) values)[i];
1063 6341 : HASHappend_locked(b, p, t);
1064 6341 : p++;
1065 : }
1066 6341 : nunique = b->thash ? b->thash->nunique : 0;
1067 : }
1068 32971178 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
1069 7951 : bi.minpos = bi.maxpos = BUN_NONE;
1070 7951 : minvalp = maxvalp = NULL;
1071 7951 : assert(!b->tnil);
1072 15902 : for (BUN i = 0; i < count; i++) {
1073 7951 : t = (void *) ((char *) values + (i << b->tshift));
1074 7951 : mskSetVal(b, p, *(msk *) t);
1075 7951 : p++;
1076 : }
1077 : } else {
1078 32963227 : if (bi.minpos != BUN_NONE)
1079 31379510 : minvalp = BUNtloc(bi, bi.minpos);
1080 32963227 : if (bi.maxpos != BUN_NONE)
1081 31429115 : maxvalp = BUNtloc(bi, bi.maxpos);
1082 65952902 : for (BUN i = 0; i < count; i++) {
1083 32999272 : t = (void *) ((char *) values + (i << b->tshift));
1084 32999272 : gdk_return rc = tfastins_nocheckFIX(b, p, t);
1085 32999712 : if (rc != GDK_SUCCEED) {
1086 0 : MT_rwlock_wrunlock(&b->thashlock);
1087 0 : return rc;
1088 : }
1089 32999712 : if (b->thash) {
1090 486678 : HASHappend_locked(b, p, t);
1091 : }
1092 32999712 : if (atomcmp(t, atomnil) != 0) {
1093 31946144 : if (p == 0) {
1094 383327 : bi.minpos = bi.maxpos = 0;
1095 383327 : minvalp = maxvalp = t;
1096 : } else {
1097 62969185 : if (bi.minpos != BUN_NONE &&
1098 31404244 : atomcmp(minvalp, t) > 0) {
1099 41428 : bi.minpos = p;
1100 41428 : minvalp = t;
1101 : }
1102 63018562 : if (bi.maxpos != BUN_NONE &&
1103 31455508 : atomcmp(maxvalp, t) < 0) {
1104 7950816 : bi.maxpos = p;
1105 7950816 : maxvalp = t;
1106 : }
1107 : }
1108 : } else {
1109 : setnil = true;
1110 : }
1111 32989675 : p++;
1112 : }
1113 32953630 : nunique = b->thash ? b->thash->nunique : 0;
1114 : }
1115 : } else {
1116 : /* inserting nils, unless it's msk */
1117 14453 : for (BUN i = 0; i < count; i++) {
1118 7828 : gdk_return rc = tfastins_nocheck(b, p, t);
1119 7830 : if (rc != GDK_SUCCEED) {
1120 0 : MT_rwlock_wrunlock(&b->thashlock);
1121 0 : return rc;
1122 : }
1123 7830 : if (b->thash) {
1124 0 : HASHappend_locked(b, p, t);
1125 : }
1126 7831 : p++;
1127 : }
1128 6625 : nunique = b->thash ? b->thash->nunique : 0;
1129 6625 : setnil |= b->ttype != TYPE_msk;
1130 : }
1131 55163003 : MT_lock_set(&b->theaplock);
1132 55144688 : if (setnil) {
1133 2752932 : b->tnil = true;
1134 2752932 : b->tnonil = false;
1135 : }
1136 55144688 : b->tminpos = bi.minpos;
1137 55144688 : b->tmaxpos = bi.maxpos;
1138 55144688 : if (count > BATcount(b) / gdk_unique_estimate_keep_fraction)
1139 17564380 : b->tunique_est = 0;
1140 :
1141 55144688 : if (b->ttype == TYPE_oid) {
1142 : /* spend extra effort on oid (possible candidate list) */
1143 622169 : if (values == NULL || is_oid_nil(((oid *) values)[0])) {
1144 159 : b->tsorted = false;
1145 159 : b->trevsorted = false;
1146 159 : b->tkey = false;
1147 159 : b->tseqbase = oid_nil;
1148 : } else {
1149 622010 : if (b->batCount == 0) {
1150 7350 : b->tsorted = true;
1151 7350 : b->trevsorted = true;
1152 7350 : b->tkey = true;
1153 7350 : b->tseqbase = count == 1 ? ((oid *) values)[0] : oid_nil;
1154 : } else {
1155 614660 : if (!is_oid_nil(b->tseqbase) &&
1156 358060 : (count > 1 ||
1157 358060 : b->tseqbase + b->batCount != ((oid *) values)[0]))
1158 3086 : b->tseqbase = oid_nil;
1159 614660 : if (b->tsorted && !is_oid_nil(((oid *) b->theap->base)[b->batCount - 1]) && ((oid *) b->theap->base)[b->batCount - 1] > ((oid *) values)[0]) {
1160 59 : b->tsorted = false;
1161 59 : if (b->tnosorted == 0)
1162 59 : b->tnosorted = b->batCount;
1163 : }
1164 614660 : if (b->trevsorted && !is_oid_nil(((oid *) values)[0]) && ((oid *) b->theap->base)[b->batCount - 1] < ((oid *) values)[0]) {
1165 6525 : b->trevsorted = false;
1166 6525 : if (b->tnorevsorted == 0)
1167 6525 : b->tnorevsorted = b->batCount;
1168 : }
1169 614660 : if (b->tkey) {
1170 609736 : if (((oid *) b->theap->base)[b->batCount - 1] == ((oid *) values)[0]) {
1171 17 : b->tkey = false;
1172 17 : if (b->tnokey[1] == 0) {
1173 17 : b->tnokey[0] = b->batCount - 1;
1174 17 : b->tnokey[1] = b->batCount;
1175 : }
1176 609719 : } else if (!b->tsorted && !b->trevsorted)
1177 46 : b->tkey = false;
1178 : }
1179 : }
1180 622010 : for (BUN i = 1; i < count; i++) {
1181 0 : if (is_oid_nil(((oid *) values)[i])) {
1182 0 : b->tsorted = false;
1183 0 : b->trevsorted = false;
1184 0 : b->tkey = false;
1185 0 : b->tseqbase = oid_nil;
1186 0 : break;
1187 : }
1188 0 : if (((oid *) values)[i - 1] == ((oid *) values)[i]) {
1189 0 : b->tkey = false;
1190 0 : if (b->tnokey[1] == 0) {
1191 0 : b->tnokey[0] = b->batCount + i - 1;
1192 0 : b->tnokey[1] = b->batCount + i;
1193 : }
1194 0 : } else if (((oid *) values)[i - 1] > ((oid *) values)[i]) {
1195 0 : b->tsorted = false;
1196 0 : if (b->tnosorted == 0)
1197 0 : b->tnosorted = b->batCount + i;
1198 0 : if (!b->trevsorted)
1199 0 : b->tkey = false;
1200 : } else {
1201 0 : if (((oid *) values)[i - 1] + 1 != ((oid *) values)[i])
1202 0 : b->tseqbase = oid_nil;
1203 0 : b->trevsorted = false;
1204 0 : if (b->tnorevsorted == 0)
1205 0 : b->tnorevsorted = b->batCount + i;
1206 0 : if (!b->tsorted)
1207 0 : b->tkey = false;
1208 : }
1209 : }
1210 : }
1211 54522519 : } else if (!ATOMlinear(b->ttype)) {
1212 7951 : b->tsorted = b->trevsorted = b->tkey = false;
1213 54514568 : } else if (b->batCount == 0) {
1214 602566 : if (values == NULL) {
1215 0 : b->tsorted = b->trevsorted = true;
1216 0 : b->tkey = count == 1;
1217 0 : b->tunique_est = 1;
1218 : } else {
1219 602566 : int c;
1220 602566 : switch (count) {
1221 602226 : case 1:
1222 602226 : b->tsorted = b->trevsorted = b->tkey = true;
1223 602226 : b->tunique_est = 1;
1224 602226 : break;
1225 123 : case 2:
1226 123 : if (b->tvheap)
1227 41 : c = ATOMcmp(b->ttype,
1228 : ((void **) values)[0],
1229 : ((void **) values)[1]);
1230 : else
1231 82 : c = ATOMcmp(b->ttype,
1232 : values,
1233 : (char *) values + b->twidth);
1234 123 : b->tsorted = c <= 0;
1235 123 : b->tnosorted = !b->tsorted;
1236 123 : b->trevsorted = c >= 0;
1237 123 : b->tnorevsorted = !b->trevsorted;
1238 123 : b->tkey = c != 0;
1239 123 : b->tnokey[0] = 0;
1240 123 : b->tnokey[1] = !b->tkey;
1241 123 : b->tunique_est = (double) (1 + b->tkey);
1242 123 : break;
1243 217 : default:
1244 217 : b->tsorted = b->trevsorted = b->tkey = false;
1245 217 : break;
1246 : }
1247 : }
1248 53912002 : } else if (b->batCount == 1 && count == 1) {
1249 434381 : bi = bat_iterator_nolock(b);
1250 434381 : t = b->ttype == TYPE_msk ? &(msk){false} : ATOMnilptr(b->ttype);
1251 434381 : if (values != NULL) {
1252 434613 : if (b->tvheap)
1253 164448 : t = ((void **) values)[0];
1254 : else
1255 : t = values;
1256 : }
1257 434381 : int c = ATOMcmp(b->ttype, BUNtail(bi, 0), t);
1258 430733 : b->tsorted = c <= 0;
1259 430733 : b->tnosorted = !b->tsorted;
1260 430733 : b->trevsorted = c >= 0;
1261 430733 : b->tnorevsorted = !b->trevsorted;
1262 430733 : b->tkey = c != 0;
1263 430733 : b->tnokey[0] = 0;
1264 430733 : b->tnokey[1] = !b->tkey;
1265 430733 : b->tunique_est = (double) (1 + b->tkey);
1266 : } else {
1267 53477621 : b->tsorted = b->trevsorted = b->tkey = false;
1268 : }
1269 55141040 : BATsetcount(b, p);
1270 55148062 : if (nunique != 0)
1271 493019 : b->tunique_est = (double) nunique;
1272 55148062 : MT_lock_unset(&b->theaplock);
1273 55192392 : MT_rwlock_wrunlock(&b->thashlock);
1274 :
1275 55185130 : OIDXdestroy(b);
1276 55185955 : STRMPdestroy(b); /* TODO: use STRMPappendBitstring */
1277 55181143 : RTREEdestroy(b);
1278 55181143 : return GDK_SUCCEED;
1279 : }
1280 :
1281 : /* Append a single value to the bat. */
1282 : gdk_return
1283 39665917 : BUNappend(BAT *b, const void *t, bool force)
1284 : {
1285 39665917 : return BUNappendmulti(b, b->ttype && b->tvheap ? (const void *) &t : (const void *) t, 1, force);
1286 : }
1287 :
1288 : gdk_return
1289 4 : BUNdelete(BAT *b, oid o)
1290 : {
1291 4 : BATiter bi = bat_iterator(b);
1292 :
1293 4 : if (bi.count == 0) {
1294 0 : bat_iterator_end(&bi);
1295 0 : GDKerror("cannot delete from empty bat\n");
1296 0 : return GDK_FAIL;
1297 : }
1298 4 : if (is_oid_nil(b->hseqbase)) {
1299 0 : bat_iterator_end(&bi);
1300 0 : GDKerror("cannot delete from bat with VOID hseqbase\n");
1301 0 : return GDK_FAIL;
1302 : }
1303 :
1304 4 : BUN p = o - b->hseqbase;
1305 :
1306 4 : if (bi.count - 1 != p) {
1307 2 : bat_iterator_end(&bi);
1308 2 : GDKerror("cannot delete anything other than last value\n");
1309 2 : return GDK_FAIL;
1310 : }
1311 2 : if (b->batInserted >= bi.count) {
1312 0 : bat_iterator_end(&bi);
1313 0 : GDKerror("cannot delete committed value\n");
1314 0 : return GDK_FAIL;
1315 : }
1316 :
1317 2 : TRC_DEBUG(ALGO, ALGOBATFMT " deleting oid " OIDFMT "\n", ALGOBATPAR(b), o);
1318 : /* load hash so that we can maintain it */
1319 2 : (void) BATcheckhash(b);
1320 :
1321 2 : BUN nunique = HASHdelete(&bi, p, BUNtail(bi, p));
1322 2 : ATOMdel(b->ttype, b->tvheap, (var_t *) BUNtloc(bi, p));
1323 2 : bat_iterator_end(&bi);
1324 :
1325 2 : MT_lock_set(&b->theaplock);
1326 2 : if (b->tmaxpos == p)
1327 0 : b->tmaxpos = BUN_NONE;
1328 2 : if (b->tminpos == p)
1329 0 : b->tminpos = BUN_NONE;
1330 2 : if (b->tnosorted >= p)
1331 0 : b->tnosorted = 0;
1332 2 : if (b->tnorevsorted >= p)
1333 0 : b->tnorevsorted = 0;
1334 2 : b->batCount--;
1335 2 : if (nunique != 0)
1336 0 : b->tunique_est = (double) nunique;
1337 2 : else if (BATcount(b) < gdk_unique_estimate_keep_fraction)
1338 2 : b->tunique_est = 0;
1339 2 : if (b->batCount <= 1) {
1340 : /* some trivial properties */
1341 0 : b->tkey = true;
1342 0 : b->tsorted = b->trevsorted = true;
1343 0 : b->tnosorted = b->tnorevsorted = 0;
1344 0 : if (b->batCount == 0) {
1345 0 : b->tnil = false;
1346 0 : b->tnonil = true;
1347 : }
1348 : }
1349 2 : MT_lock_unset(&b->theaplock);
1350 2 : OIDXdestroy(b);
1351 2 : STRMPdestroy(b);
1352 2 : RTREEdestroy(b);
1353 2 : PROPdestroy(b);
1354 2 : return GDK_SUCCEED;
1355 : }
1356 :
1357 : /* @- BUN replace
1358 : * The last operation in this context is BUN replace. It assumes that
1359 : * the header denotes a key. The old value association is destroyed
1360 : * (if it exists in the first place) and the new value takes its
1361 : * place.
1362 : *
1363 : * In order to make updates on void columns workable; replaces on them
1364 : * are always done in-place. Performing them without bun-movements
1365 : * greatly simplifies the problem. The 'downside' is that when
1366 : * transaction management has to be performed, replaced values should
1367 : * be saved explicitly.
1368 : */
1369 : static gdk_return
1370 1123523 : BUNinplacemulti(BAT *b, const oid *positions, const void *values, BUN count, bool force, bool autoincr)
1371 : {
1372 1123523 : BUN prv, nxt;
1373 1123523 : const void *val;
1374 1123523 : int (*atomcmp) (const void *, const void *) = ATOMcompare(b->ttype);
1375 1123523 : const void *atomnil = ATOMnilptr(b->ttype);
1376 :
1377 1123523 : MT_lock_set(&b->theaplock);
1378 1134487 : BUN last = BATcount(b) - 1;
1379 1134487 : BATiter bi = bat_iterator_nolock(b);
1380 : /* zap alignment info */
1381 1134487 : if (!force && (b->batRestricted != BAT_WRITE ||
1382 573503 : ((ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1))) {
1383 0 : MT_lock_unset(&b->theaplock);
1384 0 : GDKerror("access denied to %s, aborting.\n",
1385 : BATgetId(b));
1386 0 : assert(0);
1387 : return GDK_FAIL;
1388 : }
1389 1134487 : TRC_DEBUG(ALGO, ALGOBATFMT " replacing " BUNFMT " values\n", ALGOBATPAR(b), count);
1390 1124386 : if (b->ttype == TYPE_void) {
1391 0 : PROPdestroy(b);
1392 0 : b->tminpos = BUN_NONE;
1393 0 : b->tmaxpos = BUN_NONE;
1394 0 : b->tunique_est = 0.0;
1395 1124386 : } else if (count > BATcount(b) / gdk_unique_estimate_keep_fraction) {
1396 546430 : b->tunique_est = 0;
1397 : }
1398 1124386 : const ValRecord *prop;
1399 1124386 : ValRecord minprop, maxprop;
1400 1124386 : const void *minbound = NULL, *maxbound = NULL;
1401 1124386 : if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL &&
1402 0 : VALcopy(&minprop, prop) != NULL)
1403 0 : minbound = VALptr(&minprop);
1404 1123182 : if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL &&
1405 0 : VALcopy(&maxprop, prop) != NULL)
1406 0 : maxbound = VALptr(&maxprop);
1407 1128937 : const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
1408 1129122 : MT_lock_unset(&b->theaplock);
1409 : /* load hash so that we can maintain it */
1410 1136334 : (void) BATcheckhash(b);
1411 1134746 : MT_rwlock_wrlock(&b->thashlock);
1412 2272191 : for (BUN i = 0; i < count; i++) {
1413 1135921 : BUN p = autoincr ? positions[0] - b->hseqbase + i : positions[i] - b->hseqbase;
1414 1137074 : const void *t = b->ttype && b->tvheap ?
1415 1292050 : ((const void **) values)[i] :
1416 979792 : (const void *) ((const char *) values + (i << b->tshift));
1417 1135921 : bool isnil = atomnil && atomcmp(t, atomnil) == 0;
1418 1133033 : if (notnull && isnil) {
1419 0 : assert(0);
1420 : GDKerror("NULL value not within bounds\n");
1421 : MT_rwlock_wrunlock(&b->thashlock);
1422 : goto bailout;
1423 1133033 : } else if (!isnil &&
1424 843 : ((minbound &&
1425 1084532 : atomcmp(t, minbound) < 0) ||
1426 0 : (maxbound &&
1427 0 : atomcmp(t, maxbound) >= 0))) {
1428 0 : assert(0);
1429 : GDKerror("value not within bounds\n");
1430 : MT_rwlock_wrunlock(&b->thashlock);
1431 : goto bailout;
1432 : }
1433 :
1434 : /* retrieve old value, but if this comes from the
1435 : * logger, we need to deal with offsets that point
1436 : * outside of the valid vheap */
1437 1133876 : if (b->ttype == TYPE_void) {
1438 0 : val = BUNtpos(bi, p);
1439 1133876 : } else if (bi.type == TYPE_msk) {
1440 198 : val = BUNtmsk(bi, p);
1441 1133678 : } else if (b->tvheap) {
1442 155221 : size_t off = BUNtvaroff(bi, p);
1443 155221 : if (off < bi.vhfree)
1444 155857 : val = bi.vh->base + off;
1445 : else
1446 : val = NULL; /* bad offset */
1447 : } else {
1448 978457 : val = BUNtloc(bi, p);
1449 : }
1450 :
1451 1134512 : if (val) {
1452 1134512 : if (atomcmp(val, t) == 0)
1453 273308 : continue; /* nothing to do */
1454 861996 : if (!isnil &&
1455 164221 : b->tnil &&
1456 164219 : atomcmp(val, atomnil) == 0) {
1457 : /* if old value is nil and new value
1458 : * isn't, we're not sure anymore about
1459 : * the nil property, so we must clear
1460 : * it */
1461 164042 : MT_lock_set(&b->theaplock);
1462 164040 : b->tnil = false;
1463 164040 : MT_lock_unset(&b->theaplock);
1464 : }
1465 862000 : if (b->ttype != TYPE_void) {
1466 861624 : if (bi.maxpos != BUN_NONE) {
1467 540411 : if (!isnil && atomcmp(BUNtail(bi, bi.maxpos), t) < 0) {
1468 : /* new value is larger
1469 : * than previous
1470 : * largest */
1471 57766 : bi.maxpos = p;
1472 482645 : } else if (bi.maxpos == p && atomcmp(BUNtail(bi, bi.maxpos), t) != 0) {
1473 : /* old value is equal to
1474 : * largest and new value
1475 : * is smaller or nil (see
1476 : * above), so we don't
1477 : * know anymore which is
1478 : * the largest */
1479 506 : bi.maxpos = BUN_NONE;
1480 : }
1481 : }
1482 861624 : if (bi.minpos != BUN_NONE) {
1483 371902 : if (!isnil && atomcmp(BUNtail(bi, bi.minpos), t) > 0) {
1484 : /* new value is smaller
1485 : * than previous
1486 : * smallest */
1487 430 : bi.minpos = p;
1488 371470 : } else if (bi.minpos == p && atomcmp(BUNtail(bi, bi.minpos), t) != 0) {
1489 : /* old value is equal to
1490 : * smallest and new value
1491 : * is larger or nil (see
1492 : * above), so we don't
1493 : * know anymore which is
1494 : * the largest */
1495 634 : bi.minpos = BUN_NONE;
1496 : }
1497 : }
1498 : }
1499 861998 : HASHdelete_locked(&bi, p, val); /* first delete old value from hash */
1500 : } else {
1501 : /* out of range old value, so the properties and
1502 : * hash cannot be trusted */
1503 0 : PROPdestroy(b);
1504 0 : Hash *hs = b->thash;
1505 0 : if (hs) {
1506 0 : b->thash = NULL;
1507 0 : doHASHdestroy(b, hs);
1508 : }
1509 0 : MT_lock_set(&b->theaplock);
1510 0 : bi.minpos = BUN_NONE;
1511 0 : bi.maxpos = BUN_NONE;
1512 0 : b->tunique_est = 0.0;
1513 0 : MT_lock_unset(&b->theaplock);
1514 : }
1515 860058 : OIDXdestroy(b);
1516 862344 : STRMPdestroy(b);
1517 862779 : RTREEdestroy(b);
1518 :
1519 862814 : if (b->tvheap && b->ttype) {
1520 75463 : var_t _d;
1521 75463 : ptr _ptr;
1522 75463 : _ptr = BUNtloc(bi, p);
1523 75463 : switch (b->twidth) {
1524 8833 : case 1:
1525 8833 : _d = (var_t) * (uint8_t *) _ptr + GDK_VAROFFSET;
1526 8833 : break;
1527 60313 : case 2:
1528 60313 : _d = (var_t) * (uint16_t *) _ptr + GDK_VAROFFSET;
1529 60313 : break;
1530 6317 : case 4:
1531 6317 : _d = (var_t) * (uint32_t *) _ptr;
1532 6317 : break;
1533 : #if SIZEOF_VAR_T == 8
1534 0 : case 8:
1535 0 : _d = (var_t) * (uint64_t *) _ptr;
1536 0 : break;
1537 : #endif
1538 : default:
1539 0 : MT_UNREACHABLE();
1540 : }
1541 75463 : MT_lock_set(&b->theaplock);
1542 75496 : if (ATOMreplaceVAR(b, &_d, t) != GDK_SUCCEED) {
1543 0 : MT_lock_unset(&b->theaplock);
1544 0 : MT_rwlock_wrunlock(&b->thashlock);
1545 0 : goto bailout;
1546 : }
1547 75556 : MT_lock_unset(&b->theaplock);
1548 75609 : if (b->twidth < SIZEOF_VAR_T &&
1549 75739 : (b->twidth <= 2 ? _d - GDK_VAROFFSET : _d) >= ((size_t) 1 << (8 << b->tshift))) {
1550 : /* doesn't fit in current heap, upgrade it */
1551 80 : if (GDKupgradevarheap(b, _d, 0, bi.count) != GDK_SUCCEED) {
1552 0 : MT_rwlock_wrunlock(&b->thashlock);
1553 0 : goto bailout;
1554 : }
1555 : }
1556 : /* reinitialize iterator after possible heap upgrade */
1557 : {
1558 : /* save and restore minpos/maxpos */
1559 75609 : BUN minpos = bi.minpos;
1560 75609 : BUN maxpos = bi.maxpos;
1561 75609 : bi = bat_iterator_nolock(b);
1562 75609 : bi.minpos = minpos;
1563 75609 : bi.maxpos = maxpos;
1564 : }
1565 75609 : _ptr = BUNtloc(bi, p);
1566 75609 : switch (b->twidth) {
1567 8762 : case 1:
1568 8762 : * (uint8_t *) _ptr = (uint8_t) (_d - GDK_VAROFFSET);
1569 8762 : break;
1570 60521 : case 2:
1571 60521 : * (uint16_t *) _ptr = (uint16_t) (_d - GDK_VAROFFSET);
1572 60521 : break;
1573 6326 : case 4:
1574 6326 : * (uint32_t *) _ptr = (uint32_t) _d;
1575 6326 : break;
1576 : #if SIZEOF_VAR_T == 8
1577 0 : case 8:
1578 0 : * (uint64_t *) _ptr = (uint64_t) _d;
1579 0 : break;
1580 : #endif
1581 : default:
1582 75609 : MT_UNREACHABLE();
1583 : }
1584 787351 : } else if (ATOMstorage(b->ttype) == TYPE_msk) {
1585 182 : mskSetVal(b, p, * (msk *) t);
1586 : } else {
1587 787169 : assert(BATatoms[b->ttype].atomPut == NULL);
1588 787169 : switch (ATOMsize(b->ttype)) {
1589 : case 0: /* void */
1590 : break;
1591 15045 : case 1:
1592 15045 : ((bte *) b->theap->base)[p] = * (bte *) t;
1593 15045 : break;
1594 7565 : case 2:
1595 7565 : ((sht *) b->theap->base)[p] = * (sht *) t;
1596 7565 : break;
1597 190393 : case 4:
1598 190393 : ((int *) b->theap->base)[p] = * (int *) t;
1599 190393 : break;
1600 574166 : case 8:
1601 574166 : ((lng *) b->theap->base)[p] = * (lng *) t;
1602 574166 : break;
1603 0 : case 16:
1604 : #ifdef HAVE_HGE
1605 0 : ((hge *) b->theap->base)[p] = * (hge *) t;
1606 : #else
1607 : ((uuid *) b->theap->base)[p] = * (uuid *) t;
1608 : #endif
1609 0 : break;
1610 0 : default:
1611 0 : memcpy(BUNtloc(bi, p), t, ATOMsize(b->ttype));
1612 0 : break;
1613 : }
1614 : }
1615 :
1616 862960 : HASHinsert_locked(&bi, p, t); /* insert new value into hash */
1617 :
1618 860350 : prv = p > 0 ? p - 1 : BUN_NONE;
1619 860350 : nxt = p < last ? p + 1 : BUN_NONE;
1620 :
1621 860350 : MT_lock_set(&b->theaplock);
1622 861894 : if (b->tsorted) {
1623 3301 : if (prv != BUN_NONE &&
1624 1339 : atomcmp(t, BUNtail(bi, prv)) < 0) {
1625 56 : b->tsorted = false;
1626 56 : b->tnosorted = p;
1627 2681 : } else if (nxt != BUN_NONE &&
1628 775 : atomcmp(t, BUNtail(bi, nxt)) > 0) {
1629 695 : b->tsorted = false;
1630 695 : b->tnosorted = nxt;
1631 1211 : } else if (b->ttype != TYPE_void && BATtdense(b)) {
1632 0 : if (prv != BUN_NONE &&
1633 0 : 1 + * (oid *) BUNtloc(bi, prv) != * (oid *) t) {
1634 0 : b->tseqbase = oid_nil;
1635 0 : } else if (nxt != BUN_NONE &&
1636 0 : * (oid *) BUNtloc(bi, nxt) != 1 + * (oid *) t) {
1637 0 : b->tseqbase = oid_nil;
1638 0 : } else if (prv == BUN_NONE &&
1639 0 : nxt == BUN_NONE) {
1640 0 : b->tseqbase = * (oid *) t;
1641 : }
1642 : }
1643 859932 : } else if (b->tnosorted >= p)
1644 3450 : b->tnosorted = 0;
1645 861894 : if (b->trevsorted) {
1646 1554 : if (prv != BUN_NONE &&
1647 492 : atomcmp(t, BUNtail(bi, prv)) > 0) {
1648 63 : b->trevsorted = false;
1649 63 : b->tnorevsorted = p;
1650 1184 : } else if (nxt != BUN_NONE &&
1651 185 : atomcmp(t, BUNtail(bi, nxt)) < 0) {
1652 48 : b->trevsorted = false;
1653 48 : b->tnorevsorted = nxt;
1654 : }
1655 860832 : } else if (b->tnorevsorted >= p)
1656 1757 : b->tnorevsorted = 0;
1657 861894 : if (((b->ttype != TYPE_void) & b->tkey) && b->batCount > 1) {
1658 977 : BATkey(b, false);
1659 860917 : } else if (!b->tkey && (b->tnokey[0] == p || b->tnokey[1] == p))
1660 982 : b->tnokey[0] = b->tnokey[1] = 0;
1661 861894 : if (b->tnonil && ATOMstorage(b->ttype) != TYPE_msk)
1662 629225 : b->tnonil = t && atomcmp(t, atomnil) != 0;
1663 1135229 : MT_lock_unset(&b->theaplock);
1664 : }
1665 1134589 : BUN nunique = b->thash ? b->thash->nunique : 0;
1666 1134589 : MT_rwlock_wrunlock(&b->thashlock);
1667 1135993 : MT_lock_set(&b->theaplock);
1668 1131761 : if (nunique != 0)
1669 27091 : b->tunique_est = (double) nunique;
1670 1131761 : b->tminpos = bi.minpos;
1671 1131761 : b->tmaxpos = bi.maxpos;
1672 1131761 : b->theap->dirty = true;
1673 1131761 : if (b->tvheap)
1674 154758 : b->tvheap->dirty = true;
1675 1131761 : MT_lock_unset(&b->theaplock);
1676 :
1677 1132888 : return GDK_SUCCEED;
1678 :
1679 0 : bailout:
1680 0 : if (minbound)
1681 0 : VALclear(&minprop);
1682 0 : if (maxbound)
1683 0 : VALclear(&maxprop);
1684 : return GDK_FAIL;
1685 : }
1686 :
1687 : /* Replace multiple values given by their positions with the given values. */
1688 : gdk_return
1689 577182 : BUNreplacemulti(BAT *b, const oid *positions, const void *values, BUN count, bool force)
1690 : {
1691 577182 : BATcheck(b, GDK_FAIL);
1692 :
1693 577182 : if (b->ttype == TYPE_void && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED)
1694 : return GDK_FAIL;
1695 :
1696 577182 : return BUNinplacemulti(b, positions, values, count, force, false);
1697 : }
1698 :
1699 : /* Replace multiple values starting from a given position with the given
1700 : * values. */
1701 : gdk_return
1702 547996 : BUNreplacemultiincr(BAT *b, oid position, const void *values, BUN count, bool force)
1703 : {
1704 547996 : BATcheck(b, GDK_FAIL);
1705 :
1706 547996 : if (b->ttype == TYPE_void && BATmaterialize(b, BUN_NONE) != GDK_SUCCEED)
1707 : return GDK_FAIL;
1708 :
1709 547996 : return BUNinplacemulti(b, &position, values, count, force, true);
1710 : }
1711 :
1712 : gdk_return
1713 577182 : BUNreplace(BAT *b, oid id, const void *t, bool force)
1714 : {
1715 577182 : return BUNreplacemulti(b, &id, b->ttype && b->tvheap ? (const void *) &t : t, 1, force);
1716 : }
1717 :
1718 : /* very much like BUNreplace, but this doesn't make any changes if the
1719 : * tail column is void */
1720 : gdk_return
1721 4503 : void_inplace(BAT *b, oid id, const void *val, bool force)
1722 : {
1723 4503 : assert(id >= b->hseqbase && id < b->hseqbase + BATcount(b));
1724 4503 : if (id < b->hseqbase || id >= b->hseqbase + BATcount(b)) {
1725 : GDKerror("id out of range\n");
1726 : return GDK_FAIL;
1727 : }
1728 4503 : if (b->ttype == TYPE_void)
1729 : return GDK_SUCCEED;
1730 4503 : return BUNinplacemulti(b, &id, b->ttype && b->tvheap ? (const void *) &val : (const void *) val, 1, force, false);
1731 : }
1732 :
1733 : /*
1734 : * @- BUN Lookup
1735 : * Location of a BUN using a value should use the available indexes to
1736 : * speed up access. If indexes are lacking then a hash index is
1737 : * constructed under the assumption that 1) multiple access to the BAT
1738 : * can be expected and 2) building the hash is only slightly more
1739 : * expensive than the full linear scan. BUN_NONE is returned if no
1740 : * such element could be found. In those cases where the type is
1741 : * known and a hash index is available, one should use the inline
1742 : * functions to speed-up processing.
1743 : */
1744 : static BUN
1745 0 : slowfnd(BAT *b, const void *v)
1746 : {
1747 0 : BATiter bi = bat_iterator(b);
1748 0 : BUN p, q;
1749 0 : int (*cmp)(const void *, const void *) = ATOMcompare(bi.type);
1750 :
1751 0 : BATloop(b, p, q) {
1752 0 : if ((*cmp)(v, BUNtail(bi, p)) == 0) {
1753 0 : bat_iterator_end(&bi);
1754 0 : return p;
1755 : }
1756 : }
1757 0 : bat_iterator_end(&bi);
1758 0 : return BUN_NONE;
1759 : }
1760 :
1761 : static BUN
1762 0 : mskfnd(BAT *b, msk v)
1763 : {
1764 0 : BUN p, q;
1765 :
1766 0 : if (v) {
1767 : /* find a 1 value */
1768 0 : for (p = 0, q = (BATcount(b) + 31) / 32; p < q; p++) {
1769 0 : if (((uint32_t *) b->theap->base)[p] != 0) {
1770 : /* there's at least one 1 bit */
1771 0 : return p * 32 + candmask_lobit(((uint32_t *) b->theap->base)[p]);
1772 : }
1773 : }
1774 : } else {
1775 : /* find a 0 value */
1776 0 : for (p = 0, q = (BATcount(b) + 31) / 32; p < q; p++) {
1777 0 : if (((uint32_t *) b->theap->base)[p] != ~0U) {
1778 : /* there's at least one 0 bit */
1779 0 : return p * 32 + candmask_lobit(~((uint32_t *) b->theap->base)[p]);
1780 : }
1781 : }
1782 : }
1783 : return BUN_NONE;
1784 : }
1785 :
1786 : BUN
1787 1789215 : BUNfnd(BAT *b, const void *v)
1788 : {
1789 1789215 : BUN r = BUN_NONE;
1790 1789215 : BATiter bi;
1791 :
1792 1789215 : BATcheck(b, BUN_NONE);
1793 1789215 : if (!v || BATcount(b) == 0)
1794 : return r;
1795 1363858 : if (complex_cand(b)) {
1796 0 : struct canditer ci;
1797 0 : canditer_init(&ci, NULL, b);
1798 0 : return canditer_search(&ci, * (const oid *) v, false);
1799 : }
1800 1363858 : if (BATtvoid(b))
1801 741403 : return BUNfndVOID(b, v);
1802 622455 : if (ATOMstorage(b->ttype) == TYPE_msk) {
1803 0 : return mskfnd(b, *(msk*)v);
1804 : }
1805 622455 : if (!BATcheckhash(b)) {
1806 137172 : if (BATordered(b) || BATordered_rev(b))
1807 136808 : return SORTfnd(b, v);
1808 : }
1809 485642 : if (BAThash(b) == GDK_SUCCEED) {
1810 485637 : bi = bat_iterator(b); /* outside of hashlock */
1811 485650 : MT_rwlock_rdlock(&b->thashlock);
1812 485650 : if (b->thash == NULL) {
1813 0 : MT_rwlock_rdunlock(&b->thashlock);
1814 0 : bat_iterator_end(&bi);
1815 0 : goto hashfnd_failed;
1816 : }
1817 971155 : switch (ATOMbasetype(bi.type)) {
1818 5 : case TYPE_bte:
1819 5 : HASHloop_bte(bi, b->thash, r, v)
1820 : break;
1821 : break;
1822 0 : case TYPE_sht:
1823 0 : HASHloop_sht(bi, b->thash, r, v)
1824 : break;
1825 : break;
1826 32 : case TYPE_int:
1827 32 : HASHloop_int(bi, b->thash, r, v)
1828 : break;
1829 : break;
1830 0 : case TYPE_flt:
1831 0 : HASHloop_flt(bi, b->thash, r, v)
1832 : break;
1833 : break;
1834 0 : case TYPE_dbl:
1835 0 : HASHloop_dbl(bi, b->thash, r, v)
1836 : break;
1837 : break;
1838 128 : case TYPE_lng:
1839 128 : HASHloop_lng(bi, b->thash, r, v)
1840 : break;
1841 : break;
1842 : #ifdef HAVE_HGE
1843 0 : case TYPE_hge:
1844 0 : HASHloop_hge(bi, b->thash, r, v)
1845 : break;
1846 : break;
1847 : #endif
1848 0 : case TYPE_uuid:
1849 0 : HASHloop_uuid(bi, b->thash, r, v)
1850 : break;
1851 : break;
1852 485484 : case TYPE_str:
1853 672676 : HASHloop_str(bi, b->thash, r, v)
1854 : break;
1855 : break;
1856 1 : default:
1857 3 : HASHloop(bi, b->thash, r, v)
1858 : break;
1859 : break;
1860 : }
1861 485650 : MT_rwlock_rdunlock(&b->thashlock);
1862 485650 : bat_iterator_end(&bi);
1863 485650 : return r;
1864 : }
1865 0 : hashfnd_failed:
1866 : /* can't build hash table, search the slow way */
1867 0 : GDKclrerr();
1868 0 : return slowfnd(b, v);
1869 : }
1870 :
1871 : /*
1872 : * @+ BAT Property Management
1873 : *
1874 : * The function BATcount returns the number of active elements in a
1875 : * BAT. Counting is type independent. It can be implemented quickly,
1876 : * because the system ensures a dense BUN list.
1877 : */
1878 : void
1879 3054124 : BATsetcapacity(BAT *b, BUN cnt)
1880 : {
1881 3054124 : b->batCapacity = cnt;
1882 3054124 : assert(b->batCount <= cnt);
1883 3054124 : }
1884 :
1885 : /* Set the batCount value for the bat and also set some dependent
1886 : * properties. This function should be called only when it is save from
1887 : * concurrent use (e.g. when theaplock is being held). */
1888 : void
1889 68748785 : BATsetcount(BAT *b, BUN cnt)
1890 : {
1891 : /* head column is always VOID, and some head properties never change */
1892 68748785 : assert(!is_oid_nil(b->hseqbase));
1893 68748785 : assert(cnt <= BUN_MAX);
1894 :
1895 68748785 : b->batCount = cnt;
1896 68748785 : if (b->theap->parentid == b->batCacheid) {
1897 65677148 : b->theap->dirty |= b->ttype != TYPE_void && cnt > 0;
1898 65677148 : b->theap->free = tailsize(b, cnt);
1899 : }
1900 68748785 : if (b->ttype == TYPE_void)
1901 6143391 : b->batCapacity = cnt;
1902 68748785 : if (cnt <= 1) {
1903 5681354 : b->tsorted = b->trevsorted = ATOMlinear(b->ttype);
1904 5681354 : b->tnosorted = b->tnorevsorted = 0;
1905 : }
1906 : /* if the BAT was made smaller, we need to zap some values */
1907 68748785 : if (b->tnosorted >= BATcount(b))
1908 3836638 : b->tnosorted = 0;
1909 68748785 : if (b->tnorevsorted >= BATcount(b))
1910 3620454 : b->tnorevsorted = 0;
1911 68748785 : if (b->tnokey[0] >= BATcount(b) || b->tnokey[1] >= BATcount(b)) {
1912 3639709 : b->tnokey[0] = 0;
1913 3639709 : b->tnokey[1] = 0;
1914 : }
1915 68748785 : if (b->ttype == TYPE_void) {
1916 6144748 : b->tsorted = true;
1917 6144748 : if (is_oid_nil(b->tseqbase)) {
1918 135206 : b->tkey = cnt <= 1;
1919 135206 : b->trevsorted = true;
1920 135206 : b->tnil = true;
1921 135206 : b->tnonil = false;
1922 : } else {
1923 6009542 : b->tkey = true;
1924 6009542 : b->trevsorted = cnt <= 1;
1925 6009542 : b->tnil = false;
1926 6009542 : b->tnonil = true;
1927 : }
1928 : }
1929 68748785 : assert(b->batCapacity >= cnt);
1930 68748785 : }
1931 :
1932 : /*
1933 : * The key and name properties can be changed at any time. Keyed
1934 : * dimensions are automatically supported by an auxiliary hash-based
1935 : * access structure to speed up searching. Turning off the key
1936 : * integrity property does not cause the index to disappear. It can
1937 : * still be used to speed-up retrieval. The routine BATkey sets the
1938 : * key property of the association head.
1939 : */
1940 : gdk_return
1941 56282 : BATkey(BAT *b, bool flag)
1942 : {
1943 56282 : BATcheck(b, GDK_FAIL);
1944 56282 : if (b->ttype == TYPE_void) {
1945 1167 : if (BATtdense(b) && !flag) {
1946 0 : GDKerror("dense column must be unique.\n");
1947 0 : return GDK_FAIL;
1948 : }
1949 1167 : if (is_oid_nil(b->tseqbase) && flag && b->batCount > 1) {
1950 0 : GDKerror("void column cannot be unique.\n");
1951 0 : return GDK_FAIL;
1952 : }
1953 : }
1954 56282 : b->tkey = flag;
1955 56282 : if (!flag) {
1956 40258 : b->tseqbase = oid_nil;
1957 : } else
1958 16024 : b->tnokey[0] = b->tnokey[1] = 0;
1959 56282 : gdk_return rc = GDK_SUCCEED;
1960 56282 : if (flag && VIEWtparent(b)) {
1961 : /* if a view is key, then so is the parent if the two
1962 : * are aligned */
1963 6 : BAT *bp = BATdescriptor(VIEWtparent(b));
1964 6 : if (bp != NULL) {
1965 6 : MT_lock_set(&bp->theaplock);
1966 12 : if (BATcount(b) == BATcount(bp) &&
1967 6 : ATOMtype(BATttype(b)) == ATOMtype(BATttype(bp)) &&
1968 6 : !BATtkey(bp) &&
1969 0 : ((BATtvoid(b) && BATtvoid(bp) && b->tseqbase == bp->tseqbase) ||
1970 : BATcount(b) == 0))
1971 0 : rc = BATkey(bp, true);
1972 6 : MT_lock_unset(&bp->theaplock);
1973 6 : BBPunfix(bp->batCacheid);
1974 : }
1975 : }
1976 : return rc;
1977 : }
1978 :
1979 : void
1980 2938267 : BAThseqbase(BAT *b, oid o)
1981 : {
1982 2938267 : if (b != NULL) {
1983 2938267 : assert(o <= GDK_oid_max); /* i.e., not oid_nil */
1984 2938267 : assert(o + BATcount(b) <= GDK_oid_max);
1985 2938267 : b->hseqbase = o;
1986 : }
1987 2938267 : }
1988 :
1989 : void
1990 7369729 : BATtseqbase(BAT *b, oid o)
1991 : {
1992 7369729 : assert(o <= oid_nil);
1993 7369729 : if (b == NULL)
1994 : return;
1995 7369729 : assert(is_oid_nil(o) || o + BATcount(b) <= GDK_oid_max);
1996 7369729 : if (ATOMtype(b->ttype) == TYPE_oid) {
1997 6248334 : b->tseqbase = o;
1998 :
1999 : /* adapt keyness */
2000 6248334 : if (BATtvoid(b)) {
2001 6219647 : b->tsorted = true;
2002 6219647 : if (is_oid_nil(o)) {
2003 115 : b->tkey = b->batCount <= 1;
2004 115 : b->tnonil = b->batCount == 0;
2005 115 : b->tnil = b->batCount > 0;
2006 115 : b->trevsorted = true;
2007 115 : b->tnosorted = b->tnorevsorted = 0;
2008 115 : if (!b->tkey) {
2009 0 : b->tnokey[0] = 0;
2010 0 : b->tnokey[1] = 1;
2011 : } else {
2012 115 : b->tnokey[0] = b->tnokey[1] = 0;
2013 : }
2014 : } else {
2015 6219532 : if (!b->tkey) {
2016 19713 : b->tkey = true;
2017 19713 : b->tnokey[0] = b->tnokey[1] = 0;
2018 : }
2019 6219532 : b->tnonil = true;
2020 6219532 : b->tnil = false;
2021 6219532 : b->trevsorted = b->batCount <= 1;
2022 6219532 : if (!b->trevsorted)
2023 20225 : b->tnorevsorted = 1;
2024 : }
2025 : }
2026 : } else {
2027 1121395 : assert(o == oid_nil);
2028 1121395 : b->tseqbase = oid_nil;
2029 : }
2030 : }
2031 :
2032 : /*
2033 : * @- Change the BAT access permissions (read, append, write)
2034 : * Regrettably, BAT access-permissions, persistent status and memory
2035 : * map modes, interact in ways that makes one's brain sizzle. This
2036 : * makes BATsetaccess and TMcommit (where a change in BAT persistence
2037 : * mode is made permanent) points in which the memory map status of
2038 : * bats needs to be carefully re-assessed and ensured.
2039 : *
2040 : * Another complication is the fact that during commit, concurrent
2041 : * users may access the heaps, such that the simple solution
2042 : * unmap;re-map is out of the question.
2043 : * Even worse, it is not possible to even rename an open mmap file in
2044 : * Windows. For this purpose, we dropped the old .priv scheme, which
2045 : * relied on file moves. Now, the file that is opened with mmap is
2046 : * always the X file, in case of newstorage=STORE_PRIV, we save in a
2047 : * new file X.new
2048 : *
2049 : * we must consider the following dimensions:
2050 : *
2051 : * persistence:
2052 : * not simply the current persistence mode but whether the bat *was*
2053 : * present at the last commit point (BBP status & BBPEXISTING).
2054 : * The crucial issue is namely whether we must guarantee recovery
2055 : * to a previous sane state.
2056 : *
2057 : * access:
2058 : * whether the BAT is BAT_READ or BAT_WRITE. Note that BAT_APPEND
2059 : * is usually the same as BAT_READ (as our concern are only data pages
2060 : * that already existed at the last commit).
2061 : *
2062 : * storage:
2063 : * the current way the heap file X is memory-mapped;
2064 : * STORE_MMAP uses direct mapping (so dirty pages may be flushed
2065 : * at any time to disk), STORE_PRIV uses copy-on-write.
2066 : *
2067 : * newstorage:
2068 : * the current save-regime. STORE_MMAP calls msync() on the heap X,
2069 : * whereas STORE_PRIV writes the *entire* heap in a file: X.new
2070 : * If a BAT is loaded from disk, the field newstorage is used
2071 : * to set storage as well (so before change-access and commit-
2072 : * persistence mayhem, we always have newstorage=storage).
2073 : *
2074 : * change-access:
2075 : * what happens if the bat-access mode is changed from
2076 : * BAT_READ into BAT_WRITE (or vice versa).
2077 : *
2078 : * commit-persistence:
2079 : * what happens during commit if the bat-persistence mode was
2080 : * changed (from TRANSIENT into PERSISTENT, or vice versa).
2081 : *
2082 : * this is the scheme:
2083 : *
2084 : * persistence access newstorage storage change-access commit-persistence
2085 : * =========== ========= ========== ========== ============= ==================
2086 : * 0 transient BAT_READ STORE_MMAP STORE_MMAP =>2 =>4
2087 : * 1 transient BAT_READ STORE_PRIV STORE_PRIV =>3 =>5
2088 : * 2 transient BAT_WRITE STORE_MMAP STORE_MMAP =>0 =>6+
2089 : * 3 transient BAT_WRITE STORE_PRIV STORE_PRIV =>1 =>7
2090 : * 4 persistent BAT_READ STORE_MMAP STORE_MMAP =>6+ =>0
2091 : * 5 persistent BAT_READ STORE_PRIV STORE_PRIV =>7 =>1
2092 : * 6 persistent BAT_WRITE STORE_PRIV STORE_MMAP del X.new=>4+ del X.new;=>2+
2093 : * 7 persistent BAT_WRITE STORE_PRIV STORE_PRIV =>5 =>3
2094 : *
2095 : * exception states:
2096 : * a transient BAT_READ STORE_PRIV STORE_MMAP =>b =>c
2097 : * b transient BAT_WRITE STORE_PRIV STORE_MMAP =>a =>6
2098 : * c persistent BAT_READ STORE_PRIV STORE_MMAP =>6 =>a
2099 : *
2100 : * (+) indicates that we must ensure that the heap gets saved in its new mode
2101 : *
2102 : * Note that we now allow a heap with save-regime STORE_PRIV that was
2103 : * actually mapped STORE_MMAP. In effect, the potential corruption of
2104 : * the X file is compensated by writing out full X.new files that take
2105 : * precedence. When transitioning out of this state towards one with
2106 : * both storage regime and OS as STORE_MMAP we need to move the X.new
2107 : * files into the backup directory. Then msync the X file and (on
2108 : * success) remove the X.new; see backup_new().
2109 : *
2110 : * Exception states are only reachable if the commit fails and those
2111 : * new persistent bats have already been processed (but never become
2112 : * part of a committed state). In that case a transition 2=>6 may end
2113 : * up 2=>b. Exception states a and c are reachable from b.
2114 : *
2115 : * Errors in HEAPchangeaccess() can be handled atomically inside the
2116 : * routine. The work on changing mmap modes HEAPcommitpersistence()
2117 : * is done during the BBPsync() for all bats that are newly persistent
2118 : * (BBPNEW). After the TMcommit(), it is done for those bats that are
2119 : * no longer persistent after the commit (BBPDELETED), only if it
2120 : * succeeds. Such transient bats cannot be processed before the
2121 : * commit, because the commit may fail and then the more unsafe
2122 : * transient mmap modes would be present on a persistent bat.
2123 : *
2124 : * See dirty_bat() in BBPsync() -- gdk_bbp.c and epilogue() in
2125 : * gdk_tm.c.
2126 : *
2127 : * Including the exception states, we have 11 of the 16
2128 : * combinations. As for the 5 avoided states, all four
2129 : * (persistence,access) states with (STORE_MMAP,STORE_PRIV) are
2130 : * omitted (this would amount to an msync() save regime on a
2131 : * copy-on-write heap -- which does not work). The remaining avoided
2132 : * state is the patently unsafe
2133 : * (persistent,BAT_WRITE,STORE_MMAP,STORE_MMAP).
2134 : *
2135 : * Note that after a server restart exception states are gone, as on
2136 : * BAT loads the saved descriptor is inspected again (which will
2137 : * reproduce the state at the last succeeded commit).
2138 : *
2139 : * To avoid exception states, a TMsubcommit protocol would need to be
2140 : * used which is too heavy for BATsetaccess().
2141 : *
2142 : * Note that this code is not about making heaps mmap-ed in the first
2143 : * place. It is just about determining which flavor of mmap should be
2144 : * used. The MAL user is oblivious of such details.
2145 : */
2146 :
2147 : /* rather than deleting X.new, we comply with the commit protocol and
2148 : * move it to backup storage */
2149 : static gdk_return
2150 0 : backup_new(Heap *hp, bool lock)
2151 : {
2152 0 : int batret, bakret, ret = -1;
2153 0 : char batpath[MAXPATH], bakpath[MAXPATH];
2154 0 : struct stat st;
2155 :
2156 0 : char *bak_filename = NULL;
2157 0 : if ((bak_filename = strrchr(hp->filename, DIR_SEP)) != NULL)
2158 0 : bak_filename++;
2159 : else
2160 : bak_filename = hp->filename;
2161 : /* check for an existing X.new in BATDIR, BAKDIR and SUBDIR */
2162 0 : if (GDKfilepath(batpath, sizeof(batpath), hp->farmid, BATDIR, hp->filename, "new") == GDK_SUCCEED &&
2163 0 : GDKfilepath(bakpath, sizeof(bakpath), hp->farmid, BAKDIR, bak_filename, "new") == GDK_SUCCEED) {
2164 : /* file actions here interact with the global commits */
2165 0 : if (lock)
2166 0 : BBPtmlock();
2167 :
2168 0 : batret = MT_stat(batpath, &st);
2169 0 : bakret = MT_stat(bakpath, &st);
2170 :
2171 0 : if (batret == 0 && bakret) {
2172 : /* no backup yet, so move the existing X.new there out
2173 : * of the way */
2174 0 : if ((ret = MT_rename(batpath, bakpath)) < 0)
2175 0 : GDKsyserror("backup_new: rename %s to %s failed\n",
2176 : batpath, bakpath);
2177 0 : TRC_DEBUG(IO, "rename(%s,%s) = %d\n", batpath, bakpath, ret);
2178 0 : } else if (batret == 0) {
2179 : /* there is a backup already; just remove the X.new */
2180 0 : if ((ret = MT_remove(batpath)) != 0)
2181 0 : GDKsyserror("backup_new: remove %s failed\n", batpath);
2182 0 : TRC_DEBUG(IO, "remove(%s) = %d\n", batpath, ret);
2183 : } else {
2184 : ret = 0;
2185 : }
2186 0 : if (lock)
2187 0 : BBPtmunlock();
2188 : }
2189 0 : return ret ? GDK_FAIL : GDK_SUCCEED;
2190 : }
2191 :
2192 : #define ACCESSMODE(wr,rd) ((wr)?BAT_WRITE:(rd)?BAT_READ:-1)
2193 :
2194 : /* transition heap from readonly to writable */
2195 : static storage_t
2196 7411882 : HEAPchangeaccess(Heap *hp, int dstmode, bool existing)
2197 : {
2198 7411882 : if (hp->base == NULL || hp->newstorage == STORE_MEM || !existing || dstmode == -1)
2199 7411882 : return hp->newstorage; /* 0<=>2,1<=>3,a<=>b */
2200 :
2201 0 : if (dstmode == BAT_WRITE) {
2202 0 : if (hp->storage != STORE_PRIV)
2203 0 : hp->dirty = true; /* exception c does not make it dirty */
2204 0 : return STORE_PRIV; /* 4=>6,5=>7,c=>6 persistent BAT_WRITE needs STORE_PRIV */
2205 : }
2206 0 : if (hp->storage == STORE_MMAP) { /* 6=>4 */
2207 0 : hp->dirty = true;
2208 0 : return backup_new(hp, true) != GDK_SUCCEED ? STORE_INVALID : STORE_MMAP; /* only called for existing bats */
2209 : }
2210 : return hp->storage; /* 7=>5 */
2211 : }
2212 :
2213 : /* heap changes persistence mode (at commit point) */
2214 : static storage_t
2215 201617 : HEAPcommitpersistence(Heap *hp, bool writable, bool existing)
2216 : {
2217 201617 : if (existing) { /* existing, ie will become transient */
2218 59724 : if (hp->storage == STORE_MMAP && hp->newstorage == STORE_PRIV && writable) { /* 6=>2 */
2219 0 : hp->dirty = true;
2220 0 : return backup_new(hp, false) != GDK_SUCCEED ? STORE_INVALID : STORE_MMAP; /* only called for existing bats */
2221 : }
2222 59724 : return hp->newstorage; /* 4=>0,5=>1,7=>3,c=>a no change */
2223 : }
2224 : /* !existing, ie will become persistent */
2225 141893 : if (hp->newstorage == STORE_MEM)
2226 : return hp->newstorage;
2227 782 : if (hp->newstorage == STORE_MMAP && !writable)
2228 : return STORE_MMAP; /* 0=>4 STORE_MMAP */
2229 :
2230 0 : if (hp->newstorage == STORE_MMAP)
2231 0 : hp->dirty = true; /* 2=>6 */
2232 : return STORE_PRIV; /* 1=>5,2=>6,3=>7,a=>c,b=>6 states */
2233 : }
2234 :
2235 :
2236 : #define ATOMappendpriv(t, h) (ATOMstorage(t) != TYPE_str /*|| GDK_ELIMDOUBLES(h) */)
2237 :
2238 : /* change the heap modes at a commit */
2239 : gdk_return
2240 177880 : BATcheckmodes(BAT *b, bool existing)
2241 : {
2242 177880 : storage_t m1 = STORE_MEM, m3 = STORE_MEM;
2243 177880 : bool dirty = false, wr;
2244 :
2245 177880 : BATcheck(b, GDK_FAIL);
2246 :
2247 177880 : wr = (b->batRestricted == BAT_WRITE);
2248 177880 : if (b->ttype) {
2249 177880 : m1 = HEAPcommitpersistence(b->theap, wr, existing);
2250 177880 : dirty |= (b->theap->newstorage != m1);
2251 : }
2252 :
2253 177880 : if (b->tvheap) {
2254 23737 : bool ta = (b->batRestricted == BAT_APPEND) && ATOMappendpriv(b->ttype, b->tvheap);
2255 23737 : m3 = HEAPcommitpersistence(b->tvheap, wr || ta, existing);
2256 23737 : dirty |= (b->tvheap->newstorage != m3);
2257 : }
2258 177880 : if (m1 == STORE_INVALID || m3 == STORE_INVALID)
2259 : return GDK_FAIL;
2260 :
2261 177880 : if (dirty) {
2262 0 : b->theap->newstorage = m1;
2263 0 : if (b->tvheap)
2264 0 : b->tvheap->newstorage = m3;
2265 : }
2266 : return GDK_SUCCEED;
2267 : }
2268 :
2269 : BAT *
2270 9186663 : BATsetaccess(BAT *b, restrict_t newmode)
2271 : {
2272 9186663 : restrict_t bakmode;
2273 :
2274 9186663 : BATcheck(b, NULL);
2275 9186663 : if (newmode != BAT_READ &&
2276 19213 : (isVIEW(b) || (ATOMIC_GET(&b->theap->refs) & HEAPREFS) > 1)) {
2277 0 : BAT *bn = COLcopy(b, b->ttype, true, b->batRole);
2278 0 : BBPunfix(b->batCacheid);
2279 0 : if (bn == NULL)
2280 : return NULL;
2281 : b = bn;
2282 : }
2283 9186663 : MT_lock_set(&b->theaplock);
2284 9233943 : bakmode = b->batRestricted;
2285 9233943 : if (bakmode != newmode) {
2286 6479742 : bool existing = (BBP_status(b->batCacheid) & BBPEXISTING) != 0;
2287 6479742 : bool wr = (newmode == BAT_WRITE);
2288 6479742 : bool rd = (bakmode == BAT_WRITE);
2289 6479742 : storage_t m1 = STORE_MEM, m3 = STORE_MEM;
2290 6479742 : storage_t b1 = STORE_MEM, b3 = STORE_MEM;
2291 :
2292 6479742 : if (b->theap->parentid == b->batCacheid) {
2293 6471894 : b1 = b->theap->newstorage;
2294 6482349 : m1 = HEAPchangeaccess(b->theap, ACCESSMODE(wr, rd), existing);
2295 : }
2296 6476566 : if (b->tvheap && b->tvheap->parentid == b->batCacheid) {
2297 945899 : bool ta = (newmode == BAT_APPEND && ATOMappendpriv(b->ttype, b->tvheap));
2298 945899 : b3 = b->tvheap->newstorage;
2299 1892556 : m3 = HEAPchangeaccess(b->tvheap, ACCESSMODE(wr && ta, rd && ta), existing);
2300 : }
2301 6479971 : if (m1 == STORE_INVALID || m3 == STORE_INVALID) {
2302 2443314 : MT_lock_unset(&b->theaplock);
2303 2448005 : BBPunfix(b->batCacheid);
2304 2448005 : return NULL;
2305 : }
2306 :
2307 : /* set new access mode and mmap modes */
2308 4036657 : b->batRestricted = newmode;
2309 4036657 : if (b->theap->parentid == b->batCacheid)
2310 4035854 : b->theap->newstorage = m1;
2311 4036657 : if (b->tvheap && b->tvheap->parentid == b->batCacheid)
2312 926769 : b->tvheap->newstorage = m3;
2313 :
2314 4036657 : MT_lock_unset(&b->theaplock);
2315 4048098 : if (existing && !isVIEW(b) && BBPsave(b) != GDK_SUCCEED) {
2316 : /* roll back all changes */
2317 0 : MT_lock_set(&b->theaplock);
2318 0 : b->batRestricted = bakmode;
2319 0 : b->theap->newstorage = b1;
2320 0 : if (b->tvheap)
2321 0 : b->tvheap->newstorage = b3;
2322 0 : MT_lock_unset(&b->theaplock);
2323 0 : BBPunfix(b->batCacheid);
2324 0 : return NULL;
2325 : }
2326 : } else {
2327 2754201 : MT_lock_unset(&b->theaplock);
2328 : }
2329 : return b;
2330 : }
2331 :
2332 : restrict_t
2333 0 : BATgetaccess(BAT *b)
2334 : {
2335 0 : BATcheck(b, BAT_WRITE);
2336 0 : MT_lock_set(&b->theaplock);
2337 0 : restrict_t restricted = b->batRestricted;
2338 0 : MT_lock_unset(&b->theaplock);
2339 0 : return restricted;
2340 : }
2341 :
2342 : /*
2343 : * @- change BAT persistency (persistent,session,transient)
2344 : * In the past, we prevented BATS with certain types from being saved at all:
2345 : * - BATs of BATs, as having recursive bats creates cascading
2346 : * complexities in commits/aborts.
2347 : * - any atom with refcounts, as the BBP has no overview of such
2348 : * user-defined refcounts.
2349 : * - pointer types, as the values they point to are bound to be transient.
2350 : *
2351 : * However, nowadays we do allow such saves, as the BBP swapping
2352 : * mechanism was altered to be able to save transient bats temporarily
2353 : * to disk in order to make room. Thus, we must be able to save any
2354 : * transient BAT to disk.
2355 : *
2356 : * What we don't allow is to make such bats persistent.
2357 : *
2358 : * Although the persistent state does influence the allowed mmap
2359 : * modes, this only goes for the *real* committed persistent
2360 : * state. Making the bat persistent with BATmode does not matter for
2361 : * the heap modes until the commit point is reached. So we do not need
2362 : * to do anything with heap modes yet at this point.
2363 : */
2364 : gdk_return
2365 433094 : BATmode(BAT *b, bool transient)
2366 : {
2367 433094 : BATcheck(b, GDK_FAIL);
2368 :
2369 : /* can only make a bat PERSISTENT if its role is already
2370 : * PERSISTENT */
2371 433094 : assert(transient || b->batRole == PERSISTENT);
2372 : /* cannot make a view PERSISTENT */
2373 247221 : assert(transient || !isVIEW(b));
2374 :
2375 433094 : if (b->batRole == TRANSIENT && !transient) {
2376 0 : GDKerror("cannot change mode of BAT in TRANSIENT farm.\n");
2377 0 : return GDK_FAIL;
2378 : }
2379 :
2380 433094 : BATiter bi = bat_iterator(b);
2381 433094 : bool mustrelease = false;
2382 433094 : bool mustretain = false;
2383 433094 : bat bid = b->batCacheid;
2384 :
2385 433094 : if (transient != bi.transient) {
2386 433094 : if (!transient) {
2387 247221 : if (ATOMisdescendant(b->ttype, TYPE_ptr)) {
2388 0 : GDKerror("%s type implies that %s[%s] "
2389 : "cannot be made persistent.\n",
2390 : ATOMname(b->ttype), BATgetId(b),
2391 : ATOMname(b->ttype));
2392 0 : bat_iterator_end(&bi);
2393 0 : return GDK_FAIL;
2394 : }
2395 : }
2396 :
2397 : /* we need to delay the calls to BBPretain and
2398 : * BBPrelease until after we have released our reference
2399 : * to the heaps (i.e. until after bat_iterator_end),
2400 : * because in either case, BBPfree can be called (either
2401 : * directly here or in BBPtrim) which waits for the heap
2402 : * reference to come down. BBPretain calls incref which
2403 : * waits until the trim that is waiting for us is done,
2404 : * so that causes deadlock, and BBPrelease can call
2405 : * BBPfree which causes deadlock with a single thread */
2406 185873 : if (!transient) {
2407 : /* persistent BATs get a logical reference */
2408 : mustretain = true;
2409 185873 : } else if (!bi.transient) {
2410 : /* transient BATs loose their logical reference */
2411 185873 : mustrelease = true;
2412 : }
2413 433094 : MT_lock_set(&GDKswapLock(bid));
2414 433094 : if (!transient) {
2415 247221 : if (BBP_status(bid) & BBPDELETED) {
2416 0 : BBP_status_on(bid, BBPEXISTING);
2417 0 : BBP_status_off(bid, BBPDELETED);
2418 : } else
2419 247221 : BBP_status_on(bid, BBPNEW);
2420 185873 : } else if (!bi.transient) {
2421 185873 : if (!(BBP_status(bid) & BBPNEW))
2422 60491 : BBP_status_on(bid, BBPDELETED);
2423 185873 : BBP_status_off(bid, BBPPERSISTENT);
2424 : }
2425 : /* session bats or persistent bats that did not
2426 : * witness a commit yet may have been saved */
2427 433094 : MT_lock_set(&b->theaplock);
2428 433094 : if (b->batCopiedtodisk) {
2429 49956 : if (!transient) {
2430 45 : BBP_status_off(bid, BBPTMP);
2431 : } else {
2432 : /* TMcommit must remove it to
2433 : * guarantee free space */
2434 49911 : BBP_status_on(bid, BBPTMP);
2435 : }
2436 : }
2437 433094 : b->batTransient = transient;
2438 433094 : MT_lock_unset(&b->theaplock);
2439 433094 : MT_lock_unset(&GDKswapLock(bid));
2440 : }
2441 433094 : bat_iterator_end(&bi);
2442 : /* retain/release after bat_iterator_end because of refs to heaps */
2443 433094 : if (mustretain)
2444 247221 : BBPretain(bid);
2445 185873 : else if (mustrelease)
2446 185873 : BBPrelease(bid);
2447 : return GDK_SUCCEED;
2448 : }
2449 :
2450 : /* BATassertProps checks whether properties are set correctly. Under
2451 : * no circumstances will it change any properties. Note that the
2452 : * "nil" property is not actually used anywhere, but it is checked. */
2453 :
2454 : #ifdef NDEBUG
2455 : /* assertions are disabled, turn failing tests into a message */
2456 : #undef assert
2457 : #define assert(test) ((void) ((test) || (TRC_CRITICAL(CHECK, "Assertion `%s' failed\n", #test), 0)))
2458 : #endif
2459 :
2460 : static void
2461 319195009 : assert_ascii(const char *s)
2462 : {
2463 638390018 : if (!strNil(s)) {
2464 4728075437 : while (*s) {
2465 4459424182 : assert((*s & 0x80) == 0);
2466 4459424182 : s++;
2467 : }
2468 : }
2469 319195009 : }
2470 :
2471 : /* Assert that properties are set correctly.
2472 : *
2473 : * A BAT can have a bunch of properties set. Mostly, the property
2474 : * bits are set if we *know* the property holds, and not set if we
2475 : * don't know whether the property holds (or if we know it doesn't
2476 : * hold). All properties are per column.
2477 : *
2478 : * The properties currently maintained are:
2479 : *
2480 : * seqbase Only valid for TYPE_oid and TYPE_void columns: each
2481 : * value in the column is exactly one more than the
2482 : * previous value, starting at position 0 with the value
2483 : * stored in this property.
2484 : * This implies sorted, key, nonil (which therefore need
2485 : * to be set).
2486 : * nil There is at least one NIL value in the column.
2487 : * nonil There are no NIL values in the column.
2488 : * key All values in the column are distinct.
2489 : * sorted The column is sorted (ascending). If also revsorted,
2490 : * then all values are equal.
2491 : * revsorted The column is reversely sorted (descending). If
2492 : * also sorted, then all values are equal.
2493 : * nosorted BUN position which proofs not sorted (given position
2494 : * and one before are not ordered correctly).
2495 : * norevsorted BUN position which proofs not revsorted (given position
2496 : * and one before are not ordered correctly).
2497 : * nokey Pair of BUN positions that proof not all values are
2498 : * distinct (i.e. values at given locations are equal).
2499 : * ascii Only valid for TYPE_str columns: all strings in the column
2500 : * are ASCII, i.e. the UTF-8 encoding for all characters is a
2501 : * single byte.
2502 : *
2503 : * Note that the functions BATtseqbase and BATkey also set more
2504 : * properties than you might suspect. When setting properties on a
2505 : * newly created and filled BAT, you may want to first make sure the
2506 : * batCount is set correctly (e.g. by calling BATsetcount), then use
2507 : * BATtseqbase and BATkey, and finally set the other properties.
2508 : *
2509 : * For a view, we cannot check all properties, since it is possible with
2510 : * the way the SQL layer works, that a parent BAT gets changed, changing
2511 : * the properties, while there is a view. The view is supposed to look
2512 : * at only the non-changing part of the BAT (through candidate lists),
2513 : * but this means that the properties of the view might not be correct.
2514 : * For this reason, for views, we skip all property checking that looks
2515 : * at the BAT content.
2516 : */
2517 :
2518 : void
2519 20646222 : BATassertProps(BAT *b)
2520 : {
2521 20646222 : unsigned bbpstatus;
2522 20646222 : BUN p, q;
2523 20646222 : int (*cmpf)(const void *, const void *);
2524 20646222 : int cmp;
2525 20646222 : const void *prev = NULL, *valp, *nilp;
2526 20646222 : char filename[sizeof(b->theap->filename)];
2527 20646222 : bool isview1, isview2;
2528 :
2529 : /* do the complete check within a lock */
2530 20646222 : MT_lock_set(&b->theaplock);
2531 :
2532 : /* general BAT sanity */
2533 20699574 : assert(b != NULL);
2534 20699574 : assert(b->batCacheid > 0);
2535 20699574 : assert(b->batCacheid < getBBPsize());
2536 20625485 : assert(b == BBP_desc(b->batCacheid));
2537 20625485 : assert(b->batCount >= b->batInserted);
2538 :
2539 : /* headless */
2540 20625485 : assert(b->hseqbase <= GDK_oid_max); /* non-nil seqbase */
2541 20625485 : assert(b->hseqbase + BATcount(b) <= GDK_oid_max);
2542 :
2543 20625485 : isview1 = b->theap->parentid != b->batCacheid;
2544 20625485 : isview2 = b->tvheap && b->tvheap->parentid != b->batCacheid;
2545 :
2546 20625485 : bbpstatus = BBP_status(b->batCacheid);
2547 : /* only at most one of BBPDELETED, BBPEXISTING, BBPNEW may be set */
2548 20625485 : assert(((bbpstatus & BBPDELETED) != 0) +
2549 : ((bbpstatus & BBPEXISTING) != 0) +
2550 : ((bbpstatus & BBPNEW) != 0) <= 1);
2551 :
2552 20625485 : assert(b->ttype >= TYPE_void);
2553 20625485 : assert(b->ttype < GDKatomcnt);
2554 20625485 : assert(isview1 ||
2555 : b->ttype == TYPE_void ||
2556 : BBPfarms[b->theap->farmid].roles & (1 << b->batRole));
2557 20625485 : assert(isview2 ||
2558 : b->tvheap == NULL ||
2559 : (BBPfarms[b->tvheap->farmid].roles & (1 << b->batRole)));
2560 :
2561 20625485 : cmpf = ATOMcompare(b->ttype);
2562 20625485 : nilp = ATOMnilptr(b->ttype);
2563 :
2564 20625485 : assert(isview1 || b->theap->free >= tailsize(b, BATcount(b)));
2565 20625485 : if (b->ttype != TYPE_void) {
2566 15542913 : assert(b->batCount <= b->batCapacity);
2567 15542913 : assert(isview1 || b->theap->size >= b->theap->free);
2568 15542913 : if (ATOMstorage(b->ttype) == TYPE_msk) {
2569 : /* 32 values per 4-byte word (that's not the
2570 : * same as 8 values per byte...) */
2571 4906 : assert(isview1 || b->theap->size >= 4 * ((b->batCapacity + 31) / 32));
2572 : } else
2573 15538007 : assert(isview1 || b->theap->size >> b->tshift >= b->batCapacity);
2574 : }
2575 20625485 : if (!isview1) {
2576 15760987 : strconcat_len(filename, sizeof(filename),
2577 15760987 : BBP_physical(b->theap->parentid),
2578 2805921 : b->ttype == TYPE_str ? b->twidth == 1 ? ".tail1" : b->twidth == 2 ? ".tail2" :
2579 : #if SIZEOF_VAR_T == 8
2580 : b->twidth == 4 ? ".tail4" :
2581 : #endif
2582 : ".tail" : ".tail",
2583 : NULL);
2584 15829708 : assert(strcmp(b->theap->filename, filename) == 0);
2585 : }
2586 20694206 : if (!isview2 && b->tvheap) {
2587 2644033 : strconcat_len(filename, sizeof(filename),
2588 2644033 : BBP_physical(b->tvheap->parentid),
2589 : ".theap",
2590 : NULL);
2591 2644454 : assert(strcmp(b->tvheap->filename, filename) == 0);
2592 : }
2593 :
2594 : /* void, str and blob imply varsized */
2595 20694627 : if (ATOMstorage(b->ttype) == TYPE_str ||
2596 : ATOMstorage(b->ttype) == TYPE_blob)
2597 3607370 : assert(b->tvheap != NULL);
2598 : /* other "known" types are not varsized */
2599 20694627 : if (ATOMstorage(b->ttype) > TYPE_void &&
2600 : ATOMstorage(b->ttype) < TYPE_str)
2601 11962967 : assert(b->tvheap == NULL);
2602 : /* shift and width have a particular relationship */
2603 20694627 : if (ATOMstorage(b->ttype) == TYPE_str)
2604 3604324 : assert(b->twidth >= 1 && b->twidth <= ATOMsize(b->ttype));
2605 : else
2606 17090303 : assert(b->twidth == ATOMsize(b->ttype));
2607 20694627 : assert(b->tseqbase <= oid_nil);
2608 : /* only oid/void columns can be dense */
2609 20694627 : assert(is_oid_nil(b->tseqbase) || b->ttype == TYPE_oid || b->ttype == TYPE_void);
2610 : /* a column cannot both have and not have NILs */
2611 20694627 : assert(!b->tnil || !b->tnonil);
2612 : /* only string columns can be ASCII */
2613 20694627 : assert(!b->tascii || ATOMstorage(b->ttype) == TYPE_str);
2614 20694627 : if (b->ttype == TYPE_void) {
2615 5211481 : assert(b->tshift == 0);
2616 5211481 : assert(b->twidth == 0);
2617 5211481 : assert(b->tsorted);
2618 5211481 : if (is_oid_nil(b->tseqbase)) {
2619 303 : assert(b->tvheap == NULL);
2620 303 : assert(BATcount(b) == 0 || !b->tnonil);
2621 303 : assert(BATcount(b) <= 1 || !b->tkey);
2622 303 : assert(b->trevsorted);
2623 : } else {
2624 5211178 : if (b->tvheap != NULL) {
2625 : /* candidate list with exceptions */
2626 48615 : assert(b->batRole == TRANSIENT || b->batRole == SYSTRANS);
2627 48615 : assert(b->tvheap->free <= b->tvheap->size);
2628 48615 : assert(b->tvheap->free >= sizeof(ccand_t));
2629 48615 : assert((negoid_cand(b) && ccand_free(b) % SIZEOF_OID == 0) || mask_cand(b));
2630 48615 : if (negoid_cand(b) && ccand_free(b) > 0) {
2631 376 : const oid *oids = (const oid *) ccand_first(b);
2632 376 : q = ccand_free(b) / SIZEOF_OID;
2633 376 : assert(oids != NULL);
2634 376 : assert(b->tseqbase + BATcount(b) + q <= GDK_oid_max);
2635 : /* exceptions within range */
2636 376 : assert(oids[0] >= b->tseqbase);
2637 376 : assert(oids[q - 1] < b->tseqbase + BATcount(b) + q);
2638 : /* exceptions sorted */
2639 2839122 : for (p = 1; p < q; p++)
2640 2838746 : assert(oids[p - 1] < oids[p]);
2641 : }
2642 : }
2643 5211178 : assert(b->tseqbase + b->batCount <= GDK_oid_max);
2644 5211178 : assert(BATcount(b) == 0 || !b->tnil);
2645 5211178 : assert(BATcount(b) <= 1 || !b->trevsorted);
2646 5211178 : assert(b->tkey);
2647 5211178 : assert(b->tnonil);
2648 : }
2649 5211481 : MT_lock_unset(&b->theaplock);
2650 13124697 : return;
2651 : }
2652 :
2653 15483146 : BATiter bi = bat_iterator_nolock(b);
2654 :
2655 15483146 : if (BATtdense(b)) {
2656 250048 : assert(b->tseqbase + b->batCount <= GDK_oid_max);
2657 250048 : assert(b->ttype == TYPE_oid);
2658 250048 : assert(b->tsorted);
2659 250048 : assert(b->tkey);
2660 250048 : assert(b->tnonil);
2661 250048 : if ((q = b->batCount) != 0) {
2662 181955 : const oid *o = (const oid *) Tloc(b, 0);
2663 181955 : assert(*o == b->tseqbase);
2664 222938477 : for (p = 1; p < q; p++)
2665 222756522 : assert(o[p - 1] + 1 == o[p]);
2666 : }
2667 250048 : MT_lock_unset(&b->theaplock);
2668 249920 : return;
2669 : }
2670 15233098 : assert(1 << b->tshift == b->twidth);
2671 : /* only linear atoms can be sorted */
2672 15233098 : assert(!b->tsorted || ATOMlinear(b->ttype));
2673 15233098 : assert(!b->trevsorted || ATOMlinear(b->ttype));
2674 15233098 : if (ATOMlinear(b->ttype)) {
2675 15246133 : assert(b->tnosorted == 0 ||
2676 : (b->tnosorted > 0 &&
2677 : b->tnosorted < b->batCount));
2678 15246133 : assert(!b->tsorted || b->tnosorted == 0);
2679 15246133 : if (!isview1 &&
2680 15246133 : !isview2 &&
2681 2016428 : !b->tsorted &&
2682 177452 : b->tnosorted > 0 &&
2683 177452 : b->tnosorted < b->batCount)
2684 177848 : assert(cmpf(BUNtail(bi, b->tnosorted - 1),
2685 : BUNtail(bi, b->tnosorted)) > 0);
2686 15245142 : assert(b->tnorevsorted == 0 ||
2687 : (b->tnorevsorted > 0 &&
2688 : b->tnorevsorted < b->batCount));
2689 15245142 : assert(!b->trevsorted || b->tnorevsorted == 0);
2690 15245142 : if (!isview1 &&
2691 10049998 : !isview2 &&
2692 2649563 : !b->trevsorted &&
2693 568377 : b->tnorevsorted > 0 &&
2694 568377 : b->tnorevsorted < b->batCount)
2695 568460 : assert(cmpf(BUNtail(bi, b->tnorevsorted - 1),
2696 : BUNtail(bi, b->tnorevsorted)) < 0);
2697 : }
2698 : /* if tkey property set, both tnokey values must be 0 */
2699 15229387 : assert(!b->tkey || (b->tnokey[0] == 0 && b->tnokey[1] == 0));
2700 15229387 : if (!isview1 &&
2701 15229387 : !isview2 &&
2702 2457452 : !b->tkey &&
2703 2457452 : (b->tnokey[0] != 0 || b->tnokey[1] != 0)) {
2704 : /* if tkey not set and tnokey indicates a proof of
2705 : * non-key-ness, make sure the tnokey values are in
2706 : * range and indeed provide a proof */
2707 562288 : assert(b->tnokey[0] != b->tnokey[1]);
2708 562288 : assert(b->tnokey[0] < b->batCount);
2709 562288 : assert(b->tnokey[1] < b->batCount);
2710 562288 : assert(cmpf(BUNtail(bi, b->tnokey[0]),
2711 : BUNtail(bi, b->tnokey[1])) == 0);
2712 : }
2713 : /* var heaps must have sane sizes */
2714 15227508 : assert(b->tvheap == NULL || b->tvheap->free <= b->tvheap->size);
2715 :
2716 15227508 : if (!b->tkey && !b->tsorted && !b->trevsorted &&
2717 5360740 : !b->tnonil && !b->tnil) {
2718 : /* nothing more to prove */
2719 2434394 : MT_lock_unset(&b->theaplock);
2720 2429781 : return;
2721 : }
2722 :
2723 : /* only do a scan if the bat is not a view */
2724 12793114 : if (!isview1 && !isview2) {
2725 9599865 : const ValRecord *prop;
2726 9599865 : const void *maxval = NULL;
2727 9599865 : const void *minval = NULL;
2728 9599865 : const void *maxbound = NULL;
2729 9599865 : const void *minbound = NULL;
2730 9599865 : const bool notnull = BATgetprop_nolock(b, GDK_NOT_NULL) != NULL;
2731 9538146 : bool seenmax = false, seenmin = false;
2732 9538146 : bool seennil = false;
2733 :
2734 9538146 : if ((prop = BATgetprop_nolock(b, GDK_MAX_BOUND)) != NULL)
2735 12 : maxbound = VALptr(prop);
2736 9610317 : if ((prop = BATgetprop_nolock(b, GDK_MIN_BOUND)) != NULL)
2737 11613 : minbound = VALptr(prop);
2738 9631907 : if (b->tmaxpos != BUN_NONE) {
2739 1195882 : assert(b->tmaxpos < BATcount(b));
2740 1195882 : maxval = BUNtail(bi, b->tmaxpos);
2741 1195882 : assert(cmpf(maxval, nilp) != 0);
2742 : }
2743 9617753 : if (b->tminpos != BUN_NONE) {
2744 1125495 : assert(b->tminpos < BATcount(b));
2745 1125495 : minval = BUNtail(bi, b->tminpos);
2746 1125495 : assert(cmpf(minval, nilp) != 0);
2747 : }
2748 9620045 : if (ATOMstorage(b->ttype) == TYPE_msk) {
2749 : /* for now, don't do extra checks for bit mask */
2750 : ;
2751 9615203 : } else if (b->tsorted || b->trevsorted || !b->tkey) {
2752 : /* if sorted (either way), or we don't have to
2753 : * prove uniqueness, we can do a simple
2754 : * scan */
2755 : /* only call compare function if we have to */
2756 9593519 : bool cmpprv = b->tsorted | b->trevsorted | b->tkey;
2757 :
2758 5543509160 : BATloop(b, p, q) {
2759 5534120142 : valp = BUNtail(bi, p);
2760 5534120142 : bool isnil = cmpf(valp, nilp) == 0;
2761 5393218260 : assert(!isnil || !notnull);
2762 5393218260 : assert(!b->tnonil || !isnil);
2763 5393218260 : assert(b->ttype != TYPE_flt || !isinf(*(flt*)valp));
2764 5393218260 : assert(b->ttype != TYPE_dbl || !isinf(*(dbl*)valp));
2765 5393218260 : if (b->tascii)
2766 316856845 : assert_ascii(valp);
2767 5393286449 : if (minbound && !isnil) {
2768 30 : cmp = cmpf(minbound, valp);
2769 30 : assert(cmp <= 0);
2770 : }
2771 5393286449 : if (maxbound && !isnil) {
2772 30 : cmp = cmpf(maxbound, valp);
2773 30 : assert(cmp > 0);
2774 : }
2775 5393286449 : if (maxval && !isnil) {
2776 885438014 : cmp = cmpf(maxval, valp);
2777 885437587 : assert(cmp >= 0);
2778 885437587 : seenmax |= cmp == 0;
2779 : }
2780 5393286022 : if (minval && !isnil) {
2781 133709459 : cmp = cmpf(minval, valp);
2782 133710438 : assert(cmp <= 0);
2783 133710438 : seenmin |= cmp == 0;
2784 : }
2785 5393287001 : if (prev && cmpprv) {
2786 2554249160 : cmp = cmpf(prev, valp);
2787 2695049885 : assert(!b->tsorted || cmp <= 0);
2788 2695049885 : assert(!b->trevsorted || cmp >= 0);
2789 2695049885 : assert(!b->tkey || cmp != 0);
2790 : }
2791 5534087726 : seennil |= isnil;
2792 5534087726 : if (seennil && !cmpprv &&
2793 4622473 : maxval == NULL && minval == NULL &&
2794 172019 : minbound == NULL && maxbound == NULL) {
2795 : /* we've done all the checking
2796 : * we can do */
2797 : break;
2798 : }
2799 5533915641 : prev = valp;
2800 : }
2801 : } else { /* b->tkey && !b->tsorted && !b->trevsorted */
2802 : /* we need to check for uniqueness the hard
2803 : * way (i.e. using a hash table) */
2804 21684 : const char *nme = BBP_physical(b->batCacheid);
2805 21684 : Hash *hs = NULL;
2806 21684 : BUN mask;
2807 :
2808 21684 : if ((hs = GDKzalloc(sizeof(Hash))) == NULL) {
2809 0 : TRC_WARNING(BAT, "Cannot allocate hash table\n");
2810 0 : goto abort_check;
2811 : }
2812 21706 : if (snprintf(hs->heaplink.filename, sizeof(hs->heaplink.filename), "%s.thshprpl%x", nme, (unsigned) MT_getpid()) >= (int) sizeof(hs->heaplink.filename) ||
2813 21693 : snprintf(hs->heapbckt.filename, sizeof(hs->heapbckt.filename), "%s.thshprpb%x", nme, (unsigned) MT_getpid()) >= (int) sizeof(hs->heapbckt.filename)) {
2814 : /* cannot happen, see comment in gdk.h
2815 : * about sizes near definition of
2816 : * BBPINIT */
2817 0 : GDKfree(hs);
2818 0 : TRC_CRITICAL(BAT, "Heap filename is too large\n");
2819 0 : goto abort_check;
2820 : }
2821 21703 : if (ATOMsize(b->ttype) == 1)
2822 : mask = (BUN) 1 << 8;
2823 21690 : else if (ATOMsize(b->ttype) == 2)
2824 : mask = (BUN) 1 << 16;
2825 : else
2826 21690 : mask = HASHmask(b->batCount);
2827 21703 : hs->heapbckt.parentid = b->batCacheid;
2828 21703 : hs->heaplink.parentid = b->batCacheid;
2829 21703 : if ((hs->heaplink.farmid = BBPselectfarm(
2830 21704 : TRANSIENT, b->ttype, hashheap)) < 0 ||
2831 21704 : (hs->heapbckt.farmid = BBPselectfarm(
2832 43382 : TRANSIENT, b->ttype, hashheap)) < 0 ||
2833 21704 : HASHnew(hs, b->ttype, BATcount(b),
2834 : mask, BUN_NONE, false) != GDK_SUCCEED) {
2835 0 : GDKfree(hs);
2836 0 : TRC_WARNING(BAT, "Cannot allocate hash table\n");
2837 0 : goto abort_check;
2838 : }
2839 133562711 : BATloop(b, p, q) {
2840 133541006 : BUN hb;
2841 133541006 : BUN prb;
2842 133541006 : valp = BUNtail(bi, p);
2843 133541006 : bool isnil = cmpf(valp, nilp) == 0;
2844 133420260 : assert(!isnil || !notnull);
2845 133420260 : assert(b->ttype != TYPE_flt || !isinf(*(flt*)valp));
2846 133420260 : assert(b->ttype != TYPE_dbl || !isinf(*(dbl*)valp));
2847 133420260 : if (b->tascii)
2848 12160 : assert_ascii(valp);
2849 133420260 : if (minbound && !isnil) {
2850 0 : cmp = cmpf(minbound, valp);
2851 0 : assert(cmp <= 0);
2852 : }
2853 133420260 : if (maxbound && !isnil) {
2854 0 : cmp = cmpf(maxbound, valp);
2855 0 : assert(cmp > 0);
2856 : }
2857 133420260 : if (maxval && !isnil) {
2858 7804 : cmp = cmpf(maxval, valp);
2859 7804 : assert(cmp >= 0);
2860 7804 : seenmax |= cmp == 0;
2861 : }
2862 133420260 : if (minval && !isnil) {
2863 7804 : cmp = cmpf(minval, valp);
2864 7804 : assert(cmp <= 0);
2865 7804 : seenmin |= cmp == 0;
2866 : }
2867 133420260 : prb = HASHprobe(hs, valp);
2868 133483645 : for (hb = HASHget(hs, prb);
2869 134633667 : hb != BUN_NONE;
2870 1150022 : hb = HASHgetlink(hs, hb))
2871 1163854 : if (cmpf(valp, BUNtail(bi, hb)) == 0)
2872 0 : assert(!b->tkey);
2873 133469813 : HASHputlink(hs, p, HASHget(hs, prb));
2874 133513437 : HASHput(hs, prb, p);
2875 133541015 : assert(!b->tnonil || !isnil);
2876 133541015 : seennil |= isnil;
2877 : }
2878 21705 : HEAPfree(&hs->heaplink, true);
2879 21671 : HEAPfree(&hs->heapbckt, true);
2880 21684 : GDKfree(hs);
2881 : }
2882 9587649 : abort_check:
2883 9587649 : GDKclrerr();
2884 9547962 : assert(maxval == NULL || seenmax);
2885 9547962 : assert(minval == NULL || seenmin);
2886 9547962 : assert(!b->tnil || seennil);
2887 : }
2888 12741211 : MT_lock_unset(&b->theaplock);
2889 : }
|